In [2]:
# --- 0) bootstrap ---
import os, sys
from pathlib import Path
ROOT = Path.cwd()
while not (ROOT / "pyproject.toml").exists() and ROOT != ROOT.parent:
    ROOT = ROOT.parent
os.chdir(ROOT)
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
print("Project root:", ROOT)

DOC_ID = "NFS_2019"

Project root: d:\IIT BBS\Job Resources\Business Optima\new-pdf-agent


In [3]:
# --- 1) mount (reuses your 7.1 mount) ---
from packages.chat.router import mount_chat
mount = mount_chat(DOC_ID)
print("Sessions dir:", mount.sessions_dir)
print("Adapter used:", mount.profile.adapter_path)
print("Collection:  ", mount.profile.collection)

Sessions dir: data\sessions
Adapter used: data\models\NFS_2019\20bb948f\adapter
Collection:   NFS_2019


  return ChatOllama(


In [5]:
# --- 2) sanity ping + memory store init ---
from packages.chat.memory import SessionStore, SummaryBuffer
store = SessionStore(mount.sessions_dir, doc_id=DOC_ID).load_or_create()
buf = SummaryBuffer(store, llm=mount.llm_output)
print("Session file:", store.path)

Session file: data\sessions\NFS_2019_82aebab93556.json


In [6]:
# --- 3) stream helper for run_turn ---
import asyncio
from packages.chat.router import run_turn

async def chat_once(session_id: str, text: str):
    events = []
    async for ev in run_turn(DOC_ID, session_id, text):
        t = ev.get("type")
        if t in ("final_token",):
            # print streaming tokens briefly
            print(ev["data"], end="", flush=True)
        elif t == "final":
            print("\n\n— FINAL —")
            print(ev["data"]["text"][:600])
        else:
            # collect debug events
            events.append(ev)
    return events

session_id = "sess7eA"

In [7]:
# --- 4) OOS handoff demo ---
print(">>> OOS demo")
oos_events = await chat_once(session_id, "Who won the Premier League in 2021?")
print("\nGuard + Misc events:", [e["type"] for e in oos_events])


>>> OOS demo
There is no information available to provide a response on the topic. The requested information about the Premier League winner is out of scope for this document. 

If you're looking for information on football teams, league standings, or player statistics, consider asking about those topics instead. You could also ask about the history of the Premier League or its current season.

— FINAL —
There is no information available to provide a response on the topic. The requested information about the Premier League winner is out of scope for this document. 

If you're looking for information on football teams, league standings, or player statistics, consider asking about those topics instead. You could also ask about the history of the Premier League or its current season.

Guard + Misc events: ['guard']


In [9]:
# --- 5) In-scope demo (split->core->stitch) ---
print("\n>>> In-scope demo")
ins_events = await chat_once(session_id, "What are Section 3 fees and define conversion factor?")
print("\nGuard + Misc events:", [e["type"] for e in ins_events])


>>> In-scope demo
The conversion factor for Section 3 fees can be found in the fee schedule. According to the provided information, the following codes are subject to this conversion factor: [id]NFS_2019-11015[/id]. The specific code mentioned is "The following codes are subject to the conversion factor from the section listed." (CORE_ANSWERS_JSON[1]["answer"]).

— FINAL —
The conversion factor for Section 3 fees can be found in the fee schedule. According to the provided information, the following codes are subject to this conversion factor: [id]NFS_2019-11015[/id]. The specific code mentioned is "The following codes are subject to the conversion factor from the section listed." (CORE_ANSWERS_JSON[1]["answer"]).

Guard + Misc events: ['guard', 'split', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token', 'core_token

In [10]:
# --- 6) Memory summary check ---
from pathlib import Path
print("\nSummary:\n", store.summary_text())
print("\nTail turns:", [(t.role, t.content[:50]) for t in store.state.turns[-6:]])


Summary:
 

Tail turns: []


In [12]:
# --- 7) New session to verify isolation ---
print("\n>>> New session B: memory should be cold")
session_id_b = "sess7eB"
_ = await chat_once(session_id_b, "Define conversion factor.")
print("\nSummary (B):", SessionStore(mount.sessions_dir, DOC_ID, session_id_b).load_or_create().summary_text())


>>> New session B: memory should be cold
To define a conversion factor, we can use the provided JSON schema and create an object that conforms to it.

Here's an example of how to do this:
```
{
  "$defs": {
    "Citation": {
      ...
    }
  },
  "properties": {
    "conversion_factor": {
      "description": "Conversion factor value",
      "title": "Conversion Factor",
      "type": "number"
    }
  },
  "required": ["answer"]
}
```
This object defines a new property called `conversion_factor` with a type of `number`, which represents the conversion factor value.

To create an instance of this schema, we can use the provided top snippets as context. Since there is no direct mention of a conversion factor in the text, we'll have to make an educated guess based on the available information.

After analyzing the text, it appears that the conversion factor might be related to the "B+" grade mentioned in snippet #NFS_2019-11071. However, without more context or information about how the

In [4]:
# 1) Sanity: retriever hit sample (to see snippets are short/weak today)
hits = mount.retriever.search("Define conversion factor.")
for i, h in enumerate(hits[:6], 1):
    m = h.get("metadata", {})
    print(f"{i:>2}. id={h['id']} score={h['score']:.3f} heading={m.get('heading_path')}")
    print("   text:", (h["text"][:120] + "…") if len(h["text"])>120 else h["text"])

 1. id=NFS_2019-11219 score=0.685 heading=FEE SCHEDULE > CONVERSION FACTORS
   text: Description Value
 2. id=NFS_2019-11090 score=0.680 heading=FEE SCHEDULE > CONVERSION FACTORS
   text: Description
 3. id=NFS_2019-11851 score=0.677 heading=FEE SCHEDULE > CONVERSION FACTORS
   text: be calculated as follows:
 4. id=NFS_2019-11040 score=0.676 heading=FEE SCHEDULE > CONVERSION FACTORS
   text: Code Description
 5. id=NFS_2019-11242 score=0.672 heading=FEE SCHEDULE > CONVERSION FACTORS
   text: R
 6. id=NFS_2019-11243 score=0.672 heading=FEE SCHEDULE > CONVERSION FACTORS
   text: R


In [5]:
# 2) Core RAG non-stream (check that there's no schema/prose leakage anymore)
from packages.chat.core_rag import CoreRAG
core = CoreRAG(mount.retriever, mount.llm_core)

q = "What are the Section 3 fees?"
ans = await core.answer_one(q)
print("ANSWER:\n", ans.answer)
print("CITATIONS:", [c.id for c in ans.citations])

ANSWER:
 The Section 3 fees are not explicitly stated in the provided context snippets.
CITATIONS: ['NFS_2019-h-207']


In [6]:
# 3) Core RAG stream (tokens should be JSON-looking or clean text; no schema dump)
buf = ""
async for ev in core.astream_one("Define 'conversion factor' used in this document."):
    if ev["type"] == "core_token":
        t = ev["data"]
        if len(buf) < 200:  # print brief prefix
            print(t, end="")
        buf += t
    elif ev["type"] == "core_final":
        fin = ev["data"]
        print("\n\n— FINAL —")
        print("ANSWER:", fin.answer)
        print("CITATIONS:", [c.id for c in fin.citations])

{
  "answer": "a ratio or factor used to convert from one unit of measurement to another",
  "citations": [
    {
      "id": "NFS_2019-11015"
    },
    {
      "id": "NFS_2019-11851"
    }
  ],
  "notes

— FINAL —
ANSWER: a ratio or factor used to convert from one unit of measurement to another
CITATIONS: ['NFS_2019-11015', 'NFS_2019-11851']


In [7]:
# 4) Stitcher (in-scope): ensure no internal names show up
from packages.chat.output_llm import astitch_text_stream

fake_answers = [
    {"answer": "Section 3 fees are listed under the applicable fee schedule.", "citations":[{"id":"NFS_2019-11118"}], "notes":""},
    {"answer": "The conversion factor is defined in the Conversion Factors section.", "citations":[{"id":"NFS_2019-11040"}], "notes":""}
]

buf = ""
async for tok in astitch_text_stream(mount.llm_output, fake_answers, memory_summary="(none)"):
    buf += tok
print("STITCHED:\n", buf)
assert "CORE_ANSWERS" not in buf and "schema" not in buf.lower()

STITCHED:
 I can help with questions about Section 3 fees and conversion factors. 

For Section 3 fees, please refer to the applicable fee schedule.

For conversion factors, check the Conversion Factors section for definitions.


In [8]:
# 5) Stitcher (OOS): friendly handoff with reason; still clean prose
buf = ""
async for tok in astitch_text_stream(mount.llm_output, [], memory_summary="(none)", oos_reason="This document does not cover football results."):
    buf += tok
print("OOS STITCHED:\n", buf)
assert "CORE_ANSWERS" not in buf and "schema" not in buf.lower()

OOS STITCHED:
 I'm unable to provide information on football results as it is out of scope. If you're looking for sports statistics, I can help with general information or suggest follow-up questions such as "What type of statistics are you interested in?" or "Are you looking for historical data or current trends?".


In [9]:
# 6) Memory summary demo (yours looked empty because thresholds weren't met)
from packages.chat.memory import SessionStore, SummaryBuffer

sess_dir = mount.sessions_dir
store = SessionStore(sess_dir, DOC_ID).load_or_create()   # creates a fresh session
sid = store.session_id
print("Session file:", store.path)

# Append some turns so there's something to summarize
store.append("user", "Hi, I need the Section 3 fees and the conversion factor.")
store.append("assistant", "Let me check the relevant sections.")
store.append("user", "Also, what is the PDF publication year?")
store.append("assistant", "I'll include that too.")

# Force summarization now (no threshold wait)
sb = SummaryBuffer(store, mount.llm_output, summarize_every=2, char_budget=1000)
summary = await sb.asummarize_now()

print("\nSUMMARY:\n", summary)
print("\nTAIL TURNS:", [t.content for t in store.last_n(4)])

Session file: data\sessions\NFS_2019_75acb9189433.json

SUMMARY:
 The user requested information on Section 3 fees, a conversion factor, and the PDF publication year. The assistant agreed to provide this information.

TAIL TURNS: ['Hi, I need the Section 3 fees and the conversion factor.', 'Let me check the relevant sections.', 'Also, what is the PDF publication year?', "I'll include that too."]


In [10]:
# 7) End-to-end: OOS + In-scope via your router.run_turn (final tokens only)
from packages.chat.router import run_turn

async def run_and_collect(q: str):
    buf = ""
    async for ev in run_turn(DOC_ID, session_id=sid, user_query=q):
        if ev.get("type") == "final_token":
            buf += ev["data"]
    return buf

print(">>> OOS")
print(await run_and_collect("Who won the Premier League in 2021?"))

print("\n>>> In-scope")
print(await run_and_collect("Define conversion factor used in this document."))

>>> OOS
I can provide information on Section 3 fees and the PDF publication year. However, I couldn't find any information on a conversion factor related to this topic.

For Section 3 fees, please refer to [id] for more details. The PDF publication year is not available in my current database.

To further assist you, I can provide information on Premier League winners or offer general guidance on finding the required conversion factor.

>>> In-scope
Unfortunately, I couldn't find information on a conversion factor. However, you can refer to [NFS_2019-11219] for more details on Section 3 fees. The PDF publication year for your specific topic is not available in my database.
