In [1]:
# --- 0) bootstrap ---
import os, sys
from pathlib import Path

ROOT = Path.cwd()
while not (ROOT / "pyproject.toml").exists() and ROOT != ROOT.parent:
    ROOT = ROOT.parent
os.chdir(ROOT)
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

print("Project root:", ROOT)

DOC_ID = "NFS_2019"      # pick any mounted/ingested doc id
ARTIFACTS_ROOT = Path("data/artifacts")
MODELS_ROOT    = Path("data/models")

from packages.core_config.config import load_yaml
cfg = load_yaml("configs/providers.yaml")

print({
    "models": cfg.get("chat.models"),
    "splitter": cfg.get("chat.splitter"),
    "guardrails": cfg.get("chat.guardrails"),
    "structured_output": cfg.get("chat.structured_output"),
})

Project root: d:\IIT BBS\Job Resources\Business Optima\new-pdf-agent
{'models': {'intro': {'provider': 'ollama', 'base_url': 'http://127.0.0.1:11434', 'model': 'llama3.2:latest', 'temperature': 0.2, 'max_new_tokens': 256}, 'splitter': {'provider': 'ollama', 'base_url': 'http://127.0.0.1:11434', 'model': 'llama3.2:latest', 'temperature': 0.2, 'max_new_tokens': 256}, 'core': {'provider': 'ollama', 'base_url': 'http://127.0.0.1:11434', 'model': 'llama3.2:latest', 'temperature': 0.2, 'max_new_tokens': 512}, 'output': {'provider': 'ollama', 'base_url': 'http://127.0.0.1:11434', 'model': 'llama3.2:latest', 'temperature': 0.3, 'max_new_tokens': 512}}, 'splitter': {'max_questions': 6, 'allow_notes': True, 'prompt_path': 'configs/prompts/chat/splitter.txt'}, 'guardrails': {'model_role': 'intro', 'prompt_path': 'configs/prompts/chat/intro_guard.txt', 'max_input_chars': 4000, 'pii_block': False, 'pii_regex': ['[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}', '\\b(?:\\+?\\d{1,3}[-.\\s]?)?(?:\\(?\

In [2]:
# --- 1) mount chat (retriever + models + tools) ---
from packages.chat.router import mount_chat

mount = mount_chat(DOC_ID)
print("Sessions dir:", mount.sessions_dir)
print("Adapter used:", mount.profile.adapter_path)
print("Collection:  ", mount.profile.collection)
print("Tools:       ", [t.name for t in mount.tools])

# small helpers for optional profile info
sections_text = getattr(mount.profile, "sections_text", None) or "(outline not available)"
abbreviations_text = getattr(mount.profile, "abbreviations_text", None) or "(abbreviations not available)"
print("Sections (first 200 chars):", sections_text[:200] or "(n/a)")
print("Abbreviations (first 200):", abbreviations_text[:200] or "(n/a)")

Sessions dir: data\sessions
Adapter used: data\models\NFS_2019\20bb948f\adapter
Collection:   NFS_2019
Tools:        ['doc_retrieve', 'calc_run']
Sections (first 200 chars): (outline not available)
Abbreviations (first 200): (abbreviations not available)


  return ChatOllama(


In [3]:
# --- 2) Ollama health check for the chat roles (intro/splitter/output) ---
import requests, yaml

def check_roles_health(yaml_path="configs/providers.yaml", roles=("intro","splitter","output")):
    cfg = yaml.safe_load(open(yaml_path, "r", encoding="utf-8"))
    failed = []
    for role in roles:
        url = cfg["chat"]["models"][role]["base_url"]
        try:
            r = requests.get(f"{url}/api/tags", timeout=10)
            print(f"{role:9s} {url} -> {r.status_code}")
            if r.status_code != 200:
                failed.append((role, url, f"HTTP {r.status_code}"))
        except Exception as e:
            print(f"{role:9s} {url} -> ERROR: {type(e).__name__}: {e}")
            failed.append((role, url, str(e)))
    return failed

fails = check_roles_health()
if fails:
    raise SystemExit(
        "Ollama health check failed for roles:\n" +
        "\n".join(f" - {role} at {url}: {err}" for role, url, err in fails) +
        "\n\nQuick fixes:\n"
        "  1) Ensure `ollama serve` is running on that port\n"
        "  2) Make YAML base_url match the running port (e.g., http://127.0.0.1:11434)\n"
    )

intro     http://127.0.0.1:11434 -> 200
splitter  http://127.0.0.1:11434 -> 200
output    http://127.0.0.1:11434 -> 200


In [4]:
# --- 3) guardrails smoke ---
from packages.chat.guardrails import route_scope

tests = [
    "What are the Section 3 fees?",
    "Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.",
    "Who won the Premier League in 2021?",
    "Define 'conversion factor' used in this document.",
    "What's the PDF version and publication year?",
]

for q in tests:
    r = route_scope(
        mount.llm_intro,
        doc_id=mount.profile.doc_id,
        sections_text=sections_text,
        abbreviations_text=abbreviations_text,
        user_query=q,
    )
    decision = r["decision"]
    print(f"\nQ: {q}\n→ prefilter: {r['prefilter']}\n→ decision : {decision}")


Q: What are the Section 3 fees?
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {'in_scope': True, 'intent': 'glossary', 'reason': '', 'rewritten': 'What are the Section 3 fees?'}

Q: Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {'in_scope': True, 'intent': 'table', 'reason': 'The reimbursement can be computed using the table in section 25 8 28 3 for code 12345.', 'rewritten': 'Compute the reimbursement using the table in section 25 8 28 3 for code 12345.'}

Q: Who won the Premier League in 2021?
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {'in_scope': False, 'intent': 'other', 'reason': 'Premier League winner not covered in this document', 'rewritten': ''}

Q: Define 'conversion factor' used in this document.
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {'in_scope': True, 'intent': 'glossary', 'reason': '', 'rewritten': "Define 'conversion factor' used in this docume

In [5]:
# --- 4) splitter smoke ---
from packages.chat.splitter import split_and_clean

multi = [
    "What are Section 3 fees and also define conversion factor.",
    "Compute reimbursement from section N 25 8 28 3 for code 12345; also, what’s the PDF version?",
    "Only this: conversion factor definition.",
    "Three things: A) Section 3, B) formula for X, C) any glossary of N 25 8 28 3?",
    "What are the fees; and the conversion factor; and publication year 2019?"
]

for q in multi:
    plan = split_and_clean(mount.llm_splitter, q)
    print("\nQ:", q)
    print("→", [f"{x.id}: {x.text}" for x in plan.questions], "| notes:", plan.notes)


Q: What are Section 3 fees and also define conversion factor.
→ ['q1: What are Section 3 fees?', 'q2: Define conversion factor.'] | notes: 

Q: Compute reimbursement from section N 25 8 28 3 for code 12345; also, what’s the PDF version?
→ ['q1: What is the reimbursement for code 12345?', 'q2: What is the PDF version?'] | notes: 

Q: Only this: conversion factor definition.
→ ['q1: What is the conversion factor definition?'] | notes: 

Q: Three things: A) Section 3, B) formula for X, C) any glossary of N 25 8 28 3?
→ ['q1: What is the section number?', 'q2: What is the formula for X?', 'q3: Can you provide a glossary of N 25 8 28 3?'] | notes: 

Q: What are the fees; and the conversion factor; and publication year 2019?
→ ['q1: What are the fees?', 'q2: What is the conversion factor?', 'q3: What was the publication year in 2019?'] | notes: 


In [6]:
# --- 5) retriever + minimal tool demo ---
import json

hits = mount.retriever.search("What are the Section 3 fees?")
print("Top", len(hits), "hits")
for i, h in enumerate(hits, 1):
    m = h.get("metadata", {})
    print(f"{i:>2}. id={h['id']} score={h['score']:.3f} page={m.get('page')}, heading={m.get('heading_path')}")

# A tiny tool-like function (no new files): returns JSON list for the question
def retrieve_context_json(question: str, top_k: int = 6) -> str:
    snips = mount.retriever.search(question)[:top_k]
    return json.dumps(snips, ensure_ascii=False)

print("\nTool JSON sample:", retrieve_context_json("Define conversion factor.")[:240], "...")

Top 10 hits
 1. id=NFS_2019-11267 score=0.650 page=None, heading=FEE SCHEDULE > N 25 8 28 3
 2. id=NFS_2019-h-207 score=0.642 page=None, heading=FEE SCHEDULE > 3.76 BR XXX XXX
 3. id=NFS_2019-11118 score=0.642 page=None, heading=FEE SCHEDULE > CONVERSION FACTORS
 4. id=NFS_2019-11334 score=0.640 page=None, heading=FEE SCHEDULE > N 25 8 28 3
 5. id=NFS_2019-11404 score=0.640 page=None, heading=FEE SCHEDULE > N 25 8 28 3
 6. id=NFS_2019-488 score=0.640 page=None, heading=FEE SCHEDULE > GENERAL GROUND RULES
 7. id=NFS_2019-h-3 score=0.640 page=None, heading=FEE SCHEDULE
 8. id=NFS_2019-10230 score=0.639 page=None, heading=FEE SCHEDULE > MEDICINE GROUND RULES
 9. id=NFS_2019-11376 score=0.639 page=None, heading=FEE SCHEDULE > N 25 8 28 3
10. id=NFS_2019-11403 score=0.637 page=None, heading=FEE SCHEDULE > N 25 8 28 3

Tool JSON sample: [{"id": "NFS_2019-11219", "text": "Description Value", "metadata": {"block_type": "para", "page_end": 36, "page_start": 36, "heading_path": "FEE SCHEDULE > C

In [7]:
# --- 6) Core RAG: non-stream answer_one ---
from packages.chat.core_rag import CoreRAG  # ensure this import sees the new file
core = CoreRAG(mount.retriever, mount.llm_core)

q = "What are the Section 3 fees?"
ans = await core.answer_one(q)
print("Answer:", ans.answer[:400], "...")
print("Citations:", [c.id for c in ans.citations])

Answer: {"answer": "Section 3 fees are not explicitly defined in the provided context.", "citations": [{"id": "NFS_2019-11267", "text": "Category III Codes", "metadata": {"page_start": 5, "heading_path": "FEE SCHEDULE > N 25 8 28 3", "page_end": 5, "doc_id": "NFS_2019", "block_type": "para"}, "score": 0.6500933298345947}, {"id": "NFS_2019-h-207", "text": "3.76 BR XXX XXX", "metadata": {"block_type": "head ...
Citations: ['NFS_2019-11267', 'NFS_2019-h-207']


In [8]:
# DEBUG: show full retrieved snippets for a question
from pprint import pprint
q = "What are the Section 3 fees?"

snips, cits = await core._get_ctx(q, limit=8)   # internal helper; fine for notebooks
print("Retrieved", len(snips), "snippets")
for i, s in enumerate(snips, 1):
    m = s.get("metadata", {})
    print(f"\n[{i}] id={s['id']} score={s['score']:.3f}")
    print("heading:", m.get("heading_path"))
    print("page:", m.get("page"))
    print("-"*80)
    print(s["text"])   # FULL TEXT (may be long)

Retrieved 8 snippets

[1] id=NFS_2019-11267 score=0.650
heading: FEE SCHEDULE > N 25 8 28 3
page: None
--------------------------------------------------------------------------------
Category III Codes

[2] id=NFS_2019-h-207 score=0.642
heading: FEE SCHEDULE > 3.76 BR XXX XXX
page: None
--------------------------------------------------------------------------------
3.76 BR XXX XXX

[3] id=NFS_2019-11118 score=0.642
heading: FEE SCHEDULE > CONVERSION FACTORS
page: None
--------------------------------------------------------------------------------
Category III Codes

[4] id=NFS_2019-11334 score=0.640
heading: FEE SCHEDULE > N 25 8 28 3
page: None
--------------------------------------------------------------------------------
Description

[5] id=NFS_2019-11404 score=0.640
heading: FEE SCHEDULE > N 25 8 28 3
page: None
--------------------------------------------------------------------------------
Description

[6] id=NFS_2019-488 score=0.640
heading: FEE SCHEDULE > GENERAL GROUND RUL

In [9]:
# DEBUG: show the exact prompt the model got (truncated for display)
from langchain.output_parsers import PydanticOutputParser
from packages.schemas.chat import CoreAnswer
import json

snips, cits = await core._get_ctx(q, limit=8)
ctx_json = json.dumps(snips, ensure_ascii=False)
parser = PydanticOutputParser(pydantic_object=CoreAnswer)
prompt = core._prompt_tmpl.format(
    question=q,
    context_json=ctx_json,
    format_instructions=parser.get_format_instructions()
)

print(prompt[:3000])  # truncate to avoid flooding the notebook

You are a careful, grounded assistant. Use ONLY the provided CONTEXT to answer.
Cite snippet ids you used.

Return strictly in this JSON schema:
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Citation": {"properties": {"id": {"description": "Snippet id from retriever.", "title": "Id", "type": "string"}, "page": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "Page number if available.", "title": "Page"}, "heading_path": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Full heading path if available.", "title":

In [10]:
# DEBUG: raw model output (non-stream) before parsing
msg = await mount.llm_core.ainvoke(prompt)
raw = getattr(msg, "content", str(msg))
print(raw)

{"citations": [{"id": "NFS_2019-11267", "text": "Category III Codes", "metadata": {"heading_path": "FEE SCHEDULE > N 25 8 28 3", "doc_id": "NFS_2019", "page_start": 5, "block_type": "para", "page_end": 5}, "score": 0.6500933298345947}, {"id": "NFS_2019-11334", "text": "Description", "metadata": {"page_start": 8, "block_type": "para", "doc_id": "NFS_2019", "heading_path": "FEE SCHEDULE > N 25 8 28 3", "page_end": 8}, "score": 0.6399046284720062}, {"id": "NFS_2019-11404", "text": "Description", "metadata": {"page_start": 8, "heading_path": "FEE SCHEDULE > N 25 8 28 3", "block_type": "para", "doc_id": "NFS_2019", "page_end": 8}, "score": 0.6399046284720062}, {"id": "NFS_2019-488", "text": "97", "metadata": {"page_end": 7, "block_type": "para", "heading_path": "FEE SCHEDULE > GENERAL GROUND RULES", "page_start": 7, "doc_id": "NFS_2019"}, "score": 0.6397812271530076}, {"id": "NFS_2019-10230", "text": "3", "metadata": {"heading_path": "FEE SCHEDULE > MEDICINE GROUND RULES", "doc_id": "NFS_20

In [11]:
# DEBUG: see exactly what CoreRAG parsed into CoreAnswer
parsed = await core.answer_one(q)
print(parsed)

answer='{"answer": "Section 3 fees are not explicitly stated in the provided context.", "citations": [{"id": "NFS_2019-11267", "text": "Category III Codes", "metadata": {"page_end": 5, "doc_id": "NFS_2019", "page_start": 5, "heading_path": "FEE SCHEDULE > N 25 8 28 3", "block_type": "para"}, "score": 0.6500933298345947}, {"id": "NFS_2019-h-207", "text": "3.76 BR XXX XXX", "metadata": {"doc_id": "NFS_2019", "page_end": 14, "heading_path": "FEE SCHEDULE > 3.76 BR XXX XXX", "page_start": 14, "block_type": "heading"}, {"id": "NFS_2019-11118", "text": "Category III Codes", "metadata": {"block_type": "para", "page_start": 5, "page_end": 5, "heading_path": "FEE SCHEDULE > CONVERSION FACTORS", "doc_id": "NFS_2019"}, "score": 0.6415055843189101}, {"id": "NFS_2019-11334", "text": "Description", "metadata": {"page_start": 8, "page_end": 8, "heading_path": "FEE SCHEDULE > N 25 8 28 3", "block_type": "para", "doc_id": "NFS_2019"}, "score": 0.6399046284720062}, {"id": "NFS_2019-11404", "text": "Desc

In [12]:
# --- 7) Core RAG: streaming answer_one ---
from packages.chat.core_rag import CoreRAG  # (re-import after file change)
core = CoreRAG(mount.retriever, mount.llm_core)

q = "Define 'conversion factor' used in this document."
buf = ""
async for ev in core.astream_one(q):
    if ev.get("type") == "core_token":
        tok = ev.get("data", "")
        buf += tok
        if len(buf) < 200 and tok.strip():
            print(tok, end="", flush=True)
    elif ev.get("type") == "core_final":
        fin = ev["data"]
        print("\n\n— final —")
        print("Answer:", fin.answer[:400], "...")
        print("Citations:", [c.id for c in fin.citations])

{"id": "CONV_FACTOR", "page": null, "heading_path": "FEE SCHEDULE > CONVERSION FACTORS", "table_id": null, "score": null, "answer": "The conversion factor is calculated as the ratio of the amount of

— final —
Answer: The conversion factor is calculated as the ratio of the amount of goods or services received to the value of those goods or services. ...
Citations: ['NFS_2019-11219', 'NFS_2019-11040']


In [13]:
# --- 8) Core RAG: batch parallel answers ---
qs = [
    "What's the PDF version and publication year?",
    "Compute reimbursement for code 12345 from N 25 8 28 3."
]

batch = await core.answer_batch(qs)
for q, a in zip(qs, batch):
    print("\nQ:", q)
    print("A:", a.answer[:300], "...")
    print("Citations:", [c.id for c in a.citations])


Q: What's the PDF version and publication year?
A: {"answer": "This publication is made available with the understanding that\nthe publisher is not engaged in rendering legal and other\nservices that require a professional license.", "citations": [{"id": "NFS_2019-12256", "text": "All rights reserved. Printed in the United States of America. No part ...
Citations: ['NFS_2019-12256', 'NFS_2019-12']

Q: Compute reimbursement for code 12345 from N 25 8 28 3.
A: Reimbursement for code 12345 from N 25 8 28 3 is not explicitly stated in the provided context. ...
Citations: ['NFS_2019-11403']


In [14]:
# --- 9) sanity asserts (won't crash the run if they fail; tweak as needed) ---
try:
    # a) guard should mark a clearly unrelated question out-of-scope
    g = route_scope(mount.llm_intro,
                    doc_id=mount.profile.doc_id,
                    sections_text=sections_text,
                    abbreviations_text=abbreviations_text,
                    user_query="Who won the Premier League in 2021?")
    assert g["decision"]["in_scope"] is False

    # b) splitter should produce 2+ items for multi-intent
    sp = split_and_clean(mount.llm_splitter, "Fees and conversion factor?")
    assert len(sp.questions) >= 1

    # c) core answer should return citations list (even if fallback added)
    a1 = await core.answer_one("What are the Section 3 fees?")
    assert getattr(a1, "citations", None) is not None and len(a1.citations) >= 1

    print("Sanity assertions passed")
except AssertionError as e:
    print("Sanity assert failed:", e)

Sanity assertions passed


In [15]:
# --- 10) optional: tiny pings (safe) ---
def safe_ping(llm, prompt):
    try:
        return llm.invoke(prompt).content[:140]
    except Exception as e:
        return f"[unavailable: {type(e).__name__}]"

print("intro   :", safe_ping(mount.llm_intro, "Say 'ready'."))
print("splitter:", safe_ping(mount.llm_splitter, "Split: A and B?"))
print("core    :", safe_ping(mount.llm_core, "Return exactly: {\"answer\":\"ok\",\"citations\":[]}"))
print("output  :", safe_ping(mount.llm_output, "Summarize: ready -> ok"))

intro   : Ready.
splitter: I don't have enough information to determine whether you're referring to the movie "Split" or a mathematical concept. Could you please provi
core    : def return_citations():
    return {"answer": "ok", "citations": []}

print(return_citations())
output  : It seems like you're indicating that something is complete or confirmed. "Ready" implies preparation or anticipation, while "ok" indicates a
