In [1]:
# --- 0) bootstrap ---
import os, sys
from pathlib import Path

# Locate the project root: the closest directory, starting at the current
# working directory and walking upward, that contains a pyproject.toml.
_start_dir = Path.cwd()
ROOT = next(
    (d for d in (_start_dir, *_start_dir.parents) if (d / "pyproject.toml").exists()),
    None,
)
if ROOT is None:
    # Fail fast: falling back to the filesystem root would make every relative
    # path used below (configs/..., data/...) resolve against the wrong directory.
    raise FileNotFoundError(
        f"No pyproject.toml found in {_start_dir} or any parent directory; "
        "start the notebook from inside the project checkout."
    )

# Relative paths in the following cells are resolved against the project root.
os.chdir(ROOT)
# Make the project's top-level packages importable (later cells import `packages.*`).
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

print("Project root:", ROOT)

# Document to work with; set PDF_AGENT_DOC_ID to override. An empty or
# whitespace-only value is treated as unset so DOC_ID is never blank.
DOC_ID = os.environ.get("PDF_AGENT_DOC_ID", "").strip() or "NFS_2019"
# Project-relative data directories (resolved against the working directory).
ARTIFACTS_ROOT = Path("data/artifacts")  # base directory searched for per-document artifact files
MODELS_ROOT    = Path("data/models")     # root for model files (not referenced by the visible cells)

Project root: d:\IIT BBS\Job Resources\Business Optima\new-pdf-agent


In [2]:
from packages.core_config.config import load_yaml
import pprint, yaml

cfg = load_yaml("configs/providers.yaml")

# Snapshot of the chat-related provider settings, printed for a quick visual
# check. Every lookup falls back to {} when the key is absent.
snap = {
    section: cfg.get(f"chat.{section}", {})
    for section in ("structured_output", "splitter", "guardrails")
}
snap["models"] = {
    role: cfg.get(f"chat.models.{role}", {})
    for role in ("intro", "splitter", "output")
}
pprint.pprint(snap)

# Convenience vars: structured-output preferences, coerced to plain bool / str.
SO_PREFER = bool(
    cfg.get("chat.structured_output.prefer_native", True)
)
SO_METHOD = str(
    cfg.get("chat.structured_output.method", "json_schema")
)

{'guardrails': {'blocked_regex': ['(?i)hack|exploit|payload'],
                'max_input_chars': 4000,
                'model_role': 'intro',
                'pii_block': False,
                'pii_regex': ['[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}',
                              '\\b(?:\\+?\\d{1,3}[-.\\s]?)?(?:\\(?\\d{3}\\)?[-.\\s]?)?\\d{3}[-.\\s]?\\d{4}\\b'],
                'prompt_path': 'configs/prompts/chat/intro_guard.txt'},
 'models': {'intro': {'base_url': 'http://127.0.0.1:11434',
                      'max_new_tokens': 256,
                      'model': 'llama3.2:latest',
                      'provider': 'ollama',
                      'temperature': 0.2},
            'output': {'base_url': 'http://127.0.0.1:11434',
                       'max_new_tokens': 512,
                       'model': 'llama3.2:latest',
                       'provider': 'ollama',
                       'temperature': 0.3},
            'splitter': {'base_url': 'http://127.0.0.1:11434',
   

In [3]:
import requests

def check_roles_health(yaml_path="configs/providers.yaml", roles=("intro","splitter","output"), timeout=10):
    """Probe the server configured for each chat role and collect the failures.

    For every role, reads ``chat -> models -> <role> -> base_url`` from the
    YAML file and issues ``GET <base_url>/api/tags``. One status line per role
    is printed.

    Args:
        yaml_path: Path of the providers YAML file to read.
        roles: Role names to look up under ``chat -> models``.
        timeout: Seconds allowed for each HTTP request.

    Returns:
        A list of ``(role, url, error)`` tuples, one per role whose request
        raised or returned a status other than 200; empty when every role
        answered with 200.

    Raises:
        OSError: If ``yaml_path`` cannot be opened.
        KeyError: If a role has no ``base_url`` entry under ``chat -> models``
            (the lookup happens outside the per-role ``try``).
    """
    # Context manager so the config file handle is closed deterministically.
    with open(yaml_path, "r", encoding="utf-8") as fh:
        cfg = yaml.safe_load(fh)
    failed = []
    for role in roles:
        url = cfg["chat"]["models"][role]["base_url"]
        try:
            r = requests.get(f"{url}/api/tags", timeout=timeout)
            print(f"{role:9s} {url} -> {r.status_code}")
            if r.status_code != 200:
                failed.append((role, url, f"HTTP {r.status_code}"))
        except Exception as e:
            # Best-effort probe: report the failure and keep checking the other roles.
            print(f"{role:9s} {url} -> ERROR: {type(e).__name__}: {e}")
            failed.append((role, url, str(e)))
    return failed

# Run the health check with its defaults; stop the notebook with
# remediation hints if any role failed.
fails = check_roles_health()
if fails:
    failure_lines = [f" - {role} at {url}: {err}" for role, url, err in fails]
    remediation = (
        "\n\nQuick fixes:\n"
        "  1) Ensure `ollama serve` is running on that port\n"
        "  2) Make YAML base_url match the running port (e.g., http://127.0.0.1:11434)\n"
        "  3) In this terminal, you can set:\n"
        "       set OLLAMA_HOST=http://127.0.0.1:11434   (cmd)\n"
        "       $env:OLLAMA_HOST = 'http://127.0.0.1:11434'   (PowerShell)\n"
    )
    header = "Ollama health check failed for roles:\n"
    raise SystemExit(header + "\n".join(failure_lines) + remediation)

intro     http://127.0.0.1:11434 -> 200
splitter  http://127.0.0.1:11434 -> 200
output    http://127.0.0.1:11434 -> 200


In [4]:
from packages.chat.router import mount_chat

# Mount the chat stack for the selected document
# (uses providers.yaml & profile/auto-discovery).
mount = mount_chat(DOC_ID)

# Print where sessions are stored and which adapter / collection were picked.
mount_summary = (
    ("Sessions dir:", mount.sessions_dir),
    ("Adapter used:", mount.profile.adapter_path),
    ("Collection:  ", mount.profile.collection),
)
for label, value in mount_summary:
    print(label, value)

  return ChatOllama(
The 8-bit optimizer is not available on your device, only available on CUDA for now.


Sessions dir: data\sessions
Adapter used: data\models\NFS_2019\20bb948f\adapter
Collection:   NFS_2019


In [5]:
def _load_artifact_text(doc_id: str, fname: str) -> str:
    """Return the stripped UTF-8 text of an artifact file, or "" if none is found.

    The file is looked up under ``ARTIFACTS_ROOT / doc_id`` directly and then
    in its ``meta`` and ``notes`` sub-directories; the first candidate that is
    a regular file wins.

    Args:
        doc_id: Document identifier used as the directory name.
        fname: File name to look for.

    Returns:
        The file content with surrounding whitespace removed, or an empty
        string when no candidate file exists.
    """
    # try a few common places; fall back empty
    candidates = [
        ARTIFACTS_ROOT / doc_id / fname,
        ARTIFACTS_ROOT / doc_id / "meta" / fname,
        ARTIFACTS_ROOT / doc_id / "notes" / fname,
    ]
    for p in candidates:
        # is_file() rather than exists(): a directory that happens to carry the
        # candidate name must be skipped, not passed to read_text().
        if p.is_file():
            return p.read_text(encoding="utf-8").strip()
    return ""

# Load the optional context files; use a placeholder when a file is missing or empty.
sections_text = _load_artifact_text(DOC_ID, "outline.txt")
if not sections_text:
    sections_text = "(outline not available)"
abbreviations_text = _load_artifact_text(DOC_ID, "abbreviations.txt")
if not abbreviations_text:
    abbreviations_text = "(abbreviations not available)"

# Show a single-line preview of each text, capped at 200 characters.
for preview_label, preview_source in (
    ("Sections (first 200 chars):", sections_text),
    ("Abbreviations (first 200 chars):", abbreviations_text),
):
    preview_tail = "..." if len(preview_source) > 200 else ""
    print(preview_label, preview_source[:200].replace("\n", " ") + preview_tail)

Sections (first 200 chars): (outline not available)
Abbreviations (first 200 chars): (abbreviations not available)


In [6]:
from packages.chat.guardrails import route_scope

# Probe queries for the scope guard: questions about the document plus one
# unrelated sports question. Each one is passed to try_guard() below.
tests_guard = [
    "What are the Section 3 fees?",
    "Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.",
    "Who won the Premier League in 2021?",
    "Define 'conversion factor' used in this document.",
    "What's the PDF version and publication year?",
]

def try_guard(q: str):
    """Run the scope guard on one query with this notebook's document context.

    Passes ``mount.llm_intro`` together with the module-level ``DOC_ID``,
    ``sections_text`` and ``abbreviations_text`` to ``route_scope`` and
    returns its result unchanged.
    """
    guard_kwargs = {
        "doc_id": DOC_ID,
        "sections_text": sections_text,
        "abbreviations_text": abbreviations_text,
        "user_query": q,
    }
    return route_scope(mount.llm_intro, **guard_kwargs)

import json
# Run every probe query through the guard and print both stages of the result.
for query in tests_guard:
    result = try_guard(query)
    print("\nQ:", query)
    print("→ prefilter:", result["prefilter"])
    # ensure_ascii=False keeps any non-ASCII characters readable in the output.
    decision_json = json.dumps(result["decision"], ensure_ascii=False)
    print("→ decision :", decision_json)


Q: What are the Section 3 fees?
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {"in_scope": true, "intent": "glossary", "reason": "", "rewritten": "What are the Section 3 fees?"}

Q: Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {"in_scope": true, "intent": "table", "reason": "The reimbursement can be computed using the table in section N 25 8 28 3 for code 12345.", "rewritten": "Compute the reimbursement using the table in section N 25 8 28 3 for code 12345."}

Q: Who won the Premier League in 2021?
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {"in_scope": false, "intent": "other", "reason": "Premier League winner not covered in this document", "rewritten": ""}

Q: Define 'conversion factor' used in this document.
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision : {"in_scope": true, "intent": "glossary", "reason": "", "rewritten": "Define 'conversion factor' used in this do

In [7]:
from packages.chat.splitter import split_and_clean

# Sample queries for the splitter: compound questions (joined with "and",
# semicolons or lettered lists) plus one single-topic query. Each one is
# passed to split_and_clean() below.
tests_splitter = [
    "What are Section 3 fees and also define conversion factor.",
    "Compute reimbursement from section N 25 8 28 3 for code 12345; also, what’s the PDF version?",
    "Only this: conversion factor definition.",
    "Three things: A) Section 3, B) formula for X, C) any glossary of N 25 8 28 3?",
    "What are the fees; and the conversion factor; and publication year 2019?",
]

def pretty_plan(plan):
    """Render a split plan for printing.

    Returns a ``(lines, notes)`` pair: ``lines`` holds one ``"<id>: <text>"``
    string per entry of ``plan.questions`` and ``notes`` is ``plan.notes``
    unchanged.
    """
    lines = []
    for question in plan.questions:
        lines.append(f"{question.id}: {question.text}")
    return lines, plan.notes

# Split every sample query and print the resulting sub-questions with any notes.
for query in tests_splitter:
    plan = split_and_clean(mount.llm_splitter, query)
    question_lines, plan_notes = pretty_plan(plan)
    print("\nQ:", query)
    shown_notes = plan_notes if plan_notes else ""
    print("→", question_lines, "| notes:", shown_notes)


Q: What are Section 3 fees and also define conversion factor.
→ ['q1: What are Section 3 fees?', 'q2: Define conversion factor.'] | notes: 

Q: Compute reimbursement from section N 25 8 28 3 for code 12345; also, what’s the PDF version?
→ ['q1: What is the reimbursement for section N 25 8 28 3?', 'q2: What is the PDF version?'] | notes: 

Q: Only this: conversion factor definition.
→ ['q1: What is the conversion factor definition?'] | notes: 

Q: Three things: A) Section 3, B) formula for X, C) any glossary of N 25 8 28 3?
→ ['q1: What section is being referred to?', 'q2: What is the formula for X?', 'q3: Can you provide a glossary of N 25 8 28 3?'] | notes: 

Q: What are the fees; and the conversion factor; and publication year 2019?
→ ['q1: What are the fees?', 'q2: What is the conversion factor?', 'q3: What was the publication year in 2019?'] | notes: 


In [8]:
# Sanity checks: (1) an unrelated question must be rejected by the guard,
# (2) the splitter must return between 1 and the configured maximum of questions.
g1 = try_guard("Who won the Premier League in 2021?")
sports_decision = g1["decision"]
assert sports_decision and (sports_decision["in_scope"] is False), "Sports Q should be out-of-scope"

plan = split_and_clean(mount.llm_splitter, "A) Section 3 fees; B) conversion factor; C) PDF version?")
question_count = len(plan.questions)
# Upper bound comes from the config, defaulting to 6 when the key is absent.
assert 1 <= question_count <= int(cfg.get("chat.splitter.max_questions", 6)), "Split count violates config bound"

print("Sanity assertions passed")

Sanity assertions passed


In [9]:
def guard_then_split(user_query: str):
    """Run the scope guard on a query and, if it is in scope, split it into sub-questions.

    Args:
        user_query: Raw user question.

    Returns:
        A dict with the keys ``"prefilter"`` and ``"decision"`` (taken from the
        ``route_scope`` result) and ``"split"``. When the decision is missing
        or not in scope, ``"split"`` is ``None`` and an extra ``"note"`` key
        explains the early stop; otherwise ``"split"`` holds the value
        returned by ``split_and_clean``.
    """
    # 1) guard
    g = route_scope(
        mount.llm_intro,
        doc_id=DOC_ID,
        sections_text=sections_text,
        abbreviations_text=abbreviations_text,
        user_query=user_query,
    )
    dec = g["decision"]
    if not dec or not dec.get("in_scope", False):
        return {
            "prefilter": g["prefilter"],
            "decision": dec,
            "split": None,
            "note": "Out-of-scope or blocked; stop early (no retriever / no tools)."
        }

    # 2) choose the text we pass downstream: the guard's rewrite when it has
    #    real content, otherwise the original query. Stripping *before* the
    #    fallback keeps a whitespace-only rewrite from reaching the splitter
    #    as an empty string.
    rewritten = (dec.get("rewritten") or "").strip() or user_query.strip()

    # 3) split (the cleaner is baked into split_and_clean)
    plan = split_and_clean(mount.llm_splitter, rewritten)

    return {"prefilter": g["prefilter"], "decision": dec, "split": plan}

# demo: sample inputs for the guard -> split pipeline (two document questions
# and one unrelated sports question); each is passed to guard_then_split() below.
pipe_tests = [
    "What are Section 3 fees and define conversion factor.",
    "Who won the Premier League in 2021?",
    "Compute reimbursement for code 12345 from N 25 8 28 3 and tell the PDF version.",
]

# Run each sample through the pipeline; print the guard decision and then
# either the split questions or the early-stop note.
for query in pipe_tests:
    out = guard_then_split(query)
    print("\nQ:", query)
    print("decision:", out["decision"])
    split_plan = out["split"]
    if not split_plan:
        print(out["note"])
        continue
    rendered = [f"{s.id}:{s.text}" for s in split_plan.questions]
    print("split:", rendered, "| notes:", split_plan.notes)


Q: What are Section 3 fees and define conversion factor.
decision: {'in_scope': True, 'intent': 'glossary', 'reason': '', 'rewritten': 'define conversion factor for Section 3 fees'}
split: ['q1:What is the conversion factor for Section 3 fees?', 'q2:How to calculate Section 3 fees conversion factor?'] | notes: 

Q: Who won the Premier League in 2021?
decision: {'in_scope': False, 'intent': 'other', 'reason': 'Premier League winner not covered in this document', 'rewritten': 'Who won the Premier League?'}
Out-of-scope or blocked; stop early (no retriever / no tools).

Q: Compute reimbursement for code 12345 from N 25 8 28 3 and tell the PDF version.
decision: {'in_scope': True, 'intent': 'meta', 'reason': 'The document provides information about the PDF version.', 'rewritten': 'What is the PDF version of this document?'}
split: ['q1:Is there a PDF version of this document?', 'q2:Where can I find the PDF version of this document?'] | notes: 


In [10]:
def safe_ping(llm, prompt):
    """Invoke ``llm`` with ``prompt`` and return the first 140 characters of the reply.

    Best-effort: any exception raised while invoking the model or reading the
    reply's ``content`` is reported as ``"[unavailable: <ExceptionType>]"``
    instead of being propagated.
    """
    try:
        reply = llm.invoke(prompt)
        preview = reply.content[:140]
    except Exception as exc:
        error_name = type(exc).__name__
        return f"[unavailable: {error_name}]"
    return preview

# Smoke-test each role's LLM with a trivial prompt and print a short preview of the reply.
for role_label, llm_attr, ping_prompt in (
    ("intro   :", "llm_intro", "Say 'ready'."),
    ("splitter:", "llm_splitter", "Split: A and B?"),
    ("output  :", "llm_output", "Summarize 'ready -> ok'."),
):
    print(role_label, safe_ping(getattr(mount, llm_attr), ping_prompt))

intro   : Ready.
splitter: I don't have enough information to accurately determine whether you're referring to a specific context or topic related to "Split" (A and B)
output  : I couldn't find any information on "Ready -> OK". Could you provide more context or clarify what this term refers to? I'll do my best to hel
