In [1]:
# --- 0) bootstrap (same as 7.1) ---
import os, sys
from pathlib import Path
def find_project_root(start, marker="pyproject.toml"):
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    Args:
        start: Directory to begin the upward search from.
        marker: File name whose presence identifies the project root.

    Returns:
        The first matching directory as a ``Path``, or ``None`` when neither
        ``start`` nor any of its ancestors contains ``marker``.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    return None


_NOTEBOOK_DIR = Path.cwd()
ROOT = find_project_root(_NOTEBOOK_DIR)
if ROOT is None:
    # Without this guard an unsuccessful search would leave ROOT at the
    # filesystem root, and the cell would chdir there and put it on sys.path.
    print("WARNING: no pyproject.toml found at or above", _NOTEBOOK_DIR,
          "- keeping the current directory as project root")
    ROOT = _NOTEBOOK_DIR

# Run from the project root so relative paths (e.g. ARTIFACTS_ROOT) resolve
# and project packages are importable.
os.chdir(ROOT)
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
print("Project root:", ROOT)

DOC_ID = "NFS_2019"
ARTIFACTS_ROOT = Path("data/artifacts")  # relative to ROOT (cwd after the chdir above)

Project root: d:\IIT BBS\Job Resources\Business Optima\new-pdf-agent


In [2]:
# --- 1) mount chat (re-uses your 7.1 code) ---
from packages.chat.router import mount_chat
# Mount the chat stack for DOC_ID; later cells read `mount.retriever`,
# `mount.llm_intro` and `mount.profile` from this object.
mount = mount_chat(DOC_ID)
mounted_profile = mount.profile
print("Mounted:", mounted_profile.doc_id, "collection:", mounted_profile.collection)

  return ChatOllama(
The 8-bit optimizer is not available on your device, only available on CUDA for now.


Mounted: NFS_2019 collection: NFS_2019


In [3]:
# --- 2) Build light 'sections' and 'abbreviations' strings for intro guard ---
MAX_SECTION_BULLETS = 8   # cap on headings passed to the guard as "sections"
MAX_ABBREV_BULLETS = 6    # cap on headings passed to the guard as "abbreviations"
# NOTE(review): the abbreviations query below also asks for "definitions", but only
# headings containing one of these keywords are kept - confirm whether "DEFINITION"
# should be accepted as well.
ABBREV_HEADING_KEYWORDS = ("GLOSSARY", "ABBREVIATION")


def collect_heading_bullets(hits, limit, keep=None):
    """Collect distinct ``heading_path`` values from search hits as bullet lines.

    Args:
        hits: Iterable of dict-like hits; each may carry a ``"metadata"`` mapping
            (possibly ``None``) with a ``"heading_path"`` entry.
        limit: Stop scanning once this many bullets have been collected.
        keep: Optional predicate on the heading path; when given, only headings
            for which it returns a truthy value are collected.

    Returns:
        A list of ``"- <heading_path>"`` strings in first-seen order, without
        duplicates.
    """
    bullets, seen = [], set()
    for hit in hits:
        heading_path = (hit.get("metadata", {}) or {}).get("heading_path")
        if heading_path and heading_path not in seen and (keep is None or keep(heading_path)):
            bullets.append(f"- {heading_path}")
            seen.add(heading_path)
        # Checked after every hit (not only after an append), same as the
        # hand-written loops this helper replaces.
        if len(bullets) >= limit:
            break
    return bullets


def _is_abbrev_heading(heading_path):
    """Return True when the heading path mentions a glossary/abbreviation keyword."""
    upper = heading_path.upper()
    return any(keyword in upper for keyword in ABBREV_HEADING_KEYWORDS)


# Heuristic fallback: gather distinct heading_path values from a quick search.
hits = mount.retriever.search("table of contents OR introduction OR overview OR definitions")
sections_list = collect_heading_bullets(hits, MAX_SECTION_BULLETS)
sections_text = "\n".join(sections_list) or "- (no headings found in quick probe)"

# Abbreviations: if you have a stored glossary, load it here; else simple fallback.
abbrev_guess = mount.retriever.search("abbreviations OR glossary OR definitions")
abbr_list = collect_heading_bullets(abbrev_guess, MAX_ABBREV_BULLETS, keep=_is_abbrev_heading)
abbreviations_text = "\n".join(abbr_list) or "- (no glossary section found)"

# Single f-string instead of comma-separated print args: print's default sep=" "
# put a stray space after each "\n", which misaligned the first bullet of each list.
print(f"sections:\n{sections_text}\n\nabbreviations:\n{abbreviations_text}")

sections:
 - FEE SCHEDULE > COPYRIGHT
- FEE SCHEDULE > FILING NOTICE
- FEE SCHEDULE > AMERICAN MEDICAL ASSOCIATION NOTICE 

abbreviations:
 - (no glossary section found)


In [4]:
# --- 3) Guardrail call (structured) ---
from packages.chat.guardrails import route_scope

def try_guard(q: str):
    """Run the scope guardrail for a single user query.

    Forwards the query to ``route_scope`` together with the mounted document's
    id and the notebook-level ``sections_text`` / ``abbreviations_text`` strings.

    Args:
        q: The user question to classify.

    Returns:
        Whatever ``route_scope`` returns, unchanged.
    """
    intro_llm = mount.llm_intro
    guard_kwargs = {
        "doc_id": mount.profile.doc_id,
        "sections_text": sections_text,
        "abbreviations_text": abbreviations_text,
        "user_query": q,
    }
    return route_scope(intro_llm, **guard_kwargs)

# Probe queries for the guardrail: document questions plus one deliberately
# unrelated question.
tests = [
    "What are the Section 3 fees?",
    "Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.",
    "Who won the Premier League in 2021?",  # out-of-scope
    "Define 'conversion factor' used in this document.",
    "What's the PDF version and publication year?",
]

# Print the prefilter verdict and the decision for each probe query.
for query in tests:
    outcome = try_guard(query)
    print(
        f"\nQ: {query}"
        f"\n→ prefilter: {outcome['prefilter']}"
        f"\n→ decision: {outcome['decision']}"
    )


Q: What are the Section 3 fees?
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision: {'in_scope': True, 'intent': 'table', 'reason': 'Section 3 fees are listed in the FEE SCHEDULE section of the document.', 'rewritten': 'What are the Section 3 fees?'}

Q: Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision: {'in_scope': True, 'intent': 'table', 'reason': 'Section N 25 8 28 3 contains the reimbursement rates for code 12345.', 'rewritten': 'Compute the reimbursement using the table in section N 25 8 28 3 for code 12345.'}

Q: Who won the Premier League in 2021?
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision: {'in_scope': False, 'intent': 'other', 'reason': 'This document does not contain information about sports or specific events.', 'rewritten': 'Who won the Premier League in 2021?'}

Q: Define 'conversion factor' used in this document.
→ prefilter: {'ok': True, 'reason': 'ok'}
→ decision: {'in_sco

In [6]:
# --- 4) Assertions for CI-ish sanity ---
# Each case: (query, expected value of decision["in_scope"], failure label).
GUARD_CASES = [
    ("Who won the Premier League in 2021?", False, "Sports Q should be out-of-scope"),
    ("What are the Section 3 fees?", True, "Section 3 fees should be in-scope"),
]

# Run every query first, then assert, so `outcomes` is fully populated for
# inspection even when an assertion fails.
outcomes = [try_guard(case_query) for case_query, _, _ in GUARD_CASES]

for (case_query, expected_in_scope, label), outcome in zip(GUARD_CASES, outcomes):
    decision = outcome["decision"]
    # Identity check on purpose: a non-bool value (e.g. the string "false")
    # must not pass. The message carries the actual decision so a failure
    # can be diagnosed from the output alone.
    assert decision["in_scope"] is expected_in_scope, (
        f"{label}; query={case_query!r}, got decision: {decision!r}"
    )
print("Basic guardrail assertions passed.")

AssertionError: Sports Q should be out-of-scope