In [1]:
# --- 0) bootstrap ---
import os, sys
from pathlib import Path

# Locate the project root: the nearest directory, starting at the current
# working directory and walking upward, that contains a pyproject.toml.
START_DIR = Path.cwd()
ROOT = next(
    (d for d in (START_DIR, *START_DIR.parents) if (d / "pyproject.toml").exists()),
    None,
)
if ROOT is None:
    # No marker anywhere above us. Stay where we started instead of silently
    # ending up at the filesystem root, where the relative data/ and configs/
    # paths used below would resolve to the wrong place.
    ROOT = START_DIR
    print(
        f"WARNING: no pyproject.toml found at or above {START_DIR}; "
        "using the current directory as project root.",
        file=sys.stderr,
    )

# Make relative paths resolve against ROOT and make ROOT importable.
os.chdir(ROOT)
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
print("Project root:", ROOT)

DOC_ID = "NFS_2019"      # <- change doc here to mount any ingested PDF
ARTIFACTS_ROOT = Path("data/artifacts")
MODELS_ROOT    = Path("data/models")

Project root: d:\IIT BBS\Job Resources\Business Optima\new-pdf-agent


In [2]:
# --- 1) resolve run_id from YAML (optional pin) and build adapter path hint ---
from packages.core_config.config import load_yaml

# Load the providers config through the project's own loader.
cfg = load_yaml("configs/providers.yaml")
# Look up the pinned core run id under a dotted key. Presumably the object
# returned by load_yaml resolves this as a nested path -- confirm in
# packages.core_config. A falsy result means "no pin": the candidate-path
# step further down then produces no hint at all.
yaml_run_id = cfg.get("chat.models.core.run_id")
print("YAML run_id:", yaml_run_id)

def candidate_adapter(doc_id: str, run_id: str, models_root=None) -> Path:
    """Return the directory most likely to hold the adapter for one run.

    Looks under ``<models_root>/<doc_id>/<run_id>/`` and prefers ``adapter/``
    when that directory exists; otherwise the ``hf_out/`` path is returned.
    The fallback is returned without checking that it exists, so treat the
    result as a hint rather than a guarantee.

    Args:
        doc_id: Document identifier used as the first path component.
        run_id: Run identifier used as the second path component.
        models_root: Base directory holding the per-document runs (str or
            Path). Defaults to the notebook-level ``MODELS_ROOT``.

    Returns:
        Path to ``adapter`` if present on disk, else the path to ``hf_out``.
    """
    # Resolve the default at call time so a later change to MODELS_ROOT is honoured.
    base = MODELS_ROOT if models_root is None else Path(models_root)
    run_dir = base / doc_id / run_id
    preferred = run_dir / "adapter"
    return preferred if preferred.exists() else run_dir / "hf_out"

# Only build a path hint when the YAML actually pins a run id.
cand = None
if yaml_run_id:
    cand = candidate_adapter(DOC_ID, yaml_run_id)
print("Candidate adapter:", cand)

YAML run_id: 20bb948f
Candidate adapter: data\models\NFS_2019\20bb948f\adapter


In [3]:
# --- 2) Ollama health check for the chat roles (intro/splitter/output) ---
import requests, yaml

def check_roles_health(yaml_path="configs/providers.yaml", roles=("intro","splitter","output")):
    """Probe the endpoint configured for each chat role and report failures.

    For every role, reads ``chat.models.<role>.base_url`` from the YAML file,
    issues ``GET <base_url>/api/tags`` with a 10 second timeout, and prints
    one status line.

    Args:
        yaml_path: Path to the providers YAML file.
        roles: Role names to look up under ``chat.models``.

    Returns:
        A list of ``(role, url, reason)`` tuples, one per role that has no
        ``base_url`` configured, did not answer HTTP 200, or raised during the
        request. Empty when every role answered 200.
    """
    # Context manager so the config file handle is closed as soon as it is parsed.
    with open(yaml_path, "r", encoding="utf-8") as fh:
        cfg = yaml.safe_load(fh) or {}  # an empty YAML file parses to None
    models = (cfg.get("chat") or {}).get("models") or {}

    failed = []
    for role in roles:
        url = (models.get(role) or {}).get("base_url")
        if not url:
            # Report a missing role/base_url like any other unhealthy role
            # instead of aborting the whole check with a bare KeyError.
            reason = "no base_url configured"
            print(f"{role:9s} {url} -> ERROR: {reason}")
            failed.append((role, url, reason))
            continue
        # Tolerate a trailing slash in the configured URL ("http://host:port/").
        endpoint = f"{str(url).rstrip('/')}/api/tags"
        try:
            r = requests.get(endpoint, timeout=10)
            print(f"{role:9s} {url} -> {r.status_code}")
            if r.status_code != 200:
                failed.append((role, url, f"HTTP {r.status_code}"))
        except Exception as e:
            # Deliberately broad: this is a probe, so any failure to reach the
            # server is recorded for the caller rather than raised.
            print(f"{role:9s} {url} -> ERROR: {type(e).__name__}: {e}")
            failed.append((role, url, str(e)))
    return failed

# Stop the notebook here, with actionable hints, if any role is unreachable.
fails = check_roles_health()
if fails:
    failure_lines = [f" - {role} at {url}: {err}" for role, url, err in fails]
    quick_fixes = (
        "\n\nQuick fixes:\n"
        "  1) Ensure 'ollama serve' is running on that port\n"
        "  2) Make YAML base_url match the running port (e.g., http://127.0.0.1:11434)\n"
        "  3) In *this* terminal, you can also set:\n"
        "       set OLLAMA_HOST=http://127.0.0.1:11434   (cmd)\n"
        "       $env:OLLAMA_HOST = 'http://127.0.0.1:11434'   (PowerShell)\n"
    )
    raise SystemExit(
        "Ollama health check failed for roles:\n" + "\n".join(failure_lines) + quick_fixes
    )

intro     http://127.0.0.1:11434 -> 200
splitter  http://127.0.0.1:11434 -> 200
output    http://127.0.0.1:11434 -> 200


In [4]:
# --- 3) inspect what loader will pick (even without profile.json) ---
from packages.chat.router import load_profile

# Ask the loader which profile it would select for this document
# (original author's note: auto-discovers under data/models/<doc_id>).
probe = load_profile(DOC_ID, artifacts_root=ARTIFACTS_ROOT)
for label, attr in (
    ("Selected collection:", "collection"),
    ("Selected adapter   :", "adapter_path"),
):
    print(label, getattr(probe, attr))
# adapter_run_id is read defensively: None is shown when the attribute is absent.
print("Selected run_id    :", getattr(probe, "adapter_run_id", None))

Selected collection: NFS_2019
Selected adapter   : data\models\NFS_2019\20bb948f\adapter
Selected run_id    : 20bb948f


In [5]:
# --- 4) mount chat (models + retriever + tools) ---
from packages.chat.router import mount_chat

# Mount the chat stack for DOC_ID and echo what was wired up
# (original author's note: uses providers.yaml & profile/auto-discovery).
mount = mount_chat(DOC_ID)
print("Sessions dir:", mount.sessions_dir)
print("Adapter used:", mount.profile.adapter_path)
print("Collection:  ", mount.profile.collection)
tool_names = []
for registered in mount.tools:
    tool_names.append(registered.name)
print("Tools:       ", tool_names)

  return ChatOllama(
The 8-bit optimizer is not available on your device, only available on CUDA for now.


Sessions dir: data\sessions
Adapter used: data\models\NFS_2019\20bb948f\adapter
Collection:   NFS_2019
Tools:        ['doc_retrieve', 'calc_run']


In [6]:
# --- 5) retriever smoke test ---
# Run one query straight through the mounted retriever and list every hit.
hits = mount.retriever.search("What are the Section 3 fees?")
print("Top", len(hits), "hits")
for rank, hit in enumerate(hits, start=1):
    # Metadata is optional on a hit; fall back to an empty mapping when the key is absent.
    meta = hit.get("metadata", {})
    page, heading = meta.get("page"), meta.get("heading_path")
    print(f"{rank:>2}. id={hit['id']} score={hit['score']:.3f} page={page}, heading={heading}")

Top 6 hits
 1. id=NFS_2019-11267 score=0.650 page=None, heading=FEE SCHEDULE > N 25 8 28 3
 2. id=NFS_2019-h-207 score=0.642 page=None, heading=FEE SCHEDULE > 3.76 BR XXX XXX
 3. id=NFS_2019-11118 score=0.642 page=None, heading=FEE SCHEDULE > CONVERSION FACTORS
 4. id=NFS_2019-11334 score=0.640 page=None, heading=FEE SCHEDULE > N 25 8 28 3
 5. id=NFS_2019-11404 score=0.640 page=None, heading=FEE SCHEDULE > N 25 8 28 3
 6. id=NFS_2019-488 score=0.640 page=None, heading=FEE SCHEDULE > GENERAL GROUND RULES


In [7]:
# --- 6) retriever 'tool' (LangChain StructuredTool) smoke test ---
# Same retrieval, this time through the tool interface with an explicit top_k.
tool = mount.retriever_tool
tool_request = {"query": "Provide summary of Section 3 fee schedule", "top_k": 3}
tool_out = tool.invoke(tool_request)
print(tool_out.keys())
# Show only the first two citations to keep the output short.
citations_head = tool_out["citations"][:2]
print("Citations sample:", citations_head)

dict_keys(['snippets', 'citations'])
Citations sample: [{'id': 'NFS_2019-11284', 'page': None, 'heading_path': 'FEE SCHEDULE > N 25 8 28 3', 'table_id': None, 'score': 0.7133106227939584}, {'id': 'NFS_2019-11334', 'page': None, 'heading_path': 'FEE SCHEDULE > N 25 8 28 3', 'table_id': None, 'score': 0.7132730034384358}]


In [8]:
# --- 7) session store + summary buffer (phase 7.0 memory) ---
from packages.chat.memory import SessionStore, SummaryBuffer

# Build the session store for this document and wrap it in a SummaryBuffer
# that uses the output-role LLM.
session_store = SessionStore(mount.sessions_dir, doc_id=mount.profile.doc_id)
store = session_store.load_or_create()
buf = SummaryBuffer(store, llm=mount.llm_output)

# Record one user/assistant exchange, then let the buffer decide whether to summarise.
for speaker, text in (
    ("user", "Hi, can you help me understand section 3 fees?"),
    ("assistant", "Sure—what specifically about section 3?"),
):
    store.append(speaker, text)
buf.maybe_summarize()

print("Session file:", store.path)

Session file: data\sessions\NFS_2019_7f75880dff8b.json


In [9]:
# --- 8) Optional: quick sanity pings (won't crash if Ollama is down) ---
def safe_ping(llm, prompt, max_chars=140):
    """Send one prompt to ``llm`` and return a short preview of the reply.

    Any exception raised by the call is turned into a placeholder string, so
    an unreachable model does not abort the cell.

    Args:
        llm: Object exposing ``invoke(prompt)`` whose result has a
            ``content`` attribute.
        prompt: Passed to ``llm.invoke`` unchanged.
        max_chars: Upper bound on the preview length (default 140).

    Returns:
        The reply content sliced to ``max_chars``, or
        ``"[unavailable: <ExceptionClassName>]"`` if the call raised.
    """
    try:
        reply = llm.invoke(prompt)
        return reply.content[:max_chars]
    except Exception as exc:
        # Deliberately broad: this is a best-effort ping, and only the
        # exception class name is reported.
        return f"[unavailable: {type(exc).__name__}]"

# One ping per role; the LLM attribute is looked up just before its own ping.
for label, llm_attr, prompt in (
    ("intro   :", "llm_intro", "Say 'ready'."),
    ("splitter:", "llm_splitter", "Split: A and B?"),
    ("core    :", "llm_core", "Say 'ready core'."),
    ("output  :", "llm_output", "Summarize: ready -> ok"),
):
    print(label, safe_ping(getattr(mount, llm_attr), prompt))

intro   : Ready.
splitter: I'm happy to help, but I don't see a question. Could you please provide more context or clarify what you're asking about "A" and "B"? Are th
core    : Say 'ready core'.

2. The "ready core" is a technique used by professional wrestlers to perform a series of moves in a short amount of time.
output  : It seems like you're asking me to summarize a sequence of states, but I'm not sure what the context is. Could you provide more information o
