In [None]:
## Notebook Cell — QueryRouter Test (Llama-3.1-8B-Instruct)

import sys, time
from collections import defaultdict

sys.path.insert(0, "scripts/")
from scripts.query_router import QueryRouter

# ── Initialize ───────────────────────────────────────────────────────────────
# Build the router once; the same instance is reused for every query routed
# later in this cell. verbose=False suppresses per-call prints during the batch.
router = QueryRouter(verbose=False)
# Echo the model identifier so the captured output records what was tested.
print(f"Model : {router.model_id}")


## Router now returns the TOP-2 collections (ranked).
# Each entry is (query, expected_labels). expected_labels is a *set* holding the
# two collection labels the query should be routed to; sets are unordered, so
# the pair expresses membership only, not rank. By convention the collection a
# query is grouped under is written first.
TEST_CASES = [
    # Collection A — general / historical
    ("When was Carnegie Mellon University founded?", {"A", "B"}),
    ("What was the original purpose of the Carnegie Technical Schools?", {"A", "B"}),
    (
        "When did the Carnegie Technical Schools become the Carnegie Institute of Technology?",
        {"A", "B"},
    ),
    ("Which Pittsburgh bridge is the longest?", {"A", "D"}),

    # Collection B — regulatory / fiscal
    ("What is the payroll tax rate in Pittsburgh?", {"B", "A"}),
    ("What permits do I need to open a restaurant?", {"B", "A"}),
    ("Explain Pittsburgh's 2024 municipal budget.", {"B", "A"}),
    ("What are the property tax regulations in Allegheny County?", {"B", "A"}),

    # Collection C — events / schedules
    ("Are there any concerts next Friday downtown?", {"C", "D"}),
    ("What events are happening at CMU this week?", {"C", "A"}),
    ("When is the next Pittsburgh Jazz Festival?", {"C", "D"}),
    ("What's on the schedule at the Pittsburgh Symphony?", {"C", "D"}),

    # Collection D — specific local entities
    ("Tell me about the Pittsburgh Steelers' offensive line.", {"D", "A"}),
    ("What exhibitions are currently at the Carnegie Museum?", {"D", "A"}),
    ("What are the hours of the Andy Warhol Museum?", {"D", "A"}),
    ("What is the signature dish of Pamela's Diner?", {"D", "A"}),
]

print(f"\n{'='*65}")
print(f"Llama-3.1-8B-Instruct routing test  ({len(TEST_CASES)} queries)")
print(f"{'='*65}")

# Route every test query, time the call, and record whether *both* expected
# collections appear among the returned labels. Membership only is checked;
# the order of the returned labels does not affect the verdict.
results = []
for query, expected in TEST_CASES:
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short intervals; time.time() follows the wall clock, which can be
    # adjusted mid-run and distort a sub-second measurement.
    t0     = time.perf_counter()
    labels = router.route(query)          # expected to return the top-2 labels
    # Stop the clock right after the call so only route() is measured.
    ms     = (time.perf_counter() - t0) * 1000

    # `labels or ()` makes a None/empty routing result count as a plain miss
    # instead of raising TypeError from set(None).
    hit = expected.issubset(set(labels or ()))
    results.append({
        "query": query,
        "expected": expected,
        "got": labels,
        "correct": hit,
        "ms": ms,
    })

    mark = "✓" if hit else "✗"
    # One line per query; the query text is truncated to 50 chars to keep
    # the columns readable.
    print(f"  {mark}  {labels} exp~{sorted(expected)}  {ms:>6.0f}ms  {query[:50]}")


## Summary
# Aggregate the per-query records in `results` into overall accuracy, mean
# latency, a per-collection breakdown, and a list of misses.
print(f"\n{'='*65}")
n_results     = len(results)
correct_total = sum(r["correct"] for r in results)
# Guard both divisions: with an empty `results` list the summary reports
# zeros instead of raising ZeroDivisionError.
avg_ms        = sum(r["ms"] for r in results) / n_results if n_results else 0.0
accuracy_pct  = correct_total / n_results * 100 if n_results else 0.0
print(f"Match accuracy (expected subset in top-2) : {correct_total}/{n_results}  "
      f"({accuracy_pct:.0f}%)")
print(f"Avg latency                            : {avg_ms:.0f} ms/query")

# A query contributes to *every* collection in its expected set, so the
# per-collection totals add up to more than the number of queries.
per_col = defaultdict(lambda: {"correct": 0, "total": 0})
for r in results:
    for col in r["expected"]:
        per_col[col]["total"]   += 1
        per_col[col]["correct"] += r["correct"]   # bool adds as 0/1

print("\nPer-collection (counts incremented for each expected label):")
for col in "ABCD":
    s   = per_col[col]   # defaultdict: a collection with no queries shows 0/0
    # Filled cells = queries counted correct, hollow cells = misses.
    bar = "█" * s["correct"] + "░" * (s["total"] - s["correct"])
    print(f"  Collection {col}: {s['correct']}/{s['total']}  {bar}")

# Flag misses for inspection
wrong = [r for r in results if not r["correct"]]
if wrong:
    print(f"\nMisses ({len(wrong)}):")
    for r in wrong:
        print(f"  exp={sorted(r['expected'])}  got={r['got']}  | {r['query']}")
elif results:
    print("\nAll queries routed correctly ✓")
else:
    # Nothing was routed, so claiming success would be misleading.
    print("\nNo queries were evaluated.")


Model : meta-llama/Llama-3.1-8B-Instruct

Llama-3.1-8B-Instruct routing test  (16 queries)
  ✓  [A] exp=[A]     760ms  When was Carnegie Mellon University founded?
  ✓  [A] exp=[A]     228ms  What was the original purpose of the Carnegie Tech
  ✓  [A] exp=[A]     289ms  When did the Carnegie Technical Schools become the
  ✓  [A] exp=[A]     232ms  Which Pittsburgh bridge is the longest?
  ✓  [B] exp=[B]     321ms  What is the payroll tax rate in Pittsburgh?
  ✓  [B] exp=[B]     246ms  What permits do I need to open a restaurant?
  ✓  [B] exp=[B]     357ms  Explain Pittsburgh's 2024 municipal budget.
  ✓  [B] exp=[B]     280ms  What are the property tax regulations in Allegheny
  ✓  [C] exp=[C]     284ms  Are there any concerts next Friday downtown?
  ✓  [C] exp=[C]     425ms  What events are happening at CMU this week?
  ✓  [C] exp=[C]     307ms  When is the next Pittsburgh Jazz Festival?
  ✓  [C] exp=[C]     304ms  What's on the schedule at the Pittsburgh Symphony?
  ✓  [D] exp=[D]   