In [3]:
## Notebook Cell — QueryRouter Test (Llama-3.1-8B-Instruct)

import sys, time
from collections import defaultdict

sys.path.insert(0, "scripts/")
from scripts.query_router import QueryRouter

# ── Initialize ───────────────────────────────────────────────────────────────
# Build the router once up front; verbose=False keeps per-call prints out of
# the batch run that follows.
router = QueryRouter(verbose=False)
print("Model : {}".format(router.model_id))


# ── Test suite ───────────────────────────────────────────────────────────────
# Labelled routing queries, grouped by the collection each one is expected to
# be routed to. The comprehension flattens the groups into a list of
# (query, expected_label) tuples in the order they are written here.
TEST_CASES = [
    (question, collection)
    for collection, questions in (
        # Collection A — general / historical
        ("A", (
            "When was Carnegie Mellon University founded?",
            "What was the original purpose of the Carnegie Technical Schools?",
            "When did the Carnegie Technical Schools become the Carnegie Institute of Technology?",
            "What neighborhoods make up the North Side?",
        )),
        # Collection B — regulatory / fiscal
        ("B", (
            "What is the payroll tax rate in Pittsburgh?",
            "What permits do I need to open a restaurant?",
            "Explain Pittsburgh's 2024 municipal budget.",
            "What are the property tax regulations in Allegheny County?",
        )),
        # Collection C — events / schedules
        ("C", (
            "Are there any concerts next Friday downtown?",
            "What events are happening at CMU this week?",
            "When is the next Pittsburgh Jazz Festival?",
            "What's on the schedule at the Pittsburgh Symphony?",
        )),
        # Collection D — specific local entities
        ("D", (
            "Tell me about the Pittsburgh Steelers' offensive line.",
            "What exhibitions are currently at the Carnegie Museum?",
            "What are the hours of the Andy Warhol Museum?",
            "How are the Pittsburgh Pirates doing this season?",
        )),
    )
    for question in questions
]

print(f"\n{'='*65}")
print(f"Llama-3.1-8B-Instruct routing test  ({len(TEST_CASES)} queries)")
print(f"{'='*65}")

# Route every test query once. For each one, record the label the router
# returned, whether it equals the expected collection label, and how long the
# route() call took in milliseconds.
results = []
for query, expected in TEST_CASES:
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() is wall-clock and can jump if the system clock is
    # adjusted mid-run, which would corrupt the latency figures.
    t0    = time.perf_counter()
    label = router.route(query)
    ms    = (time.perf_counter() - t0) * 1000

    correct = label == expected
    results.append({"query": query, "expected": expected,
                    "got": label, "correct": correct, "ms": ms})

    # One line per query; the query text is truncated to 50 characters so the
    # columns stay aligned.
    mark = "✓" if correct else "✗"
    print(f"  {mark}  [{label}] exp=[{expected}]  {ms:>6.0f}ms  {query[:50]}")


# ── Summary ───────────────────────────────────────────────────────────────────
print(f"\n{'='*65}")
# Overall accuracy and mean latency across all routed queries. The empty case
# is handled explicitly because both figures divide by the number of results.
n_results     = len(results)
correct_total = sum(r["correct"] for r in results)
if n_results:
    avg_ms = sum(r["ms"] for r in results) / n_results
    print(f"Overall accuracy : {correct_total}/{n_results}  "
          f"({correct_total / n_results * 100:.0f}%)")
    print(f"Avg latency      : {avg_ms:.0f} ms/query")
else:
    avg_ms = 0.0
    print("Overall accuracy : 0/0  (n/a)")
    print("Avg latency      : n/a")

# Tally correct/total counts keyed by the expected collection label.
per_col = defaultdict(lambda: {"correct": 0, "total": 0})
for r in results:
    per_col[r["expected"]]["total"]   += 1
    per_col[r["expected"]]["correct"] += r["correct"]  # bool counts as 0/1

print("\nPer-collection:")
# Always report A-D (even when a collection has no test cases), followed by
# any other expected label present in the results so it is not silently
# omitted from the report.
known_cols  = ("A", "B", "C", "D")
report_cols = list(known_cols) + [c for c in per_col if c not in known_cols]
for col in report_cols:
    s   = per_col[col]
    # Filled cells = correct predictions, hollow cells = misses.
    bar = "█" * s["correct"] + "░" * (s["total"] - s["correct"])
    print(f"  Collection {col}: {s['correct']}/{s['total']}  {bar}")

# Flag wrong predictions for inspection
wrong = [r for r in results if not r["correct"]]
if wrong:
    print(f"\nMispredicted ({len(wrong)}):")
    for r in wrong:
        print(f"  expected={r['expected']}  got={r['got']}  | {r['query']}")
elif results:
    # Only claim a clean sweep when at least one query was actually evaluated.
    print("\nAll predictions correct ✓")


Model : meta-llama/Llama-3.1-8B-Instruct

Llama-3.1-8B-Instruct routing test  (16 queries)
  ✗  [C] exp=[A]     326ms  When was Carnegie Mellon University founded?
  ✓  [A] exp=[A]     255ms  What was the original purpose of the Carnegie Tech
  ✓  [A] exp=[A]     397ms  When did the Carnegie Technical Schools become the
  ✓  [A] exp=[A]     387ms  What neighborhoods make up the North Side?
  ✓  [B] exp=[B]     289ms  What is the payroll tax rate in Pittsburgh?
  ✓  [B] exp=[B]     308ms  What permits do I need to open a restaurant?
  ✓  [B] exp=[B]     308ms  Explain Pittsburgh's 2024 municipal budget.
  ✓  [B] exp=[B]     240ms  What are the property tax regulations in Allegheny
  ✓  [C] exp=[C]     246ms  Are there any concerts next Friday downtown?
  ✓  [C] exp=[C]     239ms  What events are happening at CMU this week?
  ✓  [C] exp=[C]     237ms  When is the next Pittsburgh Jazz Festival?
  ✓  [C] exp=[C]     375ms  What's on the schedule at the Pittsburgh Symphony?
  ✓  [D] exp=[D]