In [1]:

from scripts.rag_pipeline import RAGPipeline

# Build the routed RAG pipeline once for the interactive cells; with
# verbose=True the recorded output shows a per-component loading log.
pipeline = RAGPipeline(verbose=True)

# Keep the pipeline's retrieval settings under notebook-level names.
top_k_retrieval = pipeline.top_k_retrieval
top_n_context = pipeline.top_n_context
top_n_rerank = pipeline.top_n_rerank

# NOTE(review): in the output recorded for this cell, Collection C reports
# 818 chunks against a 962-vector index, and Collection D's BM25 index covers
# 1247 docs against 1434 vectors — confirm the indexes are in sync.

# One print call, one setting per line.
print(
    f"Top-K retrieval value: {top_k_retrieval}",
    f"Top-N context value: {top_n_context}",
    f"Top-N rerank value: {top_n_rerank}",
    sep="\n",
)

  from .autonotebook import tqdm as notebook_tqdm


Initialising RAG pipeline...
  (local reranking with BAAI/bge-reranker-v2-m3; generation with meta-llama/Llama-3.1-8B-Instruct)
  ✓ QueryRouter  (meta-llama/Llama-3.1-8B-Instruct)
  Reranker        : BAAI/bge-reranker-v2-m3  (local inference)
  Device          : mps


`torch_dtype` is deprecated! Use `dtype` instead!
Loading weights: 100%|██████████| 393/393 [00:00<00:00, 1691.19it/s, Materializing param=roberta.encoder.layer.23.output.dense.weight]              


  ✓ Reranker  (BAAI/bge-reranker-v2-m3  on local MPS)
  Loaded index: dim=1024  ntotal=592
  Loaded BM25 index for Collection A  (vocab: ?  docs: 592)
  ✓ Collection A  (592 chunks)
  Loaded index: dim=1024  ntotal=1450
  Loaded BM25 index for Collection B  (vocab: ?  docs: 1450)
  ✓ Collection B  (1450 chunks)
  Loaded index: dim=1024  ntotal=962
  Loaded BM25 index for Collection C  (vocab: ?  docs: 962)
  ✓ Collection C  (818 chunks)
  Loaded index: dim=1024  ntotal=1434
  Loaded BM25 index for Collection D  (vocab: ?  docs: 1247)
  ✓ Collection D  (1434 chunks)
  Loading model : BAAI/bge-m3
  Device        : MPS


Loading weights: 100%|██████████| 391/391 [00:00<00:00, 2309.11it/s, Materializing param=pooler.dense.weight]                               


  Model loaded in 4.8s
  ✓ Embedder  (bge-m3  on mps:0)
Pipeline ready.

Pipeline ready.

Top-K retrieval value: 20
Top-N context value: 10
Top-N rerank value: 40


In [None]:
# Cell 2 — smoke-test the pipeline built in the first cell on one question.
QUERY = (
    "Which Carnegie Mellon alumnus is known for creating the emoticon ':-)'?"
)

# Run the query and show the result's formatted summary.
result = pipeline.run(QUERY)
print(result.pretty())

In [2]:
"""
run_leaderboard.py
------------------
Batch-run the RAG pipeline on every question in INPUT_FILE and write the
answers to OUTPUT_FILE in the required submission format.
"""

import json
import time
from pathlib import Path

from scripts.rag_pipeline import RAGPipeline

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

ANDREW_ID   = "wenguand"
INPUT_FILE  = Path("quesies.json")
OUTPUT_FILE = Path("system_output.json")
# An interrupted run is checkpointed here, so a previously completed
# OUTPUT_FILE is never replaced by a partial one.
PARTIAL_FILE = OUTPUT_FILE.with_suffix(".partial.json")
PREVIEW_CHARS = 80   # length of the per-question answer preview in the log

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

# 1. Load questions (a list of items, each with an "id" and a "question")
queries = json.loads(INPUT_FILE.read_text(encoding="utf-8"))
print(f"Total {len(queries)} questions\n")

# 2. Init pipeline (once)
pipeline = RAGPipeline(verbose=False)

# 3. Answer each question
answers = {"andrewid": ANDREW_ID}
failed_qids = []        # ids whose pipeline call raised; their answer is ""
run_completed = False   # flipped only when the loop reaches the last question

try:
    for i, item in enumerate(queries, 1):
        qid      = item["id"]
        question = item["question"]
        print(f"[{i}/{len(queries)}] Q{qid}: {question}")

        try:
            result = pipeline.run(question)
            answers[qid] = result.answer
            # Mark the preview as truncated only when it really was.
            ellipsis = "..." if len(result.answer) > PREVIEW_CHARS else ""
            print(f"  → {result.answer[:PREVIEW_CHARS]}{ellipsis}")
        except Exception as e:
            # Best effort: record a blank answer and keep going, but remember
            # the id so the failure is reported at the end.
            print(f"  ❌ error: {e}")
            answers[qid] = ""
            failed_qids.append(qid)

        time.sleep(0.5)   # avoid HF API rate limits

    run_completed = True
finally:
    # A KeyboardInterrupt (or any other non-Exception error) skips the except
    # clause above; checkpoint what was collected so the run is not lost.
    if not run_completed:
        PARTIAL_FILE.write_text(
            json.dumps(answers, ensure_ascii=False, indent=4),
            encoding="utf-8",
        )
        print(f"\n⚠️ Run interrupted. Partial results written to {PARTIAL_FILE}")

# 4. Write results
OUTPUT_FILE.write_text(
    json.dumps(answers, ensure_ascii=False, indent=4),
    encoding="utf-8",
)
print(f"\n✅ Done. Results written to {OUTPUT_FILE}")
if failed_qids:
    print(f"⚠️ {len(failed_qids)} question(s) failed and were left blank: {failed_qids}")
print(json.dumps(answers, ensure_ascii=False, indent=4))

Total 400 questions

  Reranker        : BAAI/bge-reranker-v2-m3  (local inference)
  Device          : mps


Loading weights: 100%|██████████| 393/393 [00:00<00:00, 1527.67it/s, Materializing param=roberta.encoder.layer.23.output.dense.weight]              


  Loaded index: dim=1024  ntotal=592
  Loaded BM25 index for Collection A  (vocab: ?  docs: 592)
  Loaded index: dim=1024  ntotal=1450
  Loaded BM25 index for Collection B  (vocab: ?  docs: 1450)
  Loaded index: dim=1024  ntotal=962
  Loaded BM25 index for Collection C  (vocab: ?  docs: 962)
  Loaded index: dim=1024  ntotal=1434
  Loaded BM25 index for Collection D  (vocab: ?  docs: 1247)
  Loading model : BAAI/bge-m3
  Device        : MPS


Loading weights: 100%|██████████| 391/391 [00:00<00:00, 1671.87it/s, Materializing param=pooler.dense.weight]                               


  Model loaded in 5.3s
[1/400] Q1: What year did Pittsburgh suffer its worst flood?
  → Pittsburgh suffered its worst flood in 1889, which was caused by the failure of ...
[2/400] Q2: When did the Carnegie-Mellon merger occur?
  → The Carnegie-Mellon merger occurred in 1967, when the Carnegie Institute of Tech...
[3/400] Q3: What Kennywood food is most iconic?
  → The most iconic Kennywood food items mentioned are fried cheesecakes, s’mores, a...
[4/400] Q4: How many schools and colleges does CMU have?
  → Carnegie Mellon University consists of seven colleges, including the College of ...
[5/400] Q5: What former church is a music venue?
  → Calvary Episcopal Church is used as a venue for the performance of Britten's Cur...
[6/400] Q6: What year does Pittsburgh host NFL Draft?
  → The retrieved context does not contain any information about the Pittsburgh host...
[7/400] Q7: What is the area in square miles of Squirrel Hill as stated by the Squirrel Hill Historical Society?
  → The retr

# Run without query router

In [1]:
# Run all-collections pipeline
from scripts.rag_pipeline_all_collections import RAGPipelineAllCollections

# Build the router-free pipeline variant, with its loading log enabled.
all_cols_pipeline = RAGPipelineAllCollections(verbose=True)

# Single smoke-test question for this variant.
QUERY_ALL = "What is the signature dish of Pamela's Diner?"

# Run the query and show the result's formatted summary.
result_all = all_cols_pipeline.run(QUERY_ALL)
print(result_all.pretty())

  from .autonotebook import tqdm as notebook_tqdm


Initialising all-collections RAG pipeline...
  (fixed retrieval over A/B/C/D; no query router)
  Reranker        : BAAI/bge-reranker-v2-m3  (local inference)
  Device          : mps


`torch_dtype` is deprecated! Use `dtype` instead!
Loading weights: 100%|██████████| 393/393 [00:00<00:00, 1363.65it/s, Materializing param=roberta.encoder.layer.23.output.dense.weight]              


  ✓ Reranker  (BAAI/bge-reranker-v2-m3  on local MPS)
  Loaded index: dim=1024  ntotal=592
  Loaded BM25 index for Collection A  (vocab: ?  docs: 592)
  ✓ Collection A  (592 chunks)
  Loaded index: dim=1024  ntotal=1450
  Loaded BM25 index for Collection B  (vocab: ?  docs: 1450)
  ✓ Collection B  (1450 chunks)
  Loaded index: dim=1024  ntotal=962
  Loaded BM25 index for Collection C  (vocab: ?  docs: 962)
  ✓ Collection C  (818 chunks)
  Loaded index: dim=1024  ntotal=1434
  Loaded BM25 index for Collection D  (vocab: ?  docs: 1247)
  ✓ Collection D  (1434 chunks)
  Loading model : BAAI/bge-m3
  Device        : MPS


Loading weights: 100%|██████████| 391/391 [00:00<00:00, 2244.95it/s, Materializing param=pooler.dense.weight]                               


  Model loaded in 5.4s
  ✓ Embedder  (bge-m3  on mps:0)
Pipeline ready.


───────────────────────────────────────────────────────
Query: "What is the signature dish of Pamela's Diner?"
[1] Route (disabled) → ['A', 'B', 'C', 'D']
[2] Dense  → 40 hits  (1401ms)
[3] Sparse → 31 hits  (5ms)
[4] Rerank → 69 candidates → 69 hits  (8448ms)
[6] Generate  (10322ms)
    Total: 20180ms
Query      : What is the signature dish of Pamela's Diner?
Collection : ['A', 'B', 'C', 'D']

Answer
------
Pamela's Diner is known for its breakfast offerings and has been voted Best Breakfast in Pittsburgh. It is particularly noted for being named in the top 10 of USA's Hotcakes, suggesting that hotcakes (or pancakes) are a signature item. However, a specific signature dish is not explicitly mentioned.

Context (10 chunks used)
--------------------------------------------------
  [1] rank=1  score=0.0866  D_D_pittsburgh_restaurants__0101
      section : Pamela's P&G Diners
      preview : [Site: Pittsburgh Restau

In [3]:
# Batch run all-collections pipeline on leaderboard queries
import json
import time
from pathlib import Path
from scripts.rag_pipeline_all_collections import RAGPipelineAllCollections

ANDREW_ID = "wenguand"
INPUT_FILE = Path("quesies.json")
# NOTE(review): this is the same path the routed batch run earlier in this
# notebook writes to, so a completed run here replaces those answers —
# confirm that is intended.
OUTPUT_FILE = Path("system_output.json")
# An interrupted run is checkpointed here, so a previously completed
# OUTPUT_FILE is never replaced by a partial one.
PARTIAL_FILE = OUTPUT_FILE.with_suffix(".partial.json")
PREVIEW_CHARS = 80  # length of the per-question answer preview in the log

# Load the questions (a list of items, each with an "id" and a "question").
queries = json.loads(INPUT_FILE.read_text(encoding="utf-8"))
print(f"Total {len(queries)} questions\n")

pipeline_all = RAGPipelineAllCollections(verbose=False)
answers_all = {"andrewid": ANDREW_ID}
failed_qids_all = []       # ids whose pipeline call raised; their answer is ""
run_completed_all = False  # flipped only when the loop reaches the last question

try:
    for i, item in enumerate(queries, 1):
        qid = item["id"]
        question = item["question"]
        print(f"[{i}/{len(queries)}] Q{qid}: {question}")
        try:
            result = pipeline_all.run(question)
            answers_all[qid] = result.answer
            # Mark the preview as truncated only when it really was.
            ellipsis = "..." if len(result.answer) > PREVIEW_CHARS else ""
            print(f"  → {result.answer[:PREVIEW_CHARS]}{ellipsis}")
        except Exception as e:
            # Best effort: record a blank answer and keep going, but remember
            # the id so the failure is reported at the end.
            print(f"  error: {e}")
            answers_all[qid] = ""
            failed_qids_all.append(qid)
        time.sleep(0.5)  # avoid API rate limits

    run_completed_all = True
finally:
    # A KeyboardInterrupt (or any other non-Exception error) skips the except
    # clause above; checkpoint what was collected so the run is not lost.
    if not run_completed_all:
        PARTIAL_FILE.write_text(
            json.dumps(answers_all, ensure_ascii=False, indent=4),
            encoding="utf-8",
        )
        print(f"\n⚠️ Run interrupted. Partial results written to {PARTIAL_FILE}")

OUTPUT_FILE.write_text(json.dumps(answers_all, ensure_ascii=False, indent=4), encoding="utf-8")
print(f"\n✅ Done. Results written to {OUTPUT_FILE}")
if failed_qids_all:
    print(f"⚠️ {len(failed_qids_all)} question(s) failed and were left blank: {failed_qids_all}")
print(json.dumps(answers_all, ensure_ascii=False, indent=4))

Total 400 questions

  Reranker        : BAAI/bge-reranker-v2-m3  (local inference)
  Device          : mps


Loading weights: 100%|██████████| 393/393 [00:00<00:00, 1100.03it/s, Materializing param=roberta.encoder.layer.23.output.dense.weight]              


  Loaded index: dim=1024  ntotal=592
  Loaded BM25 index for Collection A  (vocab: ?  docs: 592)
  Loaded index: dim=1024  ntotal=1450
  Loaded BM25 index for Collection B  (vocab: ?  docs: 1450)
  Loaded index: dim=1024  ntotal=962
  Loaded BM25 index for Collection C  (vocab: ?  docs: 962)
  Loaded index: dim=1024  ntotal=1434
  Loaded BM25 index for Collection D  (vocab: ?  docs: 1247)
  Loading model : BAAI/bge-m3
  Device        : MPS


Loading weights: 100%|██████████| 391/391 [00:00<00:00, 2175.46it/s, Materializing param=pooler.dense.weight]                               


  Model loaded in 7.6s
[1/400] Q1: What year did Pittsburgh suffer its worst flood?
  → Pittsburgh suffered its worst flood in 1889, which was caused by the failure of ...
[2/400] Q2: When did the Carnegie-Mellon merger occur?
  → The Carnegie-Mellon merger occurred in 1967, when the Carnegie Institute of Tech...
[3/400] Q3: What Kennywood food is most iconic?
  → The most iconic Kennywood food items mentioned are fried cheesecakes, s’mores, a...
[4/400] Q4: How many schools and colleges does CMU have?
  → Carnegie Mellon University consists of seven colleges, including the College of ...
[5/400] Q5: What former church is a music venue?
  → Calvary Episcopal Church is used as a venue for the opera "Curlew River" in Janu...
[6/400] Q6: What year does Pittsburgh host NFL Draft?
  → Pittsburgh will host the NFL Draft in 2026....
[7/400] Q7: What is the area in square miles of Squirrel Hill as stated by the Squirrel Hill Historical Society?
  → The retrieved context does not provide the sp