In [1]:
# Cell 1 — initialise the pipeline (only needs to be run once)
from scripts.rag_pipeline import RAGPipeline

pipeline = RAGPipeline(verbose=True)

# Copy the pipeline's three size settings into notebook-level names,
# reading them in the same order as they are printed below.
top_k_retrieval, top_n_context, top_n_rerank = (
    pipeline.top_k_retrieval,
    pipeline.top_n_context,
    pipeline.top_n_rerank,
)

# Echo each setting on its own line.
for label, value in (
    ("Top-K retrieval", top_k_retrieval),
    ("Top-N context", top_n_context),
    ("Top-N rerank", top_n_rerank),
):
    print(f"{label} value: {value}")

  from .autonotebook import tqdm as notebook_tqdm


Initialising RAG pipeline...
  (local reranking with BAAI/bge-reranker-v2-m3; generation with meta-llama/Llama-3.1-8B-Instruct)
  ✓ QueryRouter  (meta-llama/Llama-3.1-8B-Instruct)
  Reranker        : BAAI/bge-reranker-v2-m3  (本地推理)
  Device          : mps


`torch_dtype` is deprecated! Use `dtype` instead!
Loading weights: 100%|██████████| 393/393 [00:00<00:00, 1592.51it/s, Materializing param=roberta.encoder.layer.23.output.dense.weight]              


  ✓ Reranker  (BAAI/bge-reranker-v2-m3  on local MPS)
  Loaded index: dim=1024  ntotal=592
  Loaded BM25 index for Collection A  (vocab: ?  docs: 592)
  ✓ Collection A  (592 chunks)
  Loaded index: dim=1024  ntotal=1450
  Loaded BM25 index for Collection B  (vocab: ?  docs: 1450)
  ✓ Collection B  (1450 chunks)
  Loaded index: dim=1024  ntotal=962
  Loaded BM25 index for Collection C  (vocab: ?  docs: 962)
  ✓ Collection C  (818 chunks)
  Loaded index: dim=1024  ntotal=1434
  Loaded BM25 index for Collection D  (vocab: ?  docs: 1247)
  ✓ Collection D  (1434 chunks)
  Loading model : BAAI/bge-m3
  Device        : MPS


Loading weights: 100%|██████████| 391/391 [00:00<00:00, 1792.23it/s, Materializing param=pooler.dense.weight]                               


  Model loaded in 5.1s
  ✓ Embedder  (bge-m3  on mps:0)
Pipeline ready.

Pipeline ready.

Top-K retrieval value: 20
Top-N context value: 10
Top-N rerank value: 40


In [4]:
# Re-read the size settings from the live `pipeline` object (created in an
# earlier cell) and print them, one per line.
top_k_retrieval = pipeline.top_k_retrieval
top_n_context = pipeline.top_n_context
top_n_rerank = pipeline.top_n_rerank

print(
    f"Top-K retrieval value: {top_k_retrieval}",
    f"Top-N context value: {top_n_context}",
    f"Top-N rerank value: {top_n_rerank}",
    sep="\n",
)

Top-K retrieval value: 20
Top-N context value: 10
Top-N rerank value: 40


In [2]:
# Trace a single query through every pipeline stage, printing per-stage
# timings and scores. Relies on `pipeline` created by an earlier cell.
import time

from scripts.build_bm25 import bm25_search
from scripts.build_index import search
from scripts.reranker import merge_results, rerank

QUERY = "What is the signature dish of Pamela's Diner?"


def _banner(title, leading_blank=True):
    """Print a 60-column section header, optionally preceded by a blank line."""
    rule = "=" * 60
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def _elapsed_ms(start):
    """Return the milliseconds elapsed since `start` (a time.perf_counter() reading)."""
    return (time.perf_counter() - start) * 1000


def _report_hits(col, hits):
    """Print the hit count and the three leading scores for one collection."""
    print(f"  [{col}] {len(hits)} hits  top-3 scores: "
          f"{[round(h['score'], 4) for h in hits[:3]]}")


# ── 0. Confirm pipeline parameters ───────────────────────────
_banner("Pipeline 参数确认", leading_blank=False)
print(f"  top_k_retrieval : {pipeline.top_k_retrieval}")
print(f"  top_n_rerank    : {pipeline.top_n_rerank}")
print(f"  top_n_context   : {pipeline.top_n_context}")
print(f"  collections     : {pipeline.collections}")

# ── 1. Router ────────────────────────────────────────────────
_banner("[1] Router")
# perf_counter() is monotonic, so measured intervals are not distorted by
# wall-clock adjustments the way time.time() differences can be.
t0 = time.perf_counter()
routed_cols = pipeline._router.route(QUERY)
print(f"  路由结果 : {routed_cols}  ({_elapsed_ms(t0):.0f}ms)")

# ── 2. Dense retrieval ───────────────────────────────────────
_banner("[2] Dense Retrieval")
t0 = time.perf_counter()
dense_hits = []
for col in routed_cols:
    hits = search(
        QUERY,
        pipeline._faiss[col],
        pipeline._faiss_meta[col],
        pipeline._embedder,
        top_k=pipeline.top_k_retrieval,
    )
    _report_hits(col, hits)
    dense_hits.extend(hits)
print(f"  合并后共 {len(dense_hits)} 条  ({_elapsed_ms(t0):.0f}ms)")

# ── 3. Sparse retrieval ──────────────────────────────────────
_banner("[3] Sparse Retrieval (BM25)")
t0 = time.perf_counter()
sparse_hits = []
for col in routed_cols:
    hits = bm25_search(QUERY, retriever=pipeline._bm25[col], top_k=pipeline.top_k_retrieval)
    _report_hits(col, hits)
    sparse_hits.extend(hits)
print(f"  合并后共 {len(sparse_hits)} 条  ({_elapsed_ms(t0):.0f}ms)")

# ── 4. Merge + Rerank ────────────────────────────────────────
_banner("[4] Merge + Rerank")
# Build the chunk_id -> text lookup from *every* routed collection. Indexing
# routed_cols[0] / routed_cols[1] directly raised IndexError when the router
# returned one collection and ignored any collection past the second.
# Later collections still override earlier ones on key collisions.
combined_chunk_texts = {}
for col in routed_cols:
    combined_chunk_texts.update(pipeline._chunk_texts[col])

candidates = merge_results(dense_hits, sparse_hits)
print(f"  merge 后候选数 : {len(candidates)}")

t0 = time.perf_counter()
# top_n=len(candidates): presumably so that no candidate is cut from the
# ranking shown below — confirm against rerank()'s contract.
reranked = rerank(QUERY, candidates, pipeline._reranker, combined_chunk_texts,
                  top_n=len(candidates))
elapsed_rerank = _elapsed_ms(t0)
print(f"  rerank 耗时    : {elapsed_rerank:.0f}ms")
print(f"\n  {'Rank':<6} {'Rerank':>8}  {'Dense':>8}  {'Sparse':>8}  chunk_id")
print("  " + "-" * 65)
for r in reranked[:10]:
    print(f"  #{r['rerank_rank']:<4} {r['rerank_score']:>8.4f}  "
          f"{r['dense_score']:>8.4f}  {r['sparse_score']:>8.4f}  {r['chunk_id']}")

# ── 5. Context ───────────────────────────────────────────────
_banner(f"[5] Context (top {pipeline.top_n_context} chunks)")
context_chunks = reranked[:pipeline.top_n_context]
for i, hit in enumerate(context_chunks, 1):
    # The full chunk text is printed (the former `[:]` slice truncated nothing);
    # the trailing "..." is kept so the printed output stays unchanged.
    text_preview = combined_chunk_texts.get(hit["chunk_id"], "")
    print(f"  [{i}] score={hit['rerank_score']:.4f}  {hit['chunk_id']}")
    print(f"       {text_preview}...")

# ── 6. Generate ──────────────────────────────────────────────
_banner("[6] Generate")
# pipeline.run() is called on the raw query, so this timing covers whatever
# run() does internally, not just the manual stages traced above.
t0 = time.perf_counter()
result = pipeline.run(QUERY)
print(f"  耗时 : {_elapsed_ms(t0):.0f}ms")
print(f"\n  Answer: {result.answer}")

Pipeline 参数确认
  top_k_retrieval : 20
  top_n_rerank    : 40
  top_n_context   : 10
  collections     : ['A', 'B', 'C', 'D']

[1] Router
  路由结果 : ['D', 'A']  (716ms)

[2] Dense Retrieval
  [D] 20 hits  top-3 scores: [0.5442, 0.4934, 0.4926]
  [A] 20 hits  top-3 scores: [0.3638, 0.3586, 0.3517]
  合并后共 40 条  (437ms)

[3] Sparse Retrieval (BM25)
  [D] 20 hits  top-3 scores: [3.7621, 3.3635, 3.0148]
  [A] 20 hits  top-3 scores: [2.8624, 2.0383, 2.0153]
  合并后共 40 条  (2ms)

[4] Merge + Rerank
  merge 后候选数 : 80
  rerank 耗时    : 8362ms

  Rank     Rerank     Dense    Sparse  chunk_id
  -----------------------------------------------------------------
  #1      0.0866    0.5442    0.0000  D_D_pittsburgh_restaurants__0101
  #2      0.0482    0.4923    0.0000  D_D_pittsburgh_restaurants__0083
  #3      0.0052    0.4873    0.0000  D_D_pittsburgh_restaurants__0140
  #4      0.0042    0.4934    0.0000  D_D_pittsburgh_restaurants__0103
  #5      0.0032    0.3489    0.0000  A_wiki_Scotch'n'Soda Theatre

In [5]:
# Cell 2 — run a single query through the pipeline and show the report
QUERY = "What is the signature dish of Pamela's Diner?"  # ← replace with your own query

result = pipeline.run(QUERY)
report = result.pretty()
print(report)


───────────────────────────────────────────────────────
Query: "What is the signature dish of Pamela's Diner?"
[1] Route → ['D', 'A']  (339ms)
[2] Dense  → 40 hits  (411ms)
[3] Sparse → 40 hits  (2ms)
[4] Rerank → 80 candidates → 40 hits  (8030ms)
[6] Generate  (612ms)
    Total: 9397ms
Query      : What is the signature dish of Pamela's Diner?
Collection : ['D', 'A']

Answer
------
Pamela's Diner is known for its hotcakes, and it was named in the top 10 of USA's Hotcakes.

Context (10 chunks used)
--------------------------------------------------
  [1] rank=1  score=0.0866  D_D_pittsburgh_restaurants__0101
      section : Pamela's P&G Diners
      preview : [Site: Pittsburgh Restaurants Directory | Category: ]
Section: Pittsburgh Restaurants Directory > Pamela's P&G Diners

Address: 60 21st St., Pittsburg...
  [2] rank=2  score=0.0482  D_D_pittsburgh_restaurants__0083
      section : La Feria - Pamela's Peruvian Cousin
      preview : [Site: Pittsburgh Restaurants Directory | Catego

In [None]:
import os

# Allow the OpenMP library to be loaded more than once.
# NOTE(review): presumably this only helps if it runs before the libraries
# that load OpenMP are imported — confirm, since this cell sits after the
# cells that load the models.
os.environ.update(KMP_DUPLICATE_LIB_OK='True')

In [3]:
"""
run_leaderboard.py
------------------
批量运行 RAG pipeline 回答 leaderboard_queries.json 中的所有问题，
输出标准格式的 answers.json。
"""

# Batch driver: answer every question in INPUT_FILE with the RAG pipeline and
# write the answers, keyed by question id, to OUTPUT_FILE as JSON.
import json
import time
from pathlib import Path

from scripts.rag_pipeline import RAGPipeline

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

ANDREW_ID   = "wenguand"                          # ← change to your Andrew ID
INPUT_FILE  = Path("leaderboard_queries.json")
OUTPUT_FILE = Path("answers.json")

# ---------------------------------------------------------------------------
# Main flow
# ---------------------------------------------------------------------------

# 1. Load the questions (each item is read via its "id" and "question" keys).
queries = json.loads(INPUT_FILE.read_text(encoding="utf-8"))
print(f"共 {len(queries)} 个问题\n")

# 2. Initialise the pipeline once for the whole batch.
#    Note: this rebinds the notebook-level name `pipeline`.
pipeline = RAGPipeline(verbose=False)

# 3. Answer the questions one by one.
answers = {"andrewid": ANDREW_ID}
failed_qids = []   # ids whose answer was left empty because run() raised

for i, item in enumerate(queries, 1):
    qid      = item["id"]
    question = item["question"]
    print(f"[{i}/{len(queries)}] Q{qid}: {question}")

    try:
        result = pipeline.run(question)
        answers[qid] = result.answer
        print(f"  → {result.answer[:80]}...")
    except Exception as e:
        # Best effort: keep going so one bad question does not abort the batch,
        # but remember the id so the blank answer is reported at the end.
        print(f"  ❌ 出错: {e}")
        answers[qid] = ""
        failed_qids.append(qid)

    time.sleep(0.5)   # avoid HF API rate limiting

# 4. Write out the results.
OUTPUT_FILE.write_text(
    json.dumps(answers, ensure_ascii=False, indent=4),
    encoding="utf-8",
)
print(f"\n✅ 完成，结果已写入 {OUTPUT_FILE}")
if failed_qids:
    # Surface blank answers explicitly; otherwise they are only visible by
    # scrolling back through the per-question log.
    print(f"⚠️ {len(failed_qids)} 个问题出错，答案为空: {failed_qids}")
print(json.dumps(answers, ensure_ascii=False, indent=4))

共 157 个问题

  Reranker        : BAAI/bge-reranker-v2-m3  (本地推理)
  Device          : mps


Loading weights: 100%|██████████| 393/393 [00:00<00:00, 1533.69it/s, Materializing param=roberta.encoder.layer.23.output.dense.weight]              


  Loaded index: dim=1024  ntotal=592
  Loaded BM25 index for Collection A  (vocab: ?  docs: 592)
  Loaded index: dim=1024  ntotal=1450
  Loaded BM25 index for Collection B  (vocab: ?  docs: 1450)
  Loaded index: dim=1024  ntotal=962
  Loaded BM25 index for Collection C  (vocab: ?  docs: 962)
  Loaded index: dim=1024  ntotal=1434
  Loaded BM25 index for Collection D  (vocab: ?  docs: 1247)
  Loading model : BAAI/bge-m3
  Device        : MPS


Loading weights: 100%|██████████| 391/391 [00:00<00:00, 1866.68it/s, Materializing param=pooler.dense.weight]                               


  Model loaded in 6.4s
[1/157] Q1: When was Carnegie Mellon University founded?
  → Carnegie Mellon University was founded in 1900 by Andrew Carnegie as the Carnegi...
[2/157] Q2: What was the original purpose of the Carnegie Technical Schools?
  → The original purpose of the Carnegie Technical Schools, founded by Andrew Carneg...
[3/157] Q3: When did the Carnegie Technical Schools become the Carnegie Institute of Technology?
  → The Carnegie Technical Schools became the Carnegie Institute of Technology in 19...
[4/157] Q4: What significant event occurred in 1967 concerning Carnegie Mellon University?
  → In 1967, two significant events occurred concerning Carnegie Mellon University:
...
[5/157] Q5: Who were the founders of the Mellon Institute of Industrial Research?
  → The founders of the Mellon Institute of Industrial Research were Andrew Mellon a...
[6/157] Q6: What is the motto of Carnegie Mellon University?
  → The motto of Carnegie Mellon University is not explicitly mentioned 