In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# Custom tokenizer
def whitespace_tokenizer(text):
    return text.split()

# Load preprocessed ANTIQUE dataset
df = pd.read_csv("antique_cleaned.csv")

# Rejoin tokens into text
df['joined'] = df['tokens'].apply(eval).apply(lambda tokens: ' '.join(tokens))

# TF-IDF vectorizer setup
vectorizer = TfidfVectorizer(
    lowercase=False,
    tokenizer=whitespace_tokenizer,
    preprocessor=None,
    token_pattern=None,
    max_df=0.85,
    min_df=5,
)

# Fit and transform
tfidf_matrix = vectorizer.fit_transform(df['joined'])

# Save everything
joblib.dump(vectorizer, "antique_vectorizer.pkl")
joblib.dump(tfidf_matrix, "antique_tfidf_matrix.pkl")
df[['doc_id', 'original_text']].to_csv("antique_metadata.csv", index=False)

print("✅ TF-IDF index for ANTIQUE built and saved.")

✅ TF-IDF index for ANTIQUE built and saved.


In [4]:
import pandas as pd
from rank_bm25 import BM25Okapi
import joblib
import ast

# Load cleaned ANTIQUE dataset
df = pd.read_csv("antique_cleaned.csv")

# Convert 'tokens' column from string to list
df['tokens'] = df['tokens'].apply(ast.literal_eval)

# Build BM25
tokenized_corpus = df['tokens'].tolist()
bm25 = BM25Okapi(tokenized_corpus)

# Save the model and supporting data
joblib.dump(tokenized_corpus, "antique_bm25_corpus.pkl")
joblib.dump(bm25, "antique_bm25_model.pkl")
df[['doc_id', 'original_text']].to_csv("antique_metadata.csv", index=False)

print("✅ BM25 index for ANTIQUE built and saved.")

✅ BM25 index for ANTIQUE built and saved.


In [5]:
import pandas as pd
import joblib
from rank_bm25 import BM25Okapi
import numpy as np
import textwrap

# Load BM25 index components
with open("antique_bm25_corpus.pkl", "rb") as f:
    tokenized_corpus = joblib.load(f)

with open("antique_bm25_model.pkl", "rb") as f:
    bm25 = joblib.load(f)

df_meta = pd.read_csv("antique_metadata.csv")

# Define queries to test
queries = [
    "How do I reset my Gmail password?",
    "What is the average salary for a registered nurse?",
    "Can I take ibuprofen and paracetamol together?",
    "What are the side effects of quitting smoking?",
    "How to apply for a passport online?"
]

# Tokenizer
def tokenize(text):
    return text.lower().split()

# Run queries
for q in queries:
    print("="*30)
    print(f"🔍 Query: {q}\n")
    tokenized_query = tokenize(q)
    scores = bm25.get_scores(tokenized_query)

    top_n = np.argsort(scores)[::-1][:5]

    for idx in top_n:
        doc_id = df_meta.iloc[idx]['doc_id']
        text = df_meta.iloc[idx]['original_text']
        score = scores[idx]
        print(f"🔹 Doc ID: {doc_id}  (Score: {score:.4f})")
        print(textwrap.fill(text, width=100))
        print()

print("✅ All BM25 queries processed.")

🔍 Query: How do I reset my Gmail password?

🔹 Doc ID: 3185001_3  (Score: 16.3129)
GO TO GMAIL AND DOWNLOAD GOOGLE TALK.SIMPLE!

🔹 Doc ID: 3485540_1  (Score: 15.7508)
Or you could jump onto Gmail or hotmail or or or...

🔹 Doc ID: 3852761_1  (Score: 14.6298)
mykonthc? I'm not sure what that is, but google is very free. the search aspects of it are free and
so is the email which they call gmail. There are pay aspects as well tho, such as the professional
version of the map creator and a higher level of gmail.

🔹 Doc ID: 3475044_9  (Score: 14.5809)
the light does not go out automatically it has to be reset. if the first guy who changed it did not
reset the light then i would have to be reset!

🔹 Doc ID: 845979_0  (Score: 14.4186)
Reset.. . Its a button to reset the thermal overload.

🔍 Query: What is the average salary for a registered nurse?

🔹 Doc ID: 1296437_3  (Score: 15.0384)
Figure out your budget. Go to free salary wizard for average pay in your area,this site gives you
the lowest a

In [6]:
import pandas as pd
import joblib
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Load cleaned ANTIQUE data
df = pd.read_csv("antique_cleaned.csv")

# Use original text for embedding
texts = df['original_text'].tolist()

# Load BERT model (same as Clinical Trials for consistency)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode with batching
batch_size = 64
all_embeddings = []
for i in tqdm(range(0, len(texts), batch_size), desc="Embedding ANTIQUE"):
    batch = texts[i:i+batch_size]
    embeddings = model.encode(batch, show_progress_bar=False, convert_to_numpy=True)
    all_embeddings.extend(embeddings)

# Save embeddings and metadata
joblib.dump(all_embeddings, "antique_bert_embeddings.pkl")
df[['doc_id', 'original_text']].to_csv("antique_metadata.csv", index=False)

print("✅ BERT embeddings for ANTIQUE built and saved.")

Embedding ANTIQUE: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 6308/6308 [4:55:43<00:00,  2.81s/it]


✅ BERT embeddings for ANTIQUE built and saved.


In [7]:
import pandas as pd
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import textwrap

# Load precomputed BERT document embeddings and metadata
doc_embeddings = joblib.load("antique_bert_embeddings.pkl")
df_meta = pd.read_csv("antique_metadata.csv")

# Load the same BERT model used during indexing
model = SentenceTransformer('all-MiniLM-L6-v2')

# Example test queries
queries = [
    "How do I reset my Gmail password?",
    "What is the average salary for a registered nurse?",
    "Can I take ibuprofen and paracetamol together?",
    "What are the side effects of quitting smoking?",
    "How to apply for a passport online?"
]

# Number of results to retrieve per query
TOP_K = 5

# Run queries
for query in queries:
    print("=" * 60)
    print(f"🔍 Query: {query}\n")

    # Encode the query
    query_embedding = model.encode([query])

    # Compute cosine similarity with all documents
    scores = cosine_similarity(query_embedding, doc_embeddings)[0]

    # Get top-k documents
    top_k_idx = np.argsort(scores)[::-1][:TOP_K]

    for idx in top_k_idx:
        doc_id = df_meta.iloc[idx]['doc_id']
        text = df_meta.iloc[idx]['original_text']
        score = scores[idx]
        print(f"🔹 Doc ID: {doc_id}  (Score: {score:.4f})\n{textwrap.fill(text, width=100)}\n")

🔍 Query: How do I reset my Gmail password?

🔹 Doc ID: 3966426_6  (Score: 0.6157)
you should go to google.com then type how to hack a email password. thats all.

🔹 Doc ID: 1382227_3  (Score: 0.5815)
go to your yahoo mail, then go to mail options, then you can see CHANGE PASSWORD

🔹 Doc ID: 3720134_2  (Score: 0.5797)
If it's your email account, I think you have to call your email service provider But if it's an
account with a website of some kind, there's should be a link you can click on to get them to send
you a password reminder, or a new password.

🔹 Doc ID: 2261213_1  (Score: 0.5594)
Hey just go to "My Account",which is just below u'r id and beside signout. There u can find a option
"change password".. or else if u rember u'r question and answer while creating u'r id u can do
1thing just enter u'r id and below click the option "forgot password" then it will ask the question
set by u and if u give correct  answer it will tell u'r password and thereafter u can change it from
there..  

In [3]:
# build_hybrid_antique.py

import pandas as pd
import joblib
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Load cleaned ANTIQUE data
df = pd.read_csv("antique_cleaned.csv")

# Tokenize for BM25
tokenized_corpus = [eval(doc) for doc in df['tokens']]
bm25 = BM25Okapi(tokenized_corpus)

# Save BM25
joblib.dump(bm25, "antique_bm25_model.pkl")
joblib.dump(tokenized_corpus, "antique_bm25_corpus.pkl")

# Load BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Convert token lists back to text
texts = [' '.join(tokens) for tokens in tokenized_corpus]

# Compute BERT embeddings
bert_embeddings = model.encode(texts, show_progress_bar=False, batch_size=64)
joblib.dump(bert_embeddings, "antique_bert_embeddings.pkl")

# Save metadata
df[['doc_id', 'original_text']].to_csv("antique_metadata.csv", index=False)

print("✅ Hybrid BM25 + BERT model for ANTIQUE dataset built and saved.")

✅ Hybrid BM25 + BERT model for ANTIQUE dataset built and saved.


In [6]:
import joblib
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
from rank_bm25 import BM25Okapi
import textwrap

# Load preprocessed tokens, metadata, and hybrid components
with open("antique_bm25_corpus.pkl", "rb") as f:
    corpus = joblib.load(f)

with open("antique_bm25_model.pkl", "rb") as f:
    bm25 = joblib.load(f)

doc_embeddings = joblib.load("antique_bert_embeddings.pkl")
df_meta = pd.read_csv("antique_metadata.csv")

# Map from doc_id to index (for embedding lookup)
doc_id_to_index = {doc_id: idx for idx, doc_id in enumerate(df_meta['doc_id'])}

# Load the same BERT model used for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Define your test queries
queries = [
    "How do I reset my Gmail password?",
    "What is the average salary for a registered nurse?",
    "Can I take ibuprofen and paracetamol together?",
    "What are the side effects of quitting smoking?",
    "How to apply for a passport online?"
]

def hybrid_search(query, top_k=5, bm25_k=20, alpha=0.6):
    # Step 1: BM25 search
    tokenized_query = query.lower().split()
    bm25_scores = bm25.get_scores(tokenized_query)
    top_bm25_indices = np.argsort(bm25_scores)[::-1][:bm25_k]

    # Step 2: BERT query embedding
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Step 3: BERT similarity with top BM25 docs
    top_doc_embeddings = np.array([doc_embeddings[i] for i in top_bm25_indices])
    bert_scores = util.cos_sim(query_embedding, top_doc_embeddings)[0].cpu().numpy()

    # Combine scores
    hybrid_scores = alpha * bm25_scores[top_bm25_indices] + (1 - alpha) * bert_scores

    # Rank final top_k
    top_indices = np.argsort(hybrid_scores)[::-1][:top_k]
    results = []
    for rank in top_indices:
        doc_idx = top_bm25_indices[rank]
        results.append({
            "doc_id": df_meta.iloc[doc_idx]['doc_id'],
            "score": hybrid_scores[rank],
            "text": df_meta.iloc[doc_idx]['original_text']
        })
    return results

# Run and print results
for query in queries:
    print("="*30)
    print(f"🔍 Query: {query}\n")
    results = hybrid_search(query)
    for res in results:
        print(f"🔹 Doc ID: {res['doc_id']}  (Score: {res['score']:.4f})")
        print(textwrap.fill(res['text'], 100))
        print()
print("✅ All hybrid queries on ANTIQUE processed.")

🔍 Query: How do I reset my Gmail password?

🔹 Doc ID: 3185001_3  (Score: 9.9261)
GO TO GMAIL AND DOWNLOAD GOOGLE TALK.SIMPLE!

🔹 Doc ID: 3485540_1  (Score: 9.6185)
Or you could jump onto Gmail or hotmail or or or...

🔹 Doc ID: 3852761_1  (Score: 8.9039)
mykonthc? I'm not sure what that is, but google is very free. the search aspects of it are free and
so is the email which they call gmail. There are pay aspects as well tho, such as the professional
version of the map creator and a higher level of gmail.

🔹 Doc ID: 3475044_9  (Score: 8.8609)
the light does not go out automatically it has to be reset. if the first guy who changed it did not
reset the light then i would have to be reset!

🔹 Doc ID: 845979_0  (Score: 8.7395)
Reset.. . Its a button to reset the thermal overload.

🔍 Query: What is the average salary for a registered nurse?

🔹 Doc ID: 1296437_3  (Score: 9.1852)
Figure out your budget. Go to free salary wizard for average pay in your area,this site gives you
the lowest and hig

In [7]:
import pandas as pd
import json
from collections import defaultdict

# Load preprocessed dataset
df = pd.read_csv("antique_cleaned.csv")
df['tokens'] = df['tokens'].apply(eval)

# Build inverted index
inverted_index = defaultdict(set)

for idx, row in df.iterrows():
    doc_id = row['doc_id']
    tokens = row['tokens']
    for token in set(tokens):  # Avoid duplicate tokens in same doc
        inverted_index[token].add(doc_id)

# Convert sets to sorted lists for JSON serialization
inverted_index = {term: sorted(list(doc_ids)) for term, doc_ids in inverted_index.items()}

# Save inverted index
with open("antique_inverted_index.json", "w", encoding="utf-8") as f:
    json.dump(inverted_index, f, indent=2)

print("✅ Inverted index for ANTIQUE built and saved.")

✅ Inverted index for ANTIQUE built and saved.


In [8]:
import json
import pandas as pd
from collections import Counter
import textwrap

# Load inverted index and metadata
with open("antique_inverted_index.json", "r", encoding="utf-8") as f:
    inverted_index = json.load(f)

df_meta = pd.read_csv("antique_cleaned.csv")
df_meta['original_text'] = df_meta['original_text'].astype(str)
doc_map = dict(zip(df_meta['doc_id'], df_meta['original_text']))

def search(query, top_k=5):
    terms = query.lower().split()
    doc_scores = Counter()

    for term in terms:
        doc_ids = inverted_index.get(term, [])
        for doc_id in doc_ids:
            doc_scores[doc_id] += 1

    top_docs = doc_scores.most_common(top_k)
    return [(doc_id, doc_map.get(doc_id, ""), score) for doc_id, score in top_docs]

# Example queries
queries = [
    "gmail password reset",
    "side effects of ibuprofen",
    "apply for passport"
]

for query in queries:
    print("="*80)
    print(f"🔍 Query: {query}")
    results = search(query)
    for doc_id, text, score in results:
        print(f"\n🔹 Doc ID: {doc_id}  (Score: {score})")
        print(textwrap.fill(text, 100))

🔍 Query: gmail password reset

🔹 Doc ID: 1603128_2  (Score: 2)
unplug the TV.  That will reset all of the passwords.  Then, you can go in and change them to what
you want.. . Don't tell them I told you.

🔹 Doc ID: 3905501_2  (Score: 2)
try using "0000" or "1234"; that's the usual reset password for most V-Chip tv's and cable boxes

🔹 Doc ID: 4399106_0  (Score: 2)
If the voicemail is from a company you work at, you need to contact the telecom administrator to
reset the password.  If it's personal voicemail provided by a local telephone company, the original
owner of the voice mailbox will need to call and have the password reset...or they can request to
transfer the account to your name.

🔹 Doc ID: 4399106_3  (Score: 2)
Are you wanting to change the # the phone dials when attempting to call your voicemail or are you
wanting to reset the password? If you want to change the # the phone dials to get to the
voicemail.... that's easy.... just advise which phone you have. If it's the other th

In [3]:
import ir_datasets

dataset = ir_datasets.load("antique/test")

# Check what's available
print("Provides:", dataset)
print("\nFirst few queries:")
for q in dataset.queries_iter():
    print(q)
    break

print("\nFirst few qrels:")
for qrel in dataset.qrels_iter():
    print(qrel)
    break

Provides: Dataset(id='antique/test', provides=['docs', 'queries', 'qrels'])

First few queries:


[INFO] Please confirm you agree to the authors' data usage agreement found at <https://ciir.cs.umass.edu/downloads/Antique/readme.txt>
[INFO] [starting] https://ciir.cs.umass.edu/downloads/Antique/antique-test-queries.txt
[INFO] [finished] https://ciir.cs.umass.edu/downloads/Antique/antique-test-queries.txt: [00:00] [11.4kB] [54.0kB/s]
                                                                                                 

GenericQuery(query_id='3990512', text='how can we get concentration onsomething?')

First few qrels:


[INFO] [starting] https://ciir.cs.umass.edu/downloads/Antique/antique-test.qrel
[INFO] [finished] https://ciir.cs.umass.edu/downloads/Antique/antique-test.qrel: [00:00] [150kB] [168kB/s]
                                                                                        

TrecQrel(query_id='1964316', doc_id='1964316_5', relevance=4, iteration='U0')




In [4]:
import ir_datasets

dataset = ir_datasets.load("antique/test")
qrels = dataset.qrels_iter()

# Save to TREC-compatible QREL file
with open("antique_qrels.txt", "w", encoding="utf-8") as f:
    for qrel in qrels:
        f.write(f"{qrel.query_id} 0 {qrel.doc_id} {qrel.relevance}\n")

print("✅ Saved QRELs to antique_qrels.txt")

✅ Saved QRELs to antique_qrels.txt


In [2]:
import ir_datasets

dataset = ir_datasets.load("antique/test")
queries = list(dataset.queries_iter())

with open("antique_queries.txt", "w", encoding="utf-8") as f:
    for q in queries:
        f.write(f"{q.query_id}\t{q.text}\n")

print("✅ Saved ANTIQUE queries to antique_queries.txt")

✅ Saved ANTIQUE queries to antique_queries.txt


In [5]:
import pandas as pd
import joblib
from sklearn.metrics.pairwise import cosine_similarity

# === Fix: Define tokenizer used in training ===
def whitespace_tokenizer(text):
    return text.split()

# === Load data ===
meta = pd.read_csv("antique_metadata.csv")
queries = pd.read_csv("antique_queries.txt", sep='\t', names=["query_id", "text"])

# === Load vectorizer and sparse TF-IDF matrix ===
tfidf_vectorizer = joblib.load("antique_vectorizer.pkl")
tfidf_matrix = joblib.load("antique_tfidf_matrix.pkl")

with open("antique_tfidf_results.txt", "w", encoding="utf-8") as f:
    for _, query in queries.iterrows():
        query_vec = tfidf_vectorizer.transform([query["text"]])  # Keep sparse
        similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
        ranked_indices = similarities.argsort()[::-1][:100]  # Top 100

        for rank, idx in enumerate(ranked_indices):
            doc_id = meta.iloc[idx]['doc_id']
            score = similarities[idx]
            f.write(f"{query['query_id']} Q0 {doc_id} {rank + 1} {score:.4f} tfidf\n")

print("✅ TF-IDF results for ANTIQUE saved to antique_tfidf_results.txt")

✅ TF-IDF results for ANTIQUE saved to antique_tfidf_results.txt


In [6]:
import pandas as pd
import joblib
from rank_bm25 import BM25Okapi
from tqdm import tqdm

# === Load data ===
df_meta = pd.read_csv("antique_metadata.csv")
with open("antique_bm25_corpus.pkl", "rb") as f:
    corpus = joblib.load(f)
with open("antique_bm25_model.pkl", "rb") as f:
    bm25 = joblib.load(f)
queries = pd.read_csv("antique_queries.txt", sep='\t', names=["query_id", "text"])

# === Process and export ===
with open("antique_bm25_results.txt", "w", encoding="utf-8") as f:
    for _, row in tqdm(queries.iterrows(), total=len(queries), desc="Exporting BM25 results"):
        query_tokens = row['text'].split()
        scores = bm25.get_scores(query_tokens)
        top_indices = scores.argsort()[::-1][:100]

        for rank, idx in enumerate(top_indices):
            doc_id = df_meta.iloc[idx]['doc_id']
            score = scores[idx]
            f.write(f"{row['query_id']} Q0 {doc_id} {rank + 1} {score:.4f} bm25\n")

print("✅ BM25 results for ANTIQUE saved to antique_bm25_results.txt")

Exporting BM25 results: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [07:17<00:00,  2.19s/it]

✅ BM25 results for ANTIQUE saved to antique_bm25_results.txt





In [7]:
import pandas as pd
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

# === Load metadata and BERT doc embeddings ===
df_meta = pd.read_csv("antique_metadata.csv")
queries = pd.read_csv("antique_queries.txt", sep='\t', names=["query_id", "text"])
doc_embeddings = joblib.load("antique_bert_embeddings.pkl")

# === Load BERT model ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# === Process and export ===
with open("antique_bert_results.txt", "w", encoding="utf-8") as f:
    for _, row in tqdm(queries.iterrows(), total=len(queries), desc="Exporting BERT results"):
        query_embedding = model.encode(row['text'], convert_to_tensor=True)
        scores = util.cos_sim(query_embedding, doc_embeddings)[0].cpu().numpy()
        top_indices = scores.argsort()[::-1][:100]

        for rank, idx in enumerate(top_indices):
            doc_id = df_meta.iloc[idx]['doc_id']
            score = scores[idx]
            f.write(f"{row['query_id']} Q0 {doc_id} {rank + 1} {score:.4f} bert\n")

print("✅ BERT results for ANTIQUE saved to antique_bert_results.txt")

Exporting BERT results: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:41<00:00,  1.97it/s]

✅ BERT results for ANTIQUE saved to antique_bert_results.txt





In [8]:
import pandas as pd
import joblib
import numpy as np
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

# === Load metadata and embeddings ===
df_meta = pd.read_csv("antique_metadata.csv")
queries = pd.read_csv("antique_queries.txt", sep='\t', names=["query_id", "text"])
corpus = joblib.load("antique_bm25_corpus.pkl")
bm25 = joblib.load("antique_bm25_model.pkl")
doc_embeddings = joblib.load("antique_bert_embeddings.pkl")

# === Load BERT model ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# === Process and export hybrid scores ===
with open("antique_hybrid_results.txt", "w", encoding="utf-8") as f:
    for _, row in tqdm(queries.iterrows(), total=len(queries), desc="Exporting Hybrid results"):
        query_text = row["text"]
        query_tokens = query_text.lower().split()
        bm25_scores = bm25.get_scores(query_tokens)
        query_embedding = model.encode(query_text, convert_to_tensor=True)
        bert_scores = util.cos_sim(query_embedding, doc_embeddings)[0].cpu().numpy()

        # Normalize scores
        bm25_norm = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) - np.min(bm25_scores) + 1e-6)
        bert_norm = (bert_scores - np.min(bert_scores)) / (np.max(bert_scores) - np.min(bert_scores) + 1e-6)
        hybrid_scores = bm25_norm + bert_norm

        top_indices = np.argsort(hybrid_scores)[::-1][:100]

        for rank, idx in enumerate(top_indices):
            doc_id = df_meta.iloc[idx]["doc_id"]
            score = hybrid_scores[idx]
            f.write(f"{row['query_id']} Q0 {doc_id} {rank + 1} {score:.4f} hybrid\n")

print("✅ Hybrid results for ANTIQUE saved to antique_hybrid_results.txt")

Exporting Hybrid results: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 200/200 [06:02<00:00,  1.81s/it]

✅ Hybrid results for ANTIQUE saved to antique_hybrid_results.txt





In [1]:
import pytrec_eval
import pandas as pd

# === Load QRELs ===
qrels = {}
with open("antique_qrels.txt", "r", encoding="utf-8") as f:
    for line in f:
        query_id, _, doc_id, relevance = line.strip().split()
        qrels.setdefault(query_id, {})[doc_id] = int(relevance)

# === Load TREC-style results ===
def load_run(file_path):
    run = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            qid, _, docid, rank, score, _ = line.strip().split()
            run.setdefault(qid, {})[docid] = float(score)
    return run

# === Load each run ===
runs = {
    "TF-IDF": load_run("antique_tfidf_results.txt"),
    "BM25": load_run("antique_bm25_results.txt"),
    "BERT": load_run("antique_bert_results.txt"),
    "Hybrid": load_run("antique_hybrid_results.txt"),
}

# === Define metrics (pytrec_eval keys)
metrics = {'map', 'recip_rank', 'P_10', 'recall_1000'}

evaluator = pytrec_eval.RelevanceEvaluator(qrels, metrics)

# === Evaluate and print results
for name, run in runs.items():
    results = evaluator.evaluate(run)

    avg_metrics = {
        metric: sum(query_scores[metric] for query_scores in results.values()) / len(results)
        for metric in metrics
    }

    print(f"\n📊 Results for {name}")
    print(f"  MAP:         {avg_metrics['map']:.4f}")
    print(f"  MRR:         {avg_metrics['recip_rank']:.4f}")
    print(f"  Recall@1000: {avg_metrics['recall_1000']:.4f}")
    print(f"  Precision@10:{avg_metrics['P_10']:.4f}")


📊 Results for TF-IDF
  MAP:         0.0552
  MRR:         0.2581
  Recall@1000: 0.1695
  Precision@10:0.1220

📊 Results for BM25
  MAP:         0.1084
  MRR:         0.4457
  Recall@1000: 0.2315
  Precision@10:0.2335

📊 Results for BERT
  MAP:         0.1653
  MRR:         0.7294
  Recall@1000: 0.3631
  Precision@10:0.3675

📊 Results for Hybrid
  MAP:         0.2059
  MRR:         0.7327
  Recall@1000: 0.4085
  Precision@10:0.4115
