In [None]:
import sys

# NOTE(review): hardcoded absolute path; presumably it makes a local ir_datasets
# checkout (the one providing the trec-tot/2025 datasets used below) importable —
# confirm, and adjust when running outside that workspace.
sys.path.append("/workspaces/trec-bench/trec25/ir_datasets")

import pyterrier as pt
from pathlib import Path
from ir_measures import Recall, nDCG, RR

def load_run(directory):
    """Wrap the run file stored in ``directory`` as a PyTerrier transformer.

    Args:
        directory: Folder (str or path-like) that contains ``run.txt.gz``.

    Returns:
        The object produced by ``pt.transformer.get_transformer`` for the
        results read from ``<directory>/run.txt.gz``.
    """
    run_file = Path(directory) / "run.txt.gz"
    results = pt.io.read_results(run_file)
    return pt.transformer.get_transformer(results)

In [69]:
def experiment(dataset_split):
    """Evaluate the stored retrieval runs on one split of ``trec-tot/2025``.

    Topics and qrels come from ``irds:trec-tot/2025/<dataset_split>``. The three
    saved runs (Anserini BM25, PyTerrier BM25, BERT dense) are evaluated on
    their own and combined with PyTerrier's ``^`` operator.

    NOTE(review): per the PyTerrier operator documentation, ``^`` concatenates,
    i.e. it appends the right-hand results after the left-hand ranking. That is
    consistent with the combined rows matching "BM25 (Anserini)" on every
    metric except Recall@10000 — confirm this is the intended fusion.

    Args:
        dataset_split: Split name; used in the dataset id and in the run
            directories (the notebook calls it with "train", "dev1", ...).

    Returns:
        The result of ``pt.Experiment``: one row per named system with the
        requested nDCG, RR and Recall measures.
    """
    # Resolve the dataset once and reuse it for both topics and qrels.
    dataset = pt.datasets.get_dataset(f"irds:trec-tot/2025/{dataset_split}")
    topics = dataset.get_topics()
    qrels = dataset.get_qrels()

    bm25_anserini = load_run(f'../anserini-bm25-retrieval/runs/{dataset_split}/')
    bm25_pyterrier = load_run(f'../pyterrier-bm25-retrieval/runs/bm25/{dataset_split}/')
    bert_dense = load_run(f'../lightning-dense-retrieval/runs/{dataset_split}')

    # Keep each display name next to its system so the two lists handed to
    # pt.Experiment cannot drift out of sync.
    systems = {
        "BM25 (Anserini)": bm25_anserini,
        "BM25 (PyTerrier)": bm25_pyterrier,
        "BERT Dense": bert_dense,
        "BM25 (Anserini^PyTerrier)": bm25_anserini ^ bm25_pyterrier,
        "All": bm25_anserini ^ bm25_pyterrier ^ bert_dense,
    }

    # Recall@1000 used to be listed twice; each measure is now requested once.
    eval_metrics = [
        nDCG@10,
        nDCG@1000,
        RR@1000,
        Recall@10,
        Recall@1000,
        Recall@10000,
    ]

    return pt.Experiment(
        list(systems.values()),
        names=list(systems),
        topics=topics,
        qrels=qrels,
        eval_metrics=eval_metrics,
    )


In [70]:
# Evaluate all five systems on the "train" split; the cell output is the metrics table.
experiment("train")

Unnamed: 0,name,nDCG@10,nDCG@1000,RR@1000,R@10,R@1000,R@10000
0,BM25 (Anserini),0.021895,0.055211,0.021766,0.027972,0.27972,0.27972
1,BM25 (PyTerrier),0.064874,0.115279,0.057098,0.104895,0.454545,0.454545
2,BERT Dense,0.006618,0.025006,0.005087,0.013986,0.160839,0.160839
3,BM25 (Anserini^PyTerrier),0.021895,0.055211,0.021766,0.027972,0.27972,0.475524
4,All,0.021895,0.055211,0.021766,0.027972,0.27972,0.524476


In [71]:
# Evaluate all five systems on the "dev1" split; the cell output is the metrics table.
experiment("dev1")

Unnamed: 0,name,nDCG@10,nDCG@1000,RR@1000,R@10,R@1000,R@10000
0,BM25 (Anserini),0.030516,0.057501,0.029978,0.042254,0.21831,0.21831
1,BM25 (PyTerrier),0.083584,0.133527,0.081144,0.105634,0.450704,0.450704
2,BERT Dense,0.02157,0.045255,0.019727,0.035211,0.190141,0.190141
3,BM25 (Anserini^PyTerrier),0.030516,0.057501,0.029978,0.042254,0.21831,0.443662
4,All,0.030516,0.057501,0.029978,0.042254,0.21831,0.478873


In [72]:
# Evaluate all five systems on the "dev2" split; the cell output is the metrics table.
experiment("dev2")

Unnamed: 0,name,nDCG@10,nDCG@1000,RR@1000,R@10,R@1000,R@10000
0,BM25 (Anserini),0.042998,0.072473,0.041643,0.055944,0.251748,0.251748
1,BM25 (PyTerrier),0.099429,0.142733,0.08748,0.146853,0.454545,0.454545
2,BERT Dense,0.013501,0.032341,0.013126,0.020979,0.146853,0.146853
3,BM25 (Anserini^PyTerrier),0.042998,0.072473,0.041643,0.055944,0.251748,0.426573
4,All,0.042998,0.072473,0.041643,0.055944,0.251748,0.468531


In [73]:
# Evaluate all five systems on the "dev3" split; the cell output is the metrics table.
experiment("dev3")

Unnamed: 0,name,nDCG@10,nDCG@1000,RR@1000,R@10,R@1000,R@10000
0,BM25 (Anserini),0.092326,0.142904,0.084755,0.134328,0.470149,0.470149
1,BM25 (PyTerrier),0.33659,0.391737,0.312929,0.434701,0.770522,0.770522
2,BERT Dense,0.008413,0.022242,0.007414,0.014925,0.117537,0.117537
3,BM25 (Anserini^PyTerrier),0.092326,0.142904,0.084755,0.134328,0.470149,0.776119
4,All,0.092326,0.142904,0.084755,0.134328,0.470149,0.791045
