In [1]:
# !pip install trectools
import pandas as pd
import trectools as trec

In [2]:
results_df = pd.read_csv("./MQ2008-agg/result_agg_functions_MQ2008_top25.csv", sep="\t")
results_df.head(5)

Unnamed: 0,qid,docid,relevance,ranking_1,ranking_2,ranking_3,ranking_4,ranking_5,ranking_6,ranking_7,...,ranking_20_rank,ranking_21_rank,ranking_22_rank,ranking_23_rank,ranking_24_rank,ranking_25_rank,avg_rank,rrf,squared_rrf,mrrf
0,10002,GX008-86-4444840,0,1,30,48,133,0,265,0,...,0,0,1,1,0,0,1.416667,0.195416,9.472222,0.658577
1,10002,GX037-06-11625428,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1.0,0.04918,3.0,0.166826
2,10002,GX044-30-4142998,0,0,0,0,0,0,8,0,...,0,0,4,0,0,0,2.444444,0.144178,3.708333,0.478094
3,10002,GX228-42-3888699,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1.0,0.04918,3.0,0.166826
4,10002,GX229-14-12863205,0,0,0,0,0,0,0,0,...,0,0,0,0,2,2,2.0,0.032258,0.5,0.107659


In [3]:
qrel_df = results_df[["qid", "docid", "relevance"]].copy()
qrel_df['rank'] = qrel_df.groupby('qid')['relevance'].rank(method='dense', ascending=False).astype(int)
qrel_df['q0']  = 'Q0'
qrel_df['rel'] = qrel_df['relevance']
qrel_df['query'] = qrel_df['qid'].astype(str)
qrel_df = qrel_df[["qid","query", "q0", "docid", "rel"]]
print(qrel_df.head(5))
trec_qrel = trec.TrecQrel()
trec_qrel.qrels_data = qrel_df.copy()

     qid  query  q0              docid  rel
0  10002  10002  Q0   GX008-86-4444840    0
1  10002  10002  Q0  GX037-06-11625428    0
2  10002  10002  Q0   GX044-30-4142998    0
3  10002  10002  Q0   GX228-42-3888699    0
4  10002  10002  Q0  GX229-14-12863205    0


In [4]:
# Evaluate RRF
run_df_rrf = results_df[["qid", "docid", "rrf"]].copy()
run_df_rrf['rank'] = run_df_rrf.groupby('qid')['rrf'].rank(method='dense', ascending=False).astype(int)
run_df_rrf['q0']  = 'Q0'
run_df_rrf['score'] = run_df_rrf['rrf'].astype(float)
run_df_rrf['system'] = 'rrf'
run_df_rrf['query'] = run_df_rrf['qid'].astype(str)
run_df_rrf = run_df_rrf[["qid","query","q0", "docid","rank" ,"score", "system"]]
# print(run_df_rrf.head(5))

# RRF Evaluation
run_rrf = trec.TrecRun()
run_rrf.load_run_from_dataframe(run_df_rrf)
rrf_trec_eval = trec.TrecEval(run_rrf, trec_qrel)
print(f"rrd:{rrf_trec_eval.get_relevant_retrieved_documents()}")
for i in range(1,11):
    print(f"ndcg@{i}:{rrf_trec_eval.get_ndcg(depth=i, trec_eval=True)}")
    print(f"precision@{i}:{rrf_trec_eval.get_precision(depth=i, trec_eval=True)}")
    print(f"map@{i}:{rrf_trec_eval.get_map(depth=i, trec_eval=True)}")
    


rrd:2932
ndcg@1:0.14540816326530612
precision@1:0.40816326530612246
map@1:0.1404866851357125
ndcg@2:0.1774871940361115
precision@2:0.3903061224489796
map@2:0.21935123402350098
ndcg@3:0.2066683050090782
precision@3:0.37244897959183676
map@3:0.278177847939881
ndcg@4:0.23615031209230392
precision@4:0.36160714285714285
map@4:0.3279098726168327
ndcg@5:0.26102242956098465
precision@5:0.3369897959183673
map@5:0.35807042704773806
ndcg@6:0.28563320745130655
precision@6:0.3112244897959184
map@6:0.3770862004685037
ndcg@7:0.3106510955570426
precision@7:0.293002915451895
map@7:0.3949069345492399
ndcg@8:0.32997812956579525
precision@8:0.27487244897959184
map@8:0.40698622089114905
ndcg@9:0.33506569960048216
precision@9:0.25623582766439906
map@9:0.41388051833497985
ndcg@10:0.3400595208104161
precision@10:0.24094387755102045
map@10:0.42071387721275555


In [5]:
# Evaluate SRRF
run_df_srrf = results_df[["qid", "docid", "squared_rrf"]].copy()
run_df_srrf['rank'] = run_df_srrf.groupby('qid')['squared_rrf'].rank(method='dense', ascending=False).astype(int)
run_df_srrf['q0']  = 'Q0'
run_df_srrf['score'] = run_df_srrf['squared_rrf'].astype(float)
run_df_srrf['system'] = 'srrf'
run_df_srrf['query'] = run_df_srrf['qid'].astype(str)
run_df_srrf = run_df_srrf[["qid","query","q0", "docid","rank" ,"score", "system"]]
# print(run_df_srrf.head(5))

# SRRF Evaluation
run_srrf = trec.TrecRun()
run_srrf.load_run_from_dataframe(run_df_srrf)
srrf_trec_eval = trec.TrecEval(run_srrf, trec_qrel)
print(f"rrd:{srrf_trec_eval.get_relevant_retrieved_documents()}")
for i in range(1,11):
    # print(f"ndcg@{i}:{srrf_trec_eval.get_ndcg(depth=i)}")
    print(f"ndcg@{i}:{srrf_trec_eval.get_ndcg(depth=i, trec_eval=True)}")
    print(f"precision@{i}:{srrf_trec_eval.get_precision(depth=i, trec_eval=True)}")
    print(f"map@{i}:{srrf_trec_eval.get_map(depth=i, trec_eval=True)}")
    

rrd:2932
ndcg@1:0.14540816326530612
precision@1:0.20918367346938777
map@1:0.06685486198484161
ndcg@2:0.1774871940361115
precision@2:0.23341836734693877
map@2:0.11926392081579815
ndcg@3:0.2066683050090782
precision@3:0.2385204081632653
map@3:0.15987996473703256
ndcg@4:0.23615031209230392
precision@4:0.23469387755102042
map@4:0.19093547261200342
ndcg@5:0.26102242956098465
precision@5:0.2303571428571429
map@5:0.21655419857866526
ndcg@6:0.28563320745130655
precision@6:0.22385204081632654
map@6:0.23613368880697752
ndcg@7:0.3106510955570426
precision@7:0.2193877551020408
map@7:0.25493593303741463
ndcg@8:0.32997812956579525
precision@8:0.21045918367346939
map@8:0.2671258327110977
ndcg@9:0.33506569960048216
precision@9:0.20011337868480722
map@9:0.2741159300274637
ndcg@10:0.3400595208104161
precision@10:0.18966836734693876
map@10:0.2792973417886442


In [6]:
# Evaluate MRRF
run_df_mrrf = results_df[["qid", "docid", "mrrf"]].copy()
run_df_mrrf['rank'] = run_df_mrrf.groupby('qid')['mrrf'].rank(method='dense', ascending=False).astype(int)
run_df_mrrf['q0']  = 'Q0'
run_df_mrrf['score'] = run_df_mrrf['mrrf'].astype(float)
run_df_mrrf['system'] = 'mrrf'
run_df_mrrf['query'] = run_df_mrrf['qid'].astype(str)
run_df_mrrf = run_df_mrrf[["qid","query","q0", "docid","rank", "score", "system"]]
# print(run_df_mrrf.head(5))

# MRRF Evaluation
run_mrrf = trec.TrecRun()
run_mrrf.load_run_from_dataframe(run_df_mrrf)
mrrf_trec_eval = trec.TrecEval(run_mrrf, trec_qrel)
print(f"rrd:{mrrf_trec_eval.get_relevant_retrieved_documents()}")
for i in range(1,11):
    print(f"ndcg@{i}:{mrrf_trec_eval.get_precision(depth=i, trec_eval=True)}")
    print(f"precision@{i}:{mrrf_trec_eval.get_precision(depth=i, trec_eval=True)}")
    print(f"map@{i}:{mrrf_trec_eval.get_map(depth=i, trec_eval=True)}")
    

rrd:2932
ndcg@1:0.3864795918367347
precision@1:0.3864795918367347
map@1:0.13604045163898382
ndcg@2:0.3711734693877551
precision@2:0.3711734693877551
map@2:0.21161501430667648
ndcg@3:0.35714285714285715
precision@3:0.35714285714285715
map@3:0.2690647702963287
ndcg@4:0.34534438775510207
precision@4:0.34534438775510207
map@4:0.31676026275339353
ndcg@5:0.3239795918367347
precision@5:0.3239795918367347
map@5:0.3473437269326138
ndcg@6:0.3012329931972789
precision@6:0.3012329931972789
map@6:0.3662278283062537
ndcg@7:0.2844387755102041
precision@7:0.2844387755102041
map@7:0.38384254633651116
ndcg@8:0.2661033163265306
precision@8:0.2661033163265306
map@8:0.3948363326273027
ndcg@9:0.24829931972789118
precision@9:0.24829931972789118
map@9:0.40167950621743603
ndcg@10:0.2343112244897959
precision@10:0.2343112244897959
map@10:0.40862009543180977


In [7]:
# Evaluate Average Rank
run_df_avgrank = results_df[["qid", "docid", "avg_rank"]].copy()
run_df_avgrank['rank'] = run_df_avgrank.groupby('qid')['avg_rank'].rank(method='dense', ascending=True).astype(int)
run_df_avgrank['q0']  = 'Q0'
run_df_avgrank['score'] = run_df_avgrank['avg_rank'].astype(float)
run_df_avgrank['system'] = 'avg_rank'
run_df_avgrank['query'] = run_df_avgrank['qid'].astype(str)
run_df_avgrank = run_df_avgrank[["qid","query","q0", "docid","rank", "score", "system"]]

# Avg Rank Evaluation
run_avg_rank = trec.TrecRun()
run_avg_rank.load_run_from_dataframe(run_df_avgrank)
avgrank_trec_eval = trec.TrecEval(run_avg_rank, trec_qrel)
print(f"rrd:{avgrank_trec_eval.get_relevant_retrieved_documents()}")
for i in range(1,11):
    print(f"ndcg@{i}:{avgrank_trec_eval.get_precision(depth=i, trec_eval=False)}")
    print(f"precision@{i}:{avgrank_trec_eval.get_precision(depth=i, trec_eval=False)}")
    print(f"map@{i}:{avgrank_trec_eval.get_map(depth=i, trec_eval=False)}")

rrd:2932
ndcg@1:0.16454081632653061
precision@1:0.16454081632653061
map@1:0.05409057112031064
ndcg@2:0.19260204081632654
precision@2:0.19260204081632654
map@2:0.09330295170037976
ndcg@3:0.20493197278911562
precision@3:0.20493197278911562
map@3:0.1258940875651836
ndcg@4:0.21205357142857142
precision@4:0.21205357142857142
map@4:0.15544909486087932
ndcg@5:0.21173469387755103
precision@5:0.21173469387755103
map@5:0.17932334116455
ndcg@6:0.21045918367346939
precision@6:0.21045918367346939
map@6:0.19997965921647268
ndcg@7:0.21191690962099122
precision@7:0.21191690962099122
map@7:0.2222365775991628
ndcg@8:0.2074298469387755
precision@8:0.2074298469387755
map@8:0.23879718863940372
ndcg@9:0.19387755102040816
precision@9:0.19387755102040816
map@9:0.24324573303864874
ndcg@10:0.18252551020408167
precision@10:0.18252551020408167
map@10:0.24737064167917353
