In [None]:
# !pip install trectools
import pandas as pd
import trectools as trec

In [None]:
# Read the tab-separated results file and preview its first five rows.
results_df = pd.read_csv(
    "./MQ2008-agg/result_agg_functions_MQ2008_top25.csv",
    delimiter="\t",
)
results_df.head(5)

In [None]:
# Build the relevance judgements (qrels) from the labels stored in the results
# table: one row per (qid, docid) pair, with `relevance` exposed as `rel`.
# No per-query rank is computed here because the qrels table only keeps the
# qid / query / q0 / docid / rel columns selected below.
qrel_df = results_df[["qid", "docid", "relevance"]].copy()
qrel_df['q0']  = 'Q0'
qrel_df['rel'] = qrel_df['relevance']
# String form of the query id, built the same way as in the run tables.
qrel_df['query'] = qrel_df['qid'].astype(str)
qrel_df = qrel_df[["qid","query", "q0", "docid", "rel"]]
print(qrel_df.head(5))

# Wrap the judgements in a trectools qrels object by assigning the frame directly.
trec_qrel = trec.TrecQrel()
trec_qrel.qrels_data = qrel_df.copy()

In [None]:
# Evaluate RRF
# Build the run table in one chained expression: within each query a larger
# `rrf` value receives a smaller (better) dense rank, and the same value is
# kept as the floating-point score.
run_df_rrf = results_df[["qid", "docid", "rrf"]].assign(
    rank=lambda frame: frame.groupby("qid")["rrf"].rank(method="dense", ascending=False).astype(int),
    q0="Q0",
    score=lambda frame: frame["rrf"].astype(float),
    system="rrf",
    query=lambda frame: frame["qid"].astype(str),
)[["qid", "query", "q0", "docid", "rank", "score", "system"]]

# RRF Evaluation
run_rrf = trec.TrecRun()
run_rrf.load_run_from_dataframe(run_df_rrf)
rrf_trec_eval = trec.TrecEval(run_rrf, trec_qrel)

rrf_relevant_retrieved = rrf_trec_eval.get_relevant_retrieved_documents()
print(f"rrd:{rrf_relevant_retrieved}")

# Report nDCG, precision and MAP at cutoffs 1 through 10.
for cutoff in range(1, 11):
    rrf_ndcg = rrf_trec_eval.get_ndcg(depth=cutoff, trec_eval=True)
    print(f"ndcg@{cutoff}:{rrf_ndcg}")
    rrf_precision = rrf_trec_eval.get_precision(depth=cutoff, trec_eval=True)
    print(f"precision@{cutoff}:{rrf_precision}")
    rrf_map = rrf_trec_eval.get_map(depth=cutoff, trec_eval=True)
    print(f"map@{cutoff}:{rrf_map}")
    


In [None]:
# Evaluate SRRF
# Start from the squared-RRF column and derive the run columns from it.
srrf_base = results_df[["qid", "docid", "squared_rrf"]]
srrf_dense_rank = srrf_base.groupby("qid")["squared_rrf"].rank(method="dense", ascending=False)
run_df_srrf = srrf_base.assign(
    rank=srrf_dense_rank.astype(int),  # larger squared_rrf -> smaller rank number
    q0="Q0",
    score=srrf_base["squared_rrf"].astype(float),
    system="srrf",
    query=srrf_base["qid"].astype(str),
)
run_df_srrf = run_df_srrf.loc[:, ["qid", "query", "q0", "docid", "rank", "score", "system"]]

# SRRF Evaluation
run_srrf = trec.TrecRun()
run_srrf.load_run_from_dataframe(run_df_srrf)
srrf_trec_eval = trec.TrecEval(run_srrf, trec_qrel)
print(f"rrd:{srrf_trec_eval.get_relevant_retrieved_documents()}")

# Same three metrics at every cutoff from 1 to 10, printed in a fixed order.
for k in range(1, 11):
    print(f"ndcg@{k}:{srrf_trec_eval.get_ndcg(depth=k, trec_eval=True)}")
    print(f"precision@{k}:{srrf_trec_eval.get_precision(depth=k, trec_eval=True)}")
    print(f"map@{k}:{srrf_trec_eval.get_map(depth=k, trec_eval=True)}")
    

In [None]:
# Evaluate MRRF
# Build the run table from the `mrrf` column: within each query a larger
# value gets a smaller (better) dense rank, and the value itself is the score.
run_df_mrrf = results_df[["qid", "docid", "mrrf"]].copy()
run_df_mrrf['rank'] = run_df_mrrf.groupby('qid')['mrrf'].rank(method='dense', ascending=False).astype(int)
run_df_mrrf['q0']  = 'Q0'
run_df_mrrf['score'] = run_df_mrrf['mrrf'].astype(float)
run_df_mrrf['system'] = 'mrrf'
run_df_mrrf['query'] = run_df_mrrf['qid'].astype(str)
run_df_mrrf = run_df_mrrf[["qid","query","q0", "docid","rank", "score", "system"]]

# MRRF Evaluation
run_mrrf = trec.TrecRun()
run_mrrf.load_run_from_dataframe(run_df_mrrf)
mrrf_trec_eval = trec.TrecEval(run_mrrf, trec_qrel)
print(f"rrd:{mrrf_trec_eval.get_relevant_retrieved_documents()}")
for i in range(1,11):
    # The ndcg line must call get_ndcg; it previously called get_precision,
    # so the value printed as "ndcg" was a duplicate of the precision value.
    print(f"ndcg@{i}:{mrrf_trec_eval.get_ndcg(depth=i, trec_eval=True)}")
    print(f"precision@{i}:{mrrf_trec_eval.get_precision(depth=i, trec_eval=True)}")
    print(f"map@{i}:{mrrf_trec_eval.get_map(depth=i, trec_eval=True)}")
    

In [None]:
# Evaluate Average Rank
# Unlike the RRF-style columns, a *smaller* `avg_rank` means a better document,
# so the dense rank is computed in ascending order.
run_df_avgrank = results_df[["qid", "docid", "avg_rank"]].copy()
run_df_avgrank['rank'] = run_df_avgrank.groupby('qid')['avg_rank'].rank(method='dense', ascending=True).astype(int)
run_df_avgrank['q0']  = 'Q0'
# Negate the average rank so that a higher score means a better document,
# keeping `score` consistent with `rank` for score-ordered evaluation.
run_df_avgrank['score'] = -run_df_avgrank['avg_rank'].astype(float)
run_df_avgrank['system'] = 'avg_rank'
run_df_avgrank['query'] = run_df_avgrank['qid'].astype(str)
run_df_avgrank = run_df_avgrank[["qid","query","q0", "docid","rank", "score", "system"]]
# Also store the rows best-first within each query, so that any consumer that
# relies on row order sees the same ordering as `rank` and `score`.
run_df_avgrank = run_df_avgrank.sort_values(["qid", "rank"]).reset_index(drop=True)

# Avg Rank Evaluation
run_avg_rank = trec.TrecRun()
run_avg_rank.load_run_from_dataframe(run_df_avgrank)
avgrank_trec_eval = trec.TrecEval(run_avg_rank, trec_qrel)
print(f"rrd:{avgrank_trec_eval.get_relevant_retrieved_documents()}")
for i in range(1,11):
    # trec_eval=True matches the RRF / SRRF / MRRF cells so the numbers are comparable.
    # NOTE(review): with trec_eval=False trectools appears to cut each query at the
    # first `depth` rows in DataFrame order rather than by score -- confirm against
    # the installed trectools version.
    # The ndcg line calls get_ndcg (it previously called get_precision).
    print(f"ndcg@{i}:{avgrank_trec_eval.get_ndcg(depth=i, trec_eval=True)}")
    print(f"precision@{i}:{avgrank_trec_eval.get_precision(depth=i, trec_eval=True)}")
    print(f"map@{i}:{avgrank_trec_eval.get_map(depth=i, trec_eval=True)}")