In [15]:
from pathlib import Path
import pandas as pd

In [16]:
def _reciprocal_rank(gold, preds, k):
    """Return 1/rank of `gold` within the first `k` entries of `preds`, or 0.0 if absent."""
    # Materialise the slice once; works for any sliceable sequence of predictions.
    top_k = list(preds[:k])
    if gold in top_k:
        return 1 / (top_k.index(gold) + 1)
    return 0.0


def get_performance_mrr(data, col_gold, col_pred, list_k=(1, 5, 10)):
    """Compute the mean reciprocal rank (MRR@k) for each cut-off in `list_k`.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per query. NOTE: a column named ``"in_topx"`` is written to
        this frame (and overwritten for every k); after the call it holds the
        per-row reciprocal ranks for the last k in `list_k`.
    col_gold : str
        Name of the column holding the expected item of each row.
    col_pred : str
        Name of the column holding the ranked predictions of each row
        (any sequence that supports slicing).
    list_k : iterable of int, default (1, 5, 10)
        Cut-offs at which the metric is evaluated.

    Returns
    -------
    dict
        Maps each k to the mean reciprocal rank over all rows. The value is
        NaN when `data` has no rows.

    Side effects
    ------------
    Prints, for every k, how many rows have / do not have their gold item
    within the first k predictions.
    """
    d_performance = {}
    for k in list_k:
        # Iterate the two columns directly instead of a row-wise DataFrame.apply:
        # apply(axis=1) on an empty frame does not return a Series, which made
        # the column assignment fail or misbehave for empty inputs.
        scores = [
            _reciprocal_rank(gold, preds, k)
            for gold, preds in zip(data[col_gold], data[col_pred])
        ]
        # Explicit index + dtype keeps the assignment aligned with `data` and
        # yields a float column even when there are no rows or no hits.
        data["in_topx"] = pd.Series(scores, index=data.index, dtype="float64")
        d_performance[k] = data["in_topx"].mean()
        print(f"{k = }")
        in_topx = data["in_topx"] > 0
        print(f"Number of queries in top {k}: {in_topx.sum()}")
        print(f"Number of queries not in top {k}: {len(data) - in_topx.sum()}")
    return d_performance

In [17]:
def evaluate_reranked_results(df_query, col_gold='cord_uid', col_pred='reranked_topk', list_k=[1, 5, 10]):
    """Evaluate a query frame with `get_performance_mrr`, using this notebook's default column names.

    All arguments are forwarded unchanged; the return value is whatever
    `get_performance_mrr` returns for them.
    """
    performance = get_performance_mrr(
        data=df_query,
        col_gold=col_gold,
        col_pred=col_pred,
        list_k=list_k,
    )
    return performance

In [18]:
# Evaluate every parquet file found in the data directory.
path = Path('data')
# Sort the matches: glob yields entries in filesystem-dependent order, which
# would make the row order of the summary table vary between machines/runs.
files = sorted(path.glob('*.parquet'))

# Cut-offs to evaluate; defined once so the call, the result rows and the
# summary columns cannot drift apart.
eval_ks = [1, 5, 10]

results = []
for file in files:
    print(f"Processing file: {file.name}")
    df_query = pd.read_parquet(file)
    if 'reranked_topk' in df_query.columns:
        performance = evaluate_reranked_results(df_query, col_gold='cord_uid', col_pred='reranked_topk', list_k=eval_ks)
        # One summary row per file: the file name plus one score per cut-off.
        results.append({'name': file.name, **{k: performance[k] for k in eval_ks}})
    else:
        print(f"Column 'reranked_topk' not found in {file.name}. Skipping evaluation.")

# Explicit columns keep the table well-formed even when no file was evaluated.
df_eval = pd.DataFrame(results, columns=['name', *eval_ks])
df_eval


Processing file: reranked_results_alibaba.parquet
k = 1
Number of queries in top 1: 9658
Number of queries not in top 1: 4595
k = 5
Number of queries in top 5: 11240
Number of queries not in top 5: 3013
k = 10
Number of queries in top 10: 11565
Number of queries not in top 10: 2688
Processing file: reranked_results_alibaba_finetuned.parquet
k = 1
Number of queries in top 1: 9609
Number of queries not in top 1: 4644
k = 5
Number of queries in top 5: 11089
Number of queries not in top 5: 3164
k = 10
Number of queries in top 10: 11349
Number of queries not in top 10: 2904
Processing file: reranked_results_alibaba_multilingual.parquet
k = 1
Number of queries in top 1: 9026
Number of queries not in top 1: 5227
k = 5
Number of queries in top 5: 10949
Number of queries not in top 5: 3304
k = 10
Number of queries in top 10: 11361
Number of queries not in top 10: 2892
Processing file: reranked_results_alibaba_multilingual_finetuned.parquet
k = 1
Number of queries in top 1: 9021
Number of querie

Unnamed: 0,name,1,5,10
0,reranked_results_alibaba.parquet,0.677612,0.722566,0.725669
1,reranked_results_alibaba_finetuned.parquet,0.674174,0.716633,0.719122
2,reranked_results_alibaba_multilingual.parquet,0.63327,0.688067,0.692047
3,reranked_results_alibaba_multilingual_finetune...,0.632919,0.685476,0.689208
4,reranked_results_electra.parquet,0.49337,0.555816,0.56414
5,reranked_results_electra_finetuned.parquet,0.558268,0.621813,0.628064
6,reranked_results_miniLM.parquet,0.560714,0.616786,0.622467
7,reranked_results_miniLM12_finetuned.parquet,0.661545,0.706641,0.709631
8,reranked_results_miniLM6_finetuned.parquet,0.626886,0.679552,0.683337
9,reranked_results_mxbai.parquet,0.686452,0.723197,0.725351
