In [140]:
from pathlib import Path
import pandas as pd

In [141]:
def get_performance_mrr(data, col_gold, col_pred, list_k=[1, 5, 10]):
    """Compute MRR@k of a ranked prediction column against a gold column.

    For every row, the reciprocal rank is ``1 / position`` of the gold value
    within the first ``k`` entries of the prediction sequence (1-based, first
    occurrence), or ``0`` when the gold value is not among them. MRR@k is the
    mean of these values over all rows. Hit/miss counts are printed per ``k``.

    The caller's DataFrame is not modified (no scratch column is written to
    it), and the gold position is located once per row instead of once per
    ``k``.

    Args:
        data: DataFrame with one row per query.
        col_gold: Name of the column holding the gold identifier.
        col_pred: Name of the column holding the ranked predictions; each
            cell must be an iterable (list, tuple, array, ...).
        list_k: Cut-offs to evaluate. The default list is only iterated,
            never modified.

    Returns:
        dict mapping each ``k`` to its MRR@k (``nan`` for an empty frame).
    """
    def _first_position(gold, preds):
        # 1-based rank of the first match. NaN marks "not retrieved", so every
        # `ranks <= k` comparison below is False for that row.
        try:
            return list(preds).index(gold) + 1
        except ValueError:
            return float("nan")

    ranks = pd.Series(
        [_first_position(gold, preds) for gold, preds in zip(data[col_gold], data[col_pred])],
        index=data.index,
        dtype="float64",
    )
    # Loop-invariant: the reciprocal of the full-list rank; the cut-off only
    # decides whether a row keeps this value or is zeroed out.
    reciprocal_full = 1.0 / ranks

    d_performance = {}
    for k in list_k:
        in_topx = ranks <= k
        d_performance[k] = reciprocal_full.where(in_topx, 0.0).mean()
        print(f"{k = }")
        print(f"Number of queries in top {k}: {in_topx.sum()}")
        print(f"Number of queries not in top {k}: {len(data) - in_topx.sum()}")
    return d_performance

In [142]:
def evaluate_reranked_results(df_query, col_gold='cord_uid', col_pred='reranked_topk', list_k=[1, 5, 10]):
    """Evaluate a query frame by delegating to ``get_performance_mrr``.

    Convenience wrapper whose defaults name the gold column ``'cord_uid'``
    and the prediction column ``'reranked_topk'``. All arguments are passed
    through positionally and the callee's result is returned unchanged.

    Args:
        df_query: DataFrame to evaluate.
        col_gold: Name of the gold column.
        col_pred: Name of the ranked-prediction column.
        list_k: Cut-offs forwarded to ``get_performance_mrr``.

    Returns:
        Whatever ``get_performance_mrr`` returns for these arguments.
    """
    mrr_by_k = get_performance_mrr(df_query, col_gold, col_pred, list_k)
    return mrr_by_k

In [143]:
# Collect every parquet file under ./data and score each one.
# NOTE(review): Path.glob does not guarantee an ordering, so the row order of
# `df_eval` follows whatever order the filesystem listing returns.
path = Path('data')
files = list(path.glob('*.parquet'))

results = []
for file in files:
    print(f"Processing file: {file.name}")
    df_query = pd.read_parquet(file)

    # Guard clauses: a file is only scored when both required columns exist.
    if 'reranked_topk' not in df_query.columns:
        print(f"Column 'reranked_topk' not found in {file.name}. Skipping evaluation.")
        continue
    if 'cord_uid' not in df_query.columns:
        print(f"Column 'cord_uid' not found in {file.name}. Skipping evaluation.")
        continue

    performance = evaluate_reranked_results(
        df_query,
        col_gold='cord_uid',
        col_pred='reranked_topk',
        list_k=[1, 5, 10],
    )
    # One record per file: the file name plus the score at each cut-off.
    results.append({'name': file.name, **{cutoff: performance[cutoff] for cutoff in (1, 5, 10)}})

# Explicit column list keeps the layout stable even when `results` is empty.
df_eval = pd.DataFrame(results, columns=['name', 1, 5, 10])
df_eval


Processing file: reranked_results_alibaba_finetuned.parquet
k = 1
Number of queries in top 1: 9609
Number of queries not in top 1: 4644
k = 5
Number of queries in top 5: 11089
Number of queries not in top 5: 3164
k = 10
Number of queries in top 10: 11349
Number of queries not in top 10: 2904
Processing file: reranked_results_alibaba_multilingual_finetuned.parquet
k = 1
Number of queries in top 1: 9021
Number of queries not in top 1: 5232
k = 5
Number of queries in top 5: 10842
Number of queries not in top 5: 3411
k = 10
Number of queries in top 10: 11232
Number of queries not in top 10: 3021
Processing file: reranked_results_electra_finetuned.parquet
k = 1
Number of queries in top 1: 7957
Number of queries not in top 1: 6296
k = 5
Number of queries in top 5: 10281
Number of queries not in top 5: 3972
k = 10
Number of queries in top 10: 10933
Number of queries not in top 10: 3320
Processing file: reranked_results_miniLM.parquet
k = 1
Number of queries in top 1: 785
Number of queries not

Unnamed: 0,name,1,5,10
0,reranked_results_alibaba_finetuned.parquet,0.674174,0.716633,0.719122
1,reranked_results_alibaba_multilingual_finetune...,0.632919,0.685476,0.689208
2,reranked_results_electra_finetuned.parquet,0.558268,0.621813,0.628064
3,reranked_results_miniLM.parquet,0.560714,0.616786,0.622467
4,reranked_results_miniLM12_finetuned.parquet,0.661545,0.706641,0.709631
5,reranked_results_miniLM6_finetuned.parquet,0.626886,0.679552,0.683337
6,reranked_results_tinyBERT_finetuned.parquet,0.572441,0.633011,0.638966
7,reranked_results_alibaba_finetuned_bge-small-e...,0.651429,0.698833,0.702113
8,reranked_results_miniLM12_finetuned_bge-small-...,0.651429,0.697357,0.700896
9,reranked_results_tinyBERT_finetuned_bge-small-...,0.562857,0.625786,0.631567


In [144]:
# Work on an independent (deep) copy: the cells below overwrite `name` and add
# columns, and `df_eval` should keep the raw file names and scores.
df = df_eval.copy(deep=True)

In [145]:
# Flag rows whose name contains the 'finetuned' token.
# NOTE(review): the flag is read from `name` before the token is stripped
# below, so re-running this cell on the already-cleaned frame yields all-False.
df['is_finetuned'] = df['name'].map(lambda name: 'finetuned' in name)

# Remove 'finetuned', the '.parquet' extension and the 'reranked_results_'
# prefix from the name, then trim underscores left at either end.
df['name'] = df['name'].map(
    lambda name: name.replace('finetuned', '')
    .replace('.parquet', '')
    .replace('reranked_results_', '')
    .strip('_')
)
df

Unnamed: 0,name,1,5,10,is_finetuned
0,alibaba,0.674174,0.716633,0.719122,True
1,alibaba_multilingual,0.632919,0.685476,0.689208,True
2,electra,0.558268,0.621813,0.628064,True
3,miniLM,0.560714,0.616786,0.622467,False
4,miniLM12,0.661545,0.706641,0.709631,True
5,miniLM6,0.626886,0.679552,0.683337,True
6,tinyBERT,0.572441,0.633011,0.638966,True
7,alibaba__bge-small-en-v1.5_epochs2_crossentrop...,0.651429,0.698833,0.702113,True
8,miniLM12__bge-small-en-v1.5_epochs2_crossentro...,0.651429,0.697357,0.700896,True
9,tinyBERT__bge-small-en-v1.5_epochs2_crossentro...,0.562857,0.625786,0.631567,True


In [146]:
def extract_loss_type(filename):
    """Map a name to the loss label it encodes.

    Returns ``'lambda'`` when ``'lambdaloss'`` occurs in ``filename``,
    otherwise ``'crossentropy'`` when ``'crossentropyloss'`` occurs, and
    ``'unknown'`` when neither substring is present.
    """
    # Checked in insertion order, so 'lambdaloss' wins if both are present.
    labels_by_marker = {
        'lambdaloss': 'lambda',
        'crossentropyloss': 'crossentropy',
    }
    for marker, label in labels_by_marker.items():
        if marker in filename:
            return label
    return 'unknown'
    
# Derive the loss label from the current name.
# NOTE(review): must run before the tokens are stripped below; re-running this
# cell on the cleaned frame turns every label into 'unknown'.
df['loss_type'] = df['name'].map(extract_loss_type)

# Remove 'lambdaloss' and 'crossentropyloss' from the name and trim
# underscores left at either end (interior underscores are kept).
df['name'] = df['name'].map(
    lambda name: name.replace('lambdaloss', '').replace('crossentropyloss', '').strip('_')
)

In [147]:
def extract_embedding_model(filename):
    """Return the embedding-model token found in ``filename``.

    The known tokens are tried in order (``'static-retrieval-mrl-en-v1'``
    first, then ``'bge-small-en-v1.5'``) and the first one that occurs as a
    substring is returned verbatim; ``'unknown'`` is returned otherwise.
    """
    known_models = ('static-retrieval-mrl-en-v1', 'bge-small-en-v1.5')
    for model_name in known_models:
        if model_name in filename:
            return model_name
    return 'unknown'
    
# Derive the embedding-model token from the current name.
# NOTE(review): must run before the tokens are stripped below; re-running this
# cell on the cleaned frame turns every value into 'unknown'.
df['embedding_model'] = df['name'].map(extract_embedding_model)

# Remove 'static-retrieval-mrl-en-v1' and 'bge-small-en-v1.5' from the name and
# trim underscores left at either end (interior underscores are kept).
df['name'] = df['name'].map(
    lambda name: name.replace('static-retrieval-mrl-en-v1', '').replace('bge-small-en-v1.5', '').strip('_')
)
df

Unnamed: 0,name,1,5,10,is_finetuned,loss_type,embedding_model
0,alibaba,0.674174,0.716633,0.719122,True,unknown,unknown
1,alibaba_multilingual,0.632919,0.685476,0.689208,True,unknown,unknown
2,electra,0.558268,0.621813,0.628064,True,unknown,unknown
3,miniLM,0.560714,0.616786,0.622467,False,unknown,unknown
4,miniLM12,0.661545,0.706641,0.709631,True,unknown,unknown
5,miniLM6,0.626886,0.679552,0.683337,True,unknown,unknown
6,tinyBERT,0.572441,0.633011,0.638966,True,unknown,unknown
7,alibaba___epochs2,0.651429,0.698833,0.702113,True,crossentropy,bge-small-en-v1.5
8,miniLM12___epochs2,0.651429,0.697357,0.700896,True,crossentropy,bge-small-en-v1.5
9,tinyBERT___epochs2,0.562857,0.625786,0.631567,True,crossentropy,bge-small-en-v1.5


In [148]:
def extract_num_epochs_model(filename):
    """Return the epochs token found in ``filename``.

    ``'epochs2'`` is checked before ``'epochs1'``; the first token that occurs
    as a substring is returned verbatim, and ``'unknown'`` is returned when
    neither is present. Matching is plain substring search, so a longer token
    such as ``'epochs10'`` also matches ``'epochs1'``.
    """
    epoch_tags = ('epochs2', 'epochs1')
    return next((tag for tag in epoch_tags if tag in filename), 'unknown')
    
# Derive the epochs token from the current name.
# NOTE(review): must run before the tokens are stripped below; re-running this
# cell on the cleaned frame turns every value into 'unknown'.
df['epochs'] = df['name'].map(extract_num_epochs_model)

# Remove 'epochs2' and 'epochs1' from the name and trim underscores left at
# either end.
df['name'] = df['name'].map(
    lambda name: name.replace('epochs2', '').replace('epochs1', '').strip('_')
)
df

Unnamed: 0,name,1,5,10,is_finetuned,loss_type,embedding_model,epochs
0,alibaba,0.674174,0.716633,0.719122,True,unknown,unknown,unknown
1,alibaba_multilingual,0.632919,0.685476,0.689208,True,unknown,unknown,unknown
2,electra,0.558268,0.621813,0.628064,True,unknown,unknown,unknown
3,miniLM,0.560714,0.616786,0.622467,False,unknown,unknown,unknown
4,miniLM12,0.661545,0.706641,0.709631,True,unknown,unknown,unknown
5,miniLM6,0.626886,0.679552,0.683337,True,unknown,unknown,unknown
6,tinyBERT,0.572441,0.633011,0.638966,True,unknown,unknown,unknown
7,alibaba,0.651429,0.698833,0.702113,True,crossentropy,bge-small-en-v1.5,epochs2
8,miniLM12,0.651429,0.697357,0.700896,True,crossentropy,bge-small-en-v1.5,epochs2
9,tinyBERT,0.562857,0.625786,0.631567,True,crossentropy,bge-small-en-v1.5,epochs2


In [149]:
# Flag rows whose name contains the 'stricter embedding' token.
# NOTE(review): the token contains a space; confirm the file names really use
# a space rather than an underscore — every row is False in the recorded output.
# NOTE(review): the flag is read before the token is stripped below, so
# re-running this cell on the cleaned frame yields all-False.
df['has_stricter_embedding'] = df['name'].map(lambda name: 'stricter embedding' in name)

# Remove 'stricter embedding' from the name and trim underscores left at
# either end.
df['name'] = df['name'].map(lambda name: name.replace('stricter embedding', '').strip('_'))
df

Unnamed: 0,name,1,5,10,is_finetuned,loss_type,embedding_model,epochs,has_stricter_embedding
0,alibaba,0.674174,0.716633,0.719122,True,unknown,unknown,unknown,False
1,alibaba_multilingual,0.632919,0.685476,0.689208,True,unknown,unknown,unknown,False
2,electra,0.558268,0.621813,0.628064,True,unknown,unknown,unknown,False
3,miniLM,0.560714,0.616786,0.622467,False,unknown,unknown,unknown,False
4,miniLM12,0.661545,0.706641,0.709631,True,unknown,unknown,unknown,False
5,miniLM6,0.626886,0.679552,0.683337,True,unknown,unknown,unknown,False
6,tinyBERT,0.572441,0.633011,0.638966,True,unknown,unknown,unknown,False
7,alibaba,0.651429,0.698833,0.702113,True,crossentropy,bge-small-en-v1.5,epochs2,False
8,miniLM12,0.651429,0.697357,0.700896,True,crossentropy,bge-small-en-v1.5,epochs2,False
9,tinyBERT,0.562857,0.625786,0.631567,True,crossentropy,bge-small-en-v1.5,epochs2,False


In [150]:
# Flag rows whose name contains the 'learningrate' token. The column is a
# boolean presence flag, not a numeric learning-rate value.
# NOTE(review): the flag is read before the token is stripped below, so
# re-running this cell on the cleaned frame yields all-False.
df['learningrate'] = df['name'].map(lambda name: 'learningrate' in name)

# Remove 'learningrate' from the name and trim underscores left at either end.
df['name'] = df['name'].map(lambda name: name.replace('learningrate', '').strip('_'))
df

Unnamed: 0,name,1,5,10,is_finetuned,loss_type,embedding_model,epochs,has_stricter_embedding,learningrate
0,alibaba,0.674174,0.716633,0.719122,True,unknown,unknown,unknown,False,False
1,alibaba_multilingual,0.632919,0.685476,0.689208,True,unknown,unknown,unknown,False,False
2,electra,0.558268,0.621813,0.628064,True,unknown,unknown,unknown,False,False
3,miniLM,0.560714,0.616786,0.622467,False,unknown,unknown,unknown,False,False
4,miniLM12,0.661545,0.706641,0.709631,True,unknown,unknown,unknown,False,False
5,miniLM6,0.626886,0.679552,0.683337,True,unknown,unknown,unknown,False,False
6,tinyBERT,0.572441,0.633011,0.638966,True,unknown,unknown,unknown,False,False
7,alibaba,0.651429,0.698833,0.702113,True,crossentropy,bge-small-en-v1.5,epochs2,False,False
8,miniLM12,0.651429,0.697357,0.700896,True,crossentropy,bge-small-en-v1.5,epochs2,False,False
9,tinyBERT,0.562857,0.625786,0.631567,True,crossentropy,bge-small-en-v1.5,epochs2,False,False


In [151]:
def extractk(filename):
    """Return the ``k`` value encoded in ``filename`` as a string.

    Yields ``'1000'`` when ``'k1000'`` occurs in ``filename``, otherwise
    ``'100'`` when ``'k100'`` occurs, and ``'unknown'`` when neither substring
    is present.
    """
    # Longest marker first: 'k100' is a substring of 'k1000'.
    for marker in ('k1000', 'k100'):
        if marker in filename:
            return marker[1:]  # drop the leading 'k'
    return 'unknown'
    
# Derive the k value from the current name.
# NOTE(review): must run before the tokens are stripped below; re-running this
# cell on the cleaned frame turns every value into 'unknown'.
df['k'] = df['name'].map(extractk)

# Remove 'k1000' and 'k100' from the name (longer token first, so 'k1000' does
# not leave a stray '0' behind) and trim underscores left at either end.
df['name'] = df['name'].map(
    lambda name: name.replace('k1000', '').replace('k100', '').strip('_')
)
df

Unnamed: 0,name,1,5,10,is_finetuned,loss_type,embedding_model,epochs,has_stricter_embedding,learningrate,k
0,alibaba,0.674174,0.716633,0.719122,True,unknown,unknown,unknown,False,False,unknown
1,alibaba_multilingual,0.632919,0.685476,0.689208,True,unknown,unknown,unknown,False,False,unknown
2,electra,0.558268,0.621813,0.628064,True,unknown,unknown,unknown,False,False,unknown
3,miniLM,0.560714,0.616786,0.622467,False,unknown,unknown,unknown,False,False,unknown
4,miniLM12,0.661545,0.706641,0.709631,True,unknown,unknown,unknown,False,False,unknown
5,miniLM6,0.626886,0.679552,0.683337,True,unknown,unknown,unknown,False,False,unknown
6,tinyBERT,0.572441,0.633011,0.638966,True,unknown,unknown,unknown,False,False,unknown
7,alibaba,0.651429,0.698833,0.702113,True,crossentropy,bge-small-en-v1.5,epochs2,False,False,unknown
8,miniLM12,0.651429,0.697357,0.700896,True,crossentropy,bge-small-en-v1.5,epochs2,False,False,unknown
9,tinyBERT,0.562857,0.625786,0.631567,True,crossentropy,bge-small-en-v1.5,epochs2,False,False,unknown
