In [1]:
import pandas as pd
import json
import os
from collections import defaultdict

In [2]:
# Metric labels, in the order used as table columns: every metric at cut-offs 3 and 5.
metrics = [f"{name}@{k}" for name in ("ndcg", "mrr", "map") for k in (3, 5)]
# Top-level report keys that are kept when a report is loaded.
model_names = ["base", "cohere", "flashrank"]


def _report_paths(seed: int) -> list[str]:
    """Return the four aggregated-report JSON paths for the given seed number."""
    variants = ("base_basic", "base_by_title", "multiquery_basic", "multiquery_by_title")
    return [f"Data/json/seed_{seed}/{variant}/q_all_report_{variant}.json" for variant in variants]


paths_seed_1 = _report_paths(1)
paths_seed_2 = _report_paths(2)

n = 40
seed_1_len, seed_2_len = 5, 25


def calculate_weighted_average(avg_1: float | int, avg_2: float | int, n_1: int=seed_1_len, n_2: int=seed_2_len, round_to: int=None):
    if not round_to:
        return ((avg_1*n_1)+(avg_2*n_2))/(n_1+n_2)
    return round(((avg_1*n_1)+(avg_2*n_2))/(n_1+n_2), round_to)

In [3]:
def _load_seed_reports(paths):
    """Load each report JSON in ``paths`` and build a styled score table for it.

    For every top-level key of the report that is listed in ``model_names``,
    the values of the first nested mapping under that key are taken as the
    model's score list.

    Args:
        paths: Iterable of paths to report JSON files.

    Returns:
        A ``(score_dicts, styled_tables)`` pair of lists, one entry per path:
        the raw ``{model: [scores]}`` dict and the corresponding pandas Styler
        (max per column highlighted dark green, min dark red, captioned with
        the file name, values formatted to 3 decimals).
    """
    score_dicts = []
    styled_tables = []
    for path in paths:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # NOTE(review): scores are labelled with `metrics` purely by position
        # below -- confirm the JSON stores them in that same order.
        dic = {
            k: list(list(v.values())[0].values())
            for k, v in data.items()
            if k in model_names
        }
        score_dicts.append(dic)
        styled = (
            pd.DataFrame(dic, index=metrics).T
            .style.highlight_max(color="darkgreen", axis=0)
            .highlight_min(color="darkred", axis=0)
            .set_caption(os.path.basename(path))
            .format("{:.3f}")
        )
        styled_tables.append(styled)
    return score_dicts, styled_tables


# Same loading/styling pipeline for both seeds.
dict_seed_1, dfs_seed1 = _load_seed_reports(paths_seed_1)
dict_seed_2, dfs_seed2 = _load_seed_reports(paths_seed_2)

In [4]:
# Styled score table for the second seed-1 report (q_all_report_base_by_title.json).
dfs_seed1[1]

Unnamed: 0,ndcg@3,ndcg@5,mrr@3,mrr@5,map@3,map@5
base,0.635,0.586,0.733,0.733,0.459,0.484
cohere,0.782,0.798,0.8,0.8,0.539,0.699
flashrank,0.2,0.251,0.5,0.5,0.142,0.188


In [5]:
# Styled score table for the second seed-2 report (q_all_report_base_by_title.json).
dfs_seed2[1]

Unnamed: 0,ndcg@3,ndcg@5,mrr@3,mrr@5,map@3,map@5
base,0.764,0.791,0.82,0.828,0.683,0.73
cohere,0.642,0.724,0.7,0.728,0.577,0.645
flashrank,0.091,0.1,0.087,0.095,0.061,0.067


In [103]:
# Spot check: weighted average of 0.467 and 0.517 with the default weights
# (seed_1_len=5, seed_2_len=25), rounded to 3 decimals.
calculate_weighted_average(0.467, 0.517, round_to=3)

0.509

In [7]:
# Merge the per-seed score dicts into one weighted-average dict per report.
dict_all = []
for dic_seed_1, dic_seed_2 in zip(dict_seed_1, dict_seed_2):
    # Pair models by name rather than by dict position, so a different key
    # order in the two reports cannot mis-pair or spuriously fail. Use an
    # explicit error instead of `assert`, which is stripped under `python -O`.
    if dic_seed_1.keys() != dic_seed_2.keys():
        raise ValueError(
            f"Model keys differ between seeds: {sorted(dic_seed_1)} vs {sorted(dic_seed_2)}"
        )
    dic_reranker = {
        model: [
            calculate_weighted_average(avg_1, avg_2)
            # strict=True: fail loudly if the two seeds report a different
            # number of metrics instead of silently truncating.
            for avg_1, avg_2 in zip(dic_seed_1[model], dic_seed_2[model], strict=True)
        ]
        for model in dic_seed_1
    }
    dict_all.append(dic_reranker)

In [8]:
# One styled table per combined score dict, captioned with the file name of
# the matching seed-1 report.
dfs_all = []
for path, dic in zip(paths_seed_1, dict_all):
    scores_by_model = pd.DataFrame(dic, index=metrics).T
    styler = scores_by_model.style
    styler = styler.highlight_max(color="darkgreen", axis=0)
    styler = styler.highlight_min(color="darkred", axis=0)
    dfs_all.append(
        styler.set_caption(os.path.basename(path)).format("{:.3f}")
    )

In [10]:
# Seed-weighted average scores for the first report (q_all_report_base_basic.json).
dfs_all[0]

Unnamed: 0,ndcg@3,ndcg@5,mrr@3,mrr@5,map@3,map@5
base,0.661,0.676,0.783,0.783,0.564,0.607
cohere,0.649,0.645,0.717,0.723,0.526,0.58
flashrank,0.12,0.14,0.2,0.222,0.069,0.084


In [11]:
# Seed-weighted average scores for the second report (q_all_report_base_by_title.json).
dfs_all[1]

Unnamed: 0,ndcg@3,ndcg@5,mrr@3,mrr@5,map@3,map@5
base,0.742,0.757,0.806,0.812,0.646,0.689
cohere,0.666,0.737,0.717,0.74,0.571,0.654
flashrank,0.109,0.125,0.156,0.162,0.074,0.087


In [12]:
# Seed-weighted average scores for the third report (q_all_report_multiquery_basic.json).
dfs_all[2]

Unnamed: 0,ndcg@3,ndcg@5,mrr@3,mrr@5,map@3,map@5
base,0.632,0.616,0.756,0.756,0.532,0.554
cohere,0.625,0.657,0.7,0.723,0.508,0.585
flashrank,0.031,0.047,0.05,0.073,0.014,0.021


In [13]:
# Seed-weighted average scores for the fourth report (q_all_report_multiquery_by_title.json).
dfs_all[3]

Unnamed: 0,ndcg@3,ndcg@5,mrr@3,mrr@5,map@3,map@5
base,0.659,0.688,0.739,0.746,0.583,0.628
cohere,0.674,0.736,0.728,0.743,0.578,0.653
flashrank,0.023,0.046,0.067,0.092,0.018,0.03
