<a href="https://colab.research.google.com/github/RecSys-lab/MM-VideoRec/blob/main/colabs/rank_aggregation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ViLLA-MMBench: Generated RecLists Processings**

- 📝 Paper Submitted to RecSys'25
- 🗄️ Recommendation Lists: [link](https://github.com/RecSys-lab/MM-VideoRec/tree/main/reclist)

**I. Data Loading**

In [1]:
# A Dictionary of Generated Recommendation Lists (sampling)
reclists = {
    # Baseline
    "baseline_cf": ("cf", "", "", ""),
    # Unimodal Text
    "unimodal_txt_raw_openai_amr": ("amr", "raw-oai", "", ""),
    "unimodal_txt_llm_openai_amr": ("amr", "llm-oai", "", ""),
    # "unimodal_txt_raw_llama_vbpr": ("vbpr", "raw-lm", "", ""),
    # "unimodal_txt_llm_llama_vbpr": ("vbpr", "llm-lm", "", ""),
    # "unimodal_txt_raw_st_vbpr": ("vbpr", "raw-st", "", ""),
    # Unimodal Visual
    "unimodal_vis_avf_amr": ("amr", "", "avf", ""),
    "unimodal_vis_cnn_amr": ("amr", "", "cnn", ""),
    # Unimodal Audio
    "unimodal_aud_blf_amr": ("amr", "", "", "blf"),
    "unimodal_aud_iv_amr": ("amr", "", "", "iv"),
    # Multimodal
    # "multimodal_txt_llm_st_vis_cnn_amr": ("amr", "llm-st", "cnn", "iv"),
}

# Variables
file_name_suffix = ".csv"
file_name_prefix = "reclist_df_1m_"
reclist_base_url = ("https://raw.githubusercontent.com/RecSys-lab/"
            "MM-VideoRec/refs/heads/main/reclist/")

# Generate File Addresses
def fileNameGenerator(params: dict):
  # Variables
  file_name = ""
  model, txt_mod, vis_mod, aud_mod = params
  # Add prefix
  file_name += reclist_base_url
  file_name += file_name_prefix
  # Add model
  if model == "cf":
    file_name += "cf"
  else:
    file_name += f"{model}_"
  # Check Modality
  if txt_mod != "":
    file_name += "txt-" + txt_mod
  if vis_mod != "":
    vis_prefix = "vis-" if txt_mod == "" else "_vis-"
    file_name += vis_prefix + vis_mod
  if aud_mod != "":
    aud_prefix = "aud-" if vis_mod == "" else "_aud-"
    file_name += aud_prefix + aud_mod
  # Add suffix
  file_name += file_name_suffix
  return file_name

# Sample Usages
print('Baseline: ', fileNameGenerator(reclists['baseline_cf']))
print('Unimodal Text Raw OpenAI: ', fileNameGenerator(reclists['unimodal_txt_raw_openai_amr']))
print('Unimodal Text LLM OpenAI: ', fileNameGenerator(reclists['unimodal_txt_llm_openai_amr']))
print('Unimodal Visual AVF: ', fileNameGenerator(reclists['unimodal_vis_avf_amr']))
print('Unimodal Visual CNN: ', fileNameGenerator(reclists['unimodal_vis_cnn_amr']))
print('Unimodal Audio BLF: ', fileNameGenerator(reclists['unimodal_aud_blf_amr']))
print('Unimodal Audio i-Vec: ', fileNameGenerator(reclists['unimodal_aud_iv_amr']))

Baseline:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_cf.csv
Unimodal Text Raw OpenAI:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_amr_txt-raw-oai.csv
Unimodal Text LLM OpenAI:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_amr_txt-llm-oai.csv
Unimodal Visual AVF:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_amr_vis-avf.csv
Unimodal Visual CNN:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_amr_vis-cnn.csv
Unimodal Audio BLF:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_amr_aud-blf.csv
Unimodal Audio i-Vec:  https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/reclist_df_1m_amr_aud-iv.csv


In [2]:
from os import path
import pandas as pd

def read_csv_from_url(url):
  try:
    df = pd.read_csv(url)
    return df
  except Exception as e:
    print(f"[Error] Error reading CSV from URL: {e}")
    return None

# Paths
pathBaseline = fileNameGenerator(reclists['baseline_cf'])
pathAudioIV = fileNameGenerator(reclists['unimodal_aud_iv_amr'])
pathVisAVF = fileNameGenerator(reclists['unimodal_vis_avf_amr'])
pathVisCNN = fileNameGenerator(reclists['unimodal_vis_cnn_amr'])
pathAudioBLF = fileNameGenerator(reclists['unimodal_aud_blf_amr'])
pathTxtRaw = fileNameGenerator(reclists['unimodal_txt_raw_openai_amr'])
pathTxtLLM = fileNameGenerator(reclists['unimodal_txt_llm_openai_amr'])

# Read files
dfVisAVF = read_csv_from_url(pathVisAVF)
dfVisCNN = read_csv_from_url(pathVisCNN)
dfTxtRaw = read_csv_from_url(pathTxtRaw)
dfTxtLLM = read_csv_from_url(pathTxtLLM)
dfAudioIV = read_csv_from_url(pathAudioIV)
dfBaseline = read_csv_from_url(pathBaseline)
dfAudioBLF = read_csv_from_url(pathAudioBLF)

# Prints
print('Baseline: ')
display(dfBaseline.head(3))

print('\nAudio i-Vector: ')
display(dfAudioIV.head(3))

print('\nAudio BLF: ')
display(dfAudioBLF.head(3))

print('\nVisual AVF: ')
display(dfVisAVF.head(3))

print('\nVisual CNN: ')
display(dfVisCNN.head(3))

print('\nText Raw: ')
display(dfTxtRaw.head(3))

print('\nText LLM: ')
display(dfTxtLLM.head(3))

Baseline: 


Unnamed: 0,userId,train,gt,rec_TopPop_NA,CR_TopPop_NA,PB_TopPop_NA,FA_TopPop_NA,NO_TopPop_NA,DI_TopPop_NA,CB_TopPop_NA,...,PB_VAECF_NA,FA_VAECF_NA,NO_VAECF_NA,DI_VAECF_NA,CB_VAECF_NA,RC_VAECF_NA,ND_VAECF_NA,CV_TopPop_NA,CV_MF_NA,CV_VAECF_NA
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[2858, 1196, 589, 593, 1580, 1198, 2396, 1197,...",0.0,0.025182,0.186087,10.877062,0.800317,7.383697,...,0.003759,0.1,11.891683,0.773915,2.144773,0.571429,0.458104,0.100147,0.650957,0.867452
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[260, 1270, 1580, 608, 2762, 1197, 1617, 1097,...",0.1,0.031752,0.10046,11.484178,0.853492,1.255014,...,0.046715,0.222812,9.37882,0.738519,3.16647,0.058824,0.138862,0.100147,0.650957,0.867452
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[589, 2028, 1580, 1198, 608, 2762, 2396, 2628,...",0.1,0.014416,0.101013,11.598494,0.855714,2.274215,...,0.0,1.0,12.560333,0.812381,2.632693,0.181818,0.29849,0.100147,0.650957,0.867452



Audio i-Vector: 


Unnamed: 0,userId,train,gt,rec_AMR_audio,CR_AMR_audio,PB_AMR_audio,FA_AMR_audio,NO_AMR_audio,DI_AMR_audio,CB_AMR_audio,RC_AMR_audio,ND_AMR_audio,CV_AMR_audio
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[1198, 318, 590, 2336, 1196, 2087, 594, 2858, ...",0.0,0.014088,0.231088,10.498757,0.83963,1.227513,0.142857,0.091625,0.868925
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[474, 858, 2194, 377, 590, 608, 1608, 1266, 16...",0.1,0.076642,0.29219,8.412313,0.772593,1.698441,0.117647,0.195189,0.868925
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[1198, 2797, 1580, 2716, 2628, 457, 2028, 2396...",0.0,0.024964,0.134211,10.958576,0.815556,2.639835,0.272727,0.396392,0.868925



Audio BLF: 


Unnamed: 0,userId,train,gt,rec_AMR_audio,CR_AMR_audio,PB_AMR_audio,FA_AMR_audio,NO_AMR_audio,DI_AMR_audio,CB_AMR_audio,RC_AMR_audio,ND_AMR_audio,CV_AMR_audio
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[2572, 1198, 1968, 1196, 246, 2761, 594, 318, ...",0.0,0.060839,0.14967,10.081125,0.886984,2.256556,0.285714,0.174371,0.905744
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[2243, 2948, 912, 153, 2194, 1608, 2336, 2064,...",0.0,0.019635,0.1,11.653187,0.81037,1.991602,0.117647,0.148764,0.905744
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[3210, 1198, 1275, 1222, 2791, 919, 588, 1784,...",0.0,0.020584,0.160638,10.956709,0.814074,2.315745,0.090909,0.138862,0.905744



Visual AVF: 


Unnamed: 0,userId,train,gt,rec_AMR_visual,CR_AMR_visual,PB_AMR_visual,FA_AMR_visual,NO_AMR_visual,DI_AMR_visual,CB_AMR_visual,RC_AMR_visual,ND_AMR_visual,CV_AMR_visual
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[2879, 1968, 262, 1476, 3257, 3361, 849, 3448,...",0.0,0.034964,0.191232,10.78067,0.732593,5.864017,0.0,0.0,0.729013
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[2879, 381, 1240, 1192, 3328, 849, 3254, 2001,...",0.0,0.015949,0.1,11.683184,0.845344,2.46603,0.0,0.0,0.729013
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[2879, 1240, 3328, 1103, 2001, 1234, 262, 339,...",0.0,0.021022,0.119097,11.079683,0.819418,2.978748,0.0,0.0,0.729013



Visual CNN: 


Unnamed: 0,userId,train,gt,rec_AMR_visual,CR_AMR_visual,PB_AMR_visual,FA_AMR_visual,NO_AMR_visual,DI_AMR_visual,CB_AMR_visual,RC_AMR_visual,ND_AMR_visual,CV_AMR_visual
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[318, 357, 920, 2067, 2396, 1196, 2081, 590, 2...",0.0,0.03719,0.225417,9.568151,0.788095,2.274597,0.142857,0.079457,0.837997
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[3527, 1275, 2001, 1299, 2105, 923, 1222, 1214...",0.0,0.0,1.0,12.560333,0.755873,1.51779,0.0,0.0,0.837997
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[2396, 318, 1198, 1517, 2115, 2987, 3361, 1214...",0.0,0.007591,0.1,11.790289,0.814815,2.90102,0.272727,0.261443,0.837997



Text Raw: 


Unnamed: 0,userId,train,gt,rec_AMR_text,CR_AMR_text,PB_AMR_text,FA_AMR_text,NO_AMR_text,DI_AMR_text,CB_AMR_text,RC_AMR_text,ND_AMR_text,CV_AMR_text
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[1197, 1198, 318, 2858, 594, 3147, 912, 2081, ...",0.0,0.02219,0.236184,10.300207,0.826402,2.451591,0.285714,0.381213,0.896907
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[434, 590, 733, 3147, 858, 608, 2058, 1197, 13...",0.1,0.01,0.281022,10.51367,0.732222,2.656719,0.176471,0.453743,0.896907
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[2804, 648, 2791, 1580, 589, 2797, 2628, 1721,...",0.0,0.018613,0.145882,11.011458,0.815185,2.605095,0.272727,0.299905,0.896907



Text LLM: 


Unnamed: 0,userId,train,gt,rec_AMR_text,CR_AMR_text,PB_AMR_text,FA_AMR_text,NO_AMR_text,DI_AMR_text,CB_AMR_text,RC_AMR_text,ND_AMR_text,CV_AMR_text
0,1,"[2692, 260, 1028, 1287, 1029, 1035, 914, 661, ...","[2791, 1962, 1197, 3408, 594, 1721, 1022]","[318, 3751, 2858, 2396, 912, 593, 3147, 1617, ...",0.0,0.018431,0.182376,10.968857,0.840741,3.618175,0.0,0.0,0.893962
1,2,"[1792, 648, 265, 1801, 1544, 780, 2312, 3471, ...","[1537, 2881, 515, 1247, 1527, 647, 3147, 590, ...","[150, 1608, 1580, 587, 786, 454, 590, 608, 130...",0.1,0.039526,0.213389,9.47692,0.808519,0.352319,0.117647,0.136985,0.893962
2,3,"[260, 1291, 653, 1304, 2081, 2470, 552, 1961, ...","[2115, 648, 1580, 1261, 1198, 1615, 1136, 1266...","[1198, 2804, 2716, 1242, 588, 3753, 2028, 648,...",0.0,0.010511,0.18125,11.131511,0.794974,3.051648,0.272727,0.355777,0.893962


**II. Rank Aggregation and Helpers**

In [3]:
# ================================================================
#  Rank Aggregation & Evaluation helpers  —  drop‑in cell
# ================================================================
from __future__ import annotations
import ast, itertools, numpy as np, pandas as pd
from typing import Callable, Dict, List, Sequence

# ---------- robust CSV‑list parser ------------------------------
def _safe_to_list(x) -> List[int]:
    if isinstance(x, list):   return x
    if pd.isna(x):            return []
    if isinstance(x, str):
        try:                  return ast.literal_eval(x)
        except Exception:     return [int(t) for t in x.strip('[](){}').replace(' ','').split(',') if t]
    return list(x)

# ---------- primitive aggregation rules -------------------------
def borda_count(rank_lists: Sequence[Sequence[int]]):
    L = max(len(r) for r in rank_lists)
    sc: Dict[int, float] = {}
    for r in rank_lists:
        for pos, it in enumerate(r):
            sc[it] = sc.get(it, 0) + (L-pos)
    return _sorted_items(sc, True)

def weighted_borda(rank_lists, w_fn: Callable[[int,int],float]):
    sc: Dict[int, float] = {}
    for r in rank_lists:
        L = len(r)
        for pos, it in enumerate(r):
            sc[it] = sc.get(it, 0) + w_fn(pos, L)
    return _sorted_items(sc, True)

def average_rank(rank_lists):
    fill = max(len(r) for r in rank_lists)+1
    ranks: Dict[int, List[int]] = {}
    for r in rank_lists:
        pos_d = {it:p for p,it in enumerate(r)}
        for it in set(itertools.chain(*rank_lists)):
            ranks.setdefault(it, []).append(pos_d.get(it, fill))
    sc = {it: np.mean(lst) for it,lst in ranks.items()}
    return _sorted_items(sc, False)

def rrf(rank_lists, *, k: int = 60):
    sc: Dict[int, float] = {}
    for r in rank_lists:
        for pos, it in enumerate(r):
            sc[it] = sc.get(it, 0) + 1/(k+pos)
    return _sorted_items(sc, True)

# ---------- metrics ---------------------------------------------
def recall_at_k(g, p, k): gs=set(g); return 0 if not gs else len(set(p[:k])&gs)/len(gs)
def hr_at_k(g, p, k):     return 1.0 if set(p[:k]) & set(g) else 0.0
def ndcg_at_k(g, p, k):
    gs=set(g)
    dcg=sum(1/np.log2(i+2) for i,it in enumerate(p[:k]) if it in gs)
    ideal=min(len(gs),k)
    if ideal==0: return 0.0
    idcg=sum(1/np.log2(i+2) for i in range(ideal))
    return dcg/idcg

# ---------- helpers ---------------------------------------------
def _sorted_items(sc: Dict[int,float], larger_is_better: bool):
    key = (lambda kv:(-kv[1],kv[0])) if larger_is_better else (lambda kv:(kv[1],kv[0]))
    return [it for it,_ in sorted(sc.items(), key=key)]

# weight functions
WEIGHT_FNS = {
    'linear'    : lambda pos,L: L-pos,
    'harmonic'  : lambda pos,L: 1/(pos+1),
    'log'       : lambda pos,L: 1/np.log2(pos+2),
    'exp'       : lambda pos,L: np.exp(-pos),
    'quadratic' : lambda pos,L: (L-pos)**2,
    'const'     : lambda pos,L: 1.0,
}
DEFAULT_METHODS = ['borda', 'avg-rank', 'rrf'] + [f'w-borda-{w}' for w in WEIGHT_FNS]

# ---------- evaluator class --------------------------------------
class RankAggregationEvaluator:
    def __init__(self, df: pd.DataFrame, *, gt_col='gt',
                 rec_cols: Sequence[str]|None=None, top_k:int=10):
        self.df = df.copy()
        self.gt_col = gt_col
        self.top_k  = top_k
        self.rec_cols = list(rec_cols) if rec_cols else [c for c in df.columns if c.startswith('rec_')]
        if not self.rec_cols:
            raise ValueError('No rec_* columns found.')
        self.df[self.gt_col] = self.df[self.gt_col].apply(_safe_to_list)
        for c in self.rec_cols:
            self.df[c] = self.df[c].apply(_safe_to_list)

    # ----- public -------------------------------------------------
    def evaluate_single(self, cols: Sequence[str]|None=None)->pd.DataFrame:
        cols = list(cols) if cols else self.rec_cols
        out=[]
        for c in cols:
            met = _aggregate_metrics(self.df[self.gt_col], self.df[c], self.top_k)
            out.append({'list_A':c,'list_B':'—','method':'single',**met})
        return pd.DataFrame(out)

    def run_all_pairs(self, *, methods=None, verbose=False)->pd.DataFrame:
        methods = list(methods) if methods else DEFAULT_METHODS
        recs=[]
        for a,b in itertools.combinations(self.rec_cols,2):
            if verbose: print('Aggregating',a,'+',b)
            for m in methods:
                fused = self.df.apply(lambda r: self._agg([r[a],r[b]],m), axis=1)
                met   = _aggregate_metrics(self.df[self.gt_col], fused, self.top_k, pre_fused=True)
                recs.append({'list_A':a,'list_B':b,'method':m,**met})
        return pd.DataFrame(recs)

    # ----- internal ----------------------------------------------
    def _agg(self, lists, m):
        if m=='borda': return borda_count(lists)
        if m=='avg-rank': return average_rank(lists)
        if m=='rrf': return rrf(lists)
        if m.startswith('w-borda'):
            return weighted_borda(lists, WEIGHT_FNS[m.split('-')[-1]])
        raise ValueError(m)

def _aggregate_metrics(gt_series, pred_series, k, *, pre_fused=False):
    s={'recall@'+str(k):0,'ndcg@'+str(k):0,'hr@'+str(k):0}
    for g,p in zip(gt_series, pred_series):
        pred = p if pre_fused else p
        s[f'recall@{k}'] += recall_at_k(g,pred,k)
        s[f'ndcg@{k}']   += ndcg_at_k(g,pred,k)
        s[f'hr@{k}']     += hr_at_k(g,pred,k)
    n=len(gt_series)
    return {k:v/n for k,v in s.items()}


**III. Applying Rank Aggregation**

In [4]:
import pandas as pd
from functools import reduce

# ------------ 1)  dictionary & utilities to build URLs -------------
reclists = {
    "baseline_cf": ("cf","","",""),
    "unimodal_txt_raw_openai_amr": ("amr","raw-oai","",""),
    "unimodal_txt_llm_openai_amr": ("amr","llm-oai","",""),
    "unimodal_vis_avf_amr": ("amr","","avf",""),
    "unimodal_vis_cnn_amr": ("amr","","cnn",""),
    "unimodal_aud_blf_amr": ("amr","","","blf"),
    "unimodal_aud_iv_amr" : ("amr","","","iv"),
}
base_url  = "https://raw.githubusercontent.com/RecSys-lab/MM-VideoRec/refs/heads/main/reclist/"
prefix,suffix = "reclist_df_1m_",".csv"

def path_of(tup):
    model,txt,vis,aud = tup
    fn = prefix + ( "cf" if model=="cf" else model+"_" )
    if txt: fn += "txt-"+txt
    if vis: fn += ("_vis-" if txt else "vis-")+vis
    if aud: fn += ("_aud-" if vis or txt else "aud-")+aud
    return base_url+fn+suffix

paths = {name:path_of(t) for name,t in reclists.items()}

# ------------ 2)  read CSVs ----------------------------------------
def rd(url):
    try: return pd.read_csv(url)
    except Exception as e: print("[err]",e); return None

dfBaseline  = rd(paths["baseline_cf"])
dfTxtRaw    = rd(paths["unimodal_txt_raw_openai_amr"])
dfTxtLLM    = rd(paths["unimodal_txt_llm_openai_amr"])
dfVisAVF    = rd(paths["unimodal_vis_avf_amr"])
dfVisCNN    = rd(paths["unimodal_vis_cnn_amr"])
dfAudioBLF  = rd(paths["unimodal_aud_blf_amr"])
dfAudioIV   = rd(paths["unimodal_aud_iv_amr"])

# ------------ 3)  merge on userId ----------------------------------
dfs = [dfBaseline, dfTxtRaw, dfTxtLLM, dfVisAVF, dfVisCNN, dfAudioBLF, dfAudioIV]
df_all = reduce(lambda L,R: pd.merge(L, R.drop(columns=["train","gt"]), on="userId", how="inner"), dfs)
print("Merged shape:", df_all.shape)

# ------------ 4)  evaluate -----------------------------------------
evalr      = RankAggregationEvaluator(df_all, top_k=10)
single_df  = evalr.evaluate_single()
fusion_df  = evalr.run_all_pairs(verbose=True)

combined = (
    pd.concat([single_df, fusion_df], ignore_index=True)
      .sort_values("recall@10", ascending=False)
)

# ------------ 5)  show top‑50 --------------------------------------
pd.set_option("display.max_rows", 50)
display(combined.head(50))


Merged shape: (5948, 93)
Aggregating rec_TopPop_NA + rec_MF_NA
Aggregating rec_TopPop_NA + rec_VAECF_NA
Aggregating rec_TopPop_NA + rec_AMR_text_x
Aggregating rec_TopPop_NA + rec_AMR_text_y
Aggregating rec_TopPop_NA + rec_AMR_visual_x
Aggregating rec_TopPop_NA + rec_AMR_visual_y
Aggregating rec_TopPop_NA + rec_AMR_audio_x
Aggregating rec_TopPop_NA + rec_AMR_audio_y
Aggregating rec_MF_NA + rec_VAECF_NA
Aggregating rec_MF_NA + rec_AMR_text_x
Aggregating rec_MF_NA + rec_AMR_text_y
Aggregating rec_MF_NA + rec_AMR_visual_x
Aggregating rec_MF_NA + rec_AMR_visual_y
Aggregating rec_MF_NA + rec_AMR_audio_x
Aggregating rec_MF_NA + rec_AMR_audio_y
Aggregating rec_VAECF_NA + rec_AMR_text_x
Aggregating rec_VAECF_NA + rec_AMR_text_y
Aggregating rec_VAECF_NA + rec_AMR_visual_x
Aggregating rec_VAECF_NA + rec_AMR_visual_y
Aggregating rec_VAECF_NA + rec_AMR_audio_x
Aggregating rec_VAECF_NA + rec_AMR_audio_y
Aggregating rec_AMR_text_x + rec_AMR_text_y
Aggregating rec_AMR_text_x + rec_AMR_visual_x
Aggrega

Unnamed: 0,list_A,list_B,method,recall@10,ndcg@10,hr@10
191,rec_VAECF_NA,rec_AMR_audio_y,rrf,0.25213,0.373851,0.860962
194,rec_VAECF_NA,rec_AMR_audio_y,w-borda-log,0.25213,0.372944,0.860962
193,rec_VAECF_NA,rec_AMR_audio_y,w-borda-harmonic,0.252088,0.371909,0.860962
190,rec_VAECF_NA,rec_AMR_audio_y,avg-rank,0.251979,0.373803,0.860625
189,rec_VAECF_NA,rec_AMR_audio_y,borda,0.251654,0.373249,0.861466
192,rec_VAECF_NA,rec_AMR_audio_y,w-borda-linear,0.251654,0.373249,0.861466
195,rec_VAECF_NA,rec_AMR_audio_y,w-borda-exp,0.250896,0.369714,0.860121
196,rec_VAECF_NA,rec_AMR_audio_y,w-borda-quadratic,0.250766,0.371975,0.860121
155,rec_VAECF_NA,rec_AMR_text_y,rrf,0.249582,0.366638,0.860457
158,rec_VAECF_NA,rec_AMR_text_y,w-borda-log,0.249582,0.364703,0.860457
