In [1]:
import pandas as pd

In [2]:
def filter_zeroes_rank(rank):
    """Select the non-zero entries of ``rank`` via boolean-mask indexing.

    The result depends on the container's own masking rules: a pandas
    Series drops the zero entries, while a pandas DataFrame keeps its shape
    and replaces zeros with NaN.
    """
    nonzero_mask = rank != 0
    return rank[nonzero_mask]

def reciprocal_rank_fusion(rank, c):
    """Return the reciprocal-rank term ``1 / (c + rank)``.

    ``rank`` may be a scalar or any object supporting ``+`` and ``/``
    (the notebook passes a pandas DataFrame); ``c`` is the additive offset.
    """
    offset_rank = c + rank
    return 1 / offset_rank

def squared_rrf(rank, c=0):
    """Return the squared reciprocal-rank term ``1 / (rank + c) ** 2``.

    Parameters
    ----------
    rank : scalar or array-like supporting ``+``, ``**`` and ``/``
        Rank position(s) to score.
    c : number, optional
        Additive offset applied to ``rank`` before squaring. Defaults to 0,
        which reproduces the previous ``1 / rank ** 2`` behaviour exactly.

    Notes
    -----
    With plain Python numbers, ``rank + c == 0`` raises ZeroDivisionError.
    """
    return 1 / (rank + c) ** 2

def rank_value(rank, c):
    """Return ``1 / (rank + c) ** 2`` for the given rank and offset ``c``."""
    squared_offset_rank = (rank + c) ** 2
    return 1 / squared_offset_rank

def mandelbrot_rank(rank,c,n):
    """Return ``rank_value(rank, c)`` normalised over positions 1..n.

    The normaliser is the sum of ``rank_value(i, c)`` for ``i`` from 1 to
    ``n`` inclusive; the result is ``rank_value(rank, c)`` divided by it.
    """
    normaliser = sum(rank_value(position, c) for position in range(1, n + 1))
    return rank_value(rank, c) / normaliser

In [3]:
# Read the tab-separated aggregate file (path is relative to the working directory).
experiment_df = pd.read_csv(
    './MQ2007-agg/transformed_agg.csv',
    sep='\t',
)
# Preview the first five rows.
experiment_df.head()

Unnamed: 0,qid,docid,relevance,ranking_1,ranking_2,ranking_3,ranking_4,ranking_5,ranking_6,ranking_7,...,ranking_11,ranking_12,ranking_13,ranking_14,ranking_15,ranking_16,ranking_17,ranking_18,ranking_19,ranking_20
0,10,GX000-00-0000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,10,GX000-24-12369390,1,107,107,85,0,113,110,110,...,0,252,36,52,244,155,0,113,132,44
2,10,GX000-62-7863450,1,0,0,0,1,8,14,14,...,0,18,252,227,6,214,0,0,120,0
3,10,GX016-48-5543459,1,0,0,111,4,1,3,3,...,0,4,15,26,15,96,4,163,2,152
4,10,GX037-87-3082362,0,160,160,117,57,102,209,209,...,0,162,229,205,142,190,0,0,12,4


In [4]:
def apply_new_ranking(col_value, old_rank):
    """Return ``old_rank`` as an int when ``col_value`` is positive, else 0.

    ``col_value`` is converted with ``int()`` before the comparison, so a
    fractional value below 1 counts as non-positive.
    """
    if int(col_value) <= 0:
        return 0
    return int(old_rank)

# Raw per-ranker columns only. Excluding the derived `*_rank` columns keeps
# this cell idempotent: it overwrites `experiment_df`, so on a re-run the
# frame already contains `ranking_N_rank` columns that must not be ranked again.
ranking_cols = [
    col for col in experiment_df.columns
    if col.startswith('ranking_') and not col.endswith('_rank')
]
rank_cols = [f"{col}_rank" for col in ranking_cols]

data_list = []
for qid, data in experiment_df.groupby('qid'):
    scores = data[ranking_cols]
    # Dense rank within the query, column by column; the largest value gets rank 1.
    dense_ranks = scores.rank(method='dense', ascending=False).astype(int)
    # Same rule as apply_new_ranking, applied to whole columns at once:
    # keep the rank where the raw value is positive, otherwise store 0.
    ranks = (
        dense_ranks
        .where(scores.astype(int) > 0, 0)
        .astype(int)
        .add_suffix('_rank')
    )
    # Drop rank columns left over from a previous run before appending the
    # fresh ones, so column names never get duplicated.
    data_list.append(
        pd.concat([data.drop(columns=rank_cols, errors='ignore'), ranks], axis=1)
    )

experiment_df = pd.concat(data_list)
experiment_df.head(5)

Unnamed: 0,qid,docid,relevance,ranking_1,ranking_2,ranking_3,ranking_4,ranking_5,ranking_6,ranking_7,...,ranking_11_rank,ranking_12_rank,ranking_13_rank,ranking_14_rank,ranking_15_rank,ranking_16_rank,ranking_17_rank,ranking_18_rank,ranking_19_rank,ranking_20_rank
0,10,GX000-00-0000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,10,GX000-24-12369390,1,107,107,85,0,113,110,110,...,0,3,9,8,3,8,0,7,6,14
2,10,GX000-62-7863450,1,0,0,0,1,8,14,14,...,0,21,4,4,29,4,0,0,7,0
3,10,GX016-48-5543459,1,0,0,111,4,1,3,3,...,0,29,11,10,26,10,3,1,18,7
4,10,GX037-87-3082362,0,160,160,117,57,102,209,209,...,0,12,7,7,17,7,0,0,13,22


In [9]:
# Fusion parameters (previously inline literals).
RRF_C = 60    # offset c passed to reciprocal_rank_fusion
MRRF_C = 60   # offset c passed to mandelbrot_rank
MRRF_N = 10   # n passed to mandelbrot_rank (normaliser covers positions 1..n)

ranked = [col for col in experiment_df.columns if col.endswith('_rank')]

# Mask rank 0 once and reuse it for all three scores. On a DataFrame the
# mask turns zeros into NaN, which `.sum(axis=1)` skips by default.
nonzero_ranks = filter_zeroes_rank(experiment_df[ranked])

# Every score is a row-wise sum over the rank columns, so no per-query
# grouping is needed. `.assign` returns a new frame instead of adding
# columns to groupby sub-frames in place.
result_df = experiment_df.assign(
    rrf=reciprocal_rank_fusion(nonzero_ranks, RRF_C).sum(axis=1),
    squared_rrf=squared_rrf(nonzero_ranks).sum(axis=1),
    mrrf=mandelbrot_rank(nonzero_ranks, MRRF_C, MRRF_N).sum(axis=1),
)
result_df.head(5)


Unnamed: 0,qid,docid,relevance,ranking_1,ranking_2,ranking_3,ranking_4,ranking_5,ranking_6,ranking_7,...,ranking_14_rank,ranking_15_rank,ranking_16_rank,ranking_17_rank,ranking_18_rank,ranking_19_rank,ranking_20_rank,rrf,squared_rrf,mrrf
0,10,GX000-00-0000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.333333,inf,2.369723
1,10,GX000-24-12369390,1,107,107,85,0,113,110,110,...,8,3,8,0,7,6,14,0.297603,inf,1.903143
2,10,GX000-62-7863450,1,0,0,0,1,8,14,14,...,4,29,4,0,0,7,0,0.293447,inf,1.873013
3,10,GX016-48-5543459,1,0,0,111,4,1,3,3,...,10,26,10,3,1,18,7,0.275416,inf,1.651267
4,10,GX037-87-3082362,0,160,160,117,57,102,209,209,...,7,17,7,0,0,13,22,0.294039,inf,1.862726


In [11]:
# Display only the three fused-score columns.
result_df.loc[:, ['rrf', 'squared_rrf', 'mrrf']]

Unnamed: 0,rrf,squared_rrf,mrrf
0,0.333333,inf,2.369723
1,0.297603,inf,1.903143
2,0.293447,inf,1.873013
3,0.275416,inf,1.651267
4,0.294039,inf,1.862726
...,...,...,...
69618,0.309531,inf,2.068721
69619,0.327985,inf,2.296767
69620,0.311883,inf,2.097193
69621,0.299098,inf,1.932220
