# Normalized Discounted Cumulative Gain (NDCG)

In [5]:

import pandas as pd

llm_res = pd.read_csv('../gym/results/llm_response.csv')
altair_res = pd.read_csv('../gym/results/altair_response.csv')

altair_res = altair_res[(altair_res['threshold']*10).astype(int) == 7]
altair_res

Unnamed: 0,query_id,query,order,threshold,file_id,table_id,id
4967,1,(Graph structured) AND (visual Question Answer...,0,0.7,1609.05600v2,S5.T1,1609.05600v2_S5.T1
4968,2,"""Autonomous Surface Vehicles"" AND ""Federated L...",0,0.7,2408.01931v2,S5.T5,2408.01931v2_S5.T5
4969,2,"""Autonomous Surface Vehicles"" AND ""Federated L...",1,0.7,2006.05148,S4.T3,2006.05148_S4.T3
4970,2,"""Autonomous Surface Vehicles"" AND ""Federated L...",2,0.7,2303.12317,S4.T2,2303.12317_S4.T2
4971,2,"""Autonomous Surface Vehicles"" AND ""Federated L...",3,0.7,2211.15217,S6.T5,2211.15217_S6.T5
...,...,...,...,...,...,...,...
5111,10,FL AND decentralised,29,0.7,2409.03893v2,S2.T1,2409.03893v2_S2.T1
5112,10,FL AND decentralised,30,0.7,2409.08933v1,S5.T2,2409.08933v1_S5.T2
5113,10,FL AND decentralised,31,0.7,2211.15217,S6.T2,2211.15217_S6.T2
5114,10,FL AND decentralised,32,0.7,2303.12317,A2.T5,2303.12317_A2.T5


In [6]:
import numpy as np

## Normalized Discounted Cumulative Gain (NDCG)
# each query_id and table_id has a response value for every model
# we want to use a mean of the response values for each table_id
llm_res.drop(columns=['model', 'index', 'file_id', 'table_id'], inplace=True)
llm_res = llm_res.groupby(['id', 'query_id']).mean().reset_index()

altair_res.drop(columns=['query', 'threshold', 'file_id', 'table_id'], inplace=True)

# get a df with all
df = pd.merge(llm_res, altair_res, on=['id', 'query_id'])

df

Unnamed: 0,id,query_id,score,relevant,order
0,1609.05600v2_S5.T1,1,0.075000,0.00,0
1,1609.05600v2_S5.T1,5,0.025000,0.00,16
2,1609.05600v2_S5.T1,10,0.050000,0.00,16
3,1609.05600v2_S5.T2,5,0.075000,0.00,17
4,1609.05600v2_S5.T2,10,0.025000,0.00,17
...,...,...,...,...,...
144,2410.13042v1_Sx1.T0,5,0.212500,0.25,22
145,2410.13042v1_Sx1.T0,6,0.612500,0.75,1
146,2410.13042v1_Sx1.T0,10,0.050000,0.00,25
147,2410.13042v1_Sx1.T0a,5,0.175000,0.25,23


In [7]:

def dcg(query_id, k):
    query = df[df['query_id'] == query_id].sort_values(by='order', ascending=False)
    query = query[:k]
    dgc = []
    for i in range(1, min(k, len(query)) + 1):
        dgc.append(query['score'].iloc[i-1] / np.log2(i + 1))
    return sum(dgc)

def idcg(query_id, k):
    query = df[df['query_id'] == query_id].sort_values(by='score', ascending=False)
    query = query[:k]
    idcg = []
    for i in range(1, min(k, len(query)) + 1):
        idcg.append(query['score'].iloc[i-1] / np.log2(i + 1))
    return sum(idcg)

def ndcg(query_id, k):
    ideal_dcg = idcg(query_id, k)
    if ideal_dcg == 0:
        return 0
    return dcg(query_id, k) / ideal_dcg

    

In [8]:
    
print(f'NDCG for all queries is {df["query_id"].apply(lambda x: ndcg(x, 50)).mean()}')


NDCG for all queries is 0.6821346141119633
