# Normalized Discounted Cumulative Gain (NDCG)

In [17]:

import pandas as pd

llm_res = pd.read_csv('../gym/results/llm_response.csv')
altair_res = pd.read_csv('../gym/results/altair_response.csv')

altair_res = altair_res[(altair_res['threshold']*10).astype(int) == 0]
altair_res

Unnamed: 0,query_id,query,order,threshold,file_id,table_id,id
0,1,(Graph structured) AND (visual Question Answer...,0,0.0,1609.05600v2,S5.T1,1609.05600v2_S5.T1
1,1,(Graph structured) AND (visual Question Answer...,1,0.0,2406.15319,S3.T5.2,2406.15319_S3.T5.2
2,1,(Graph structured) AND (visual Question Answer...,2,0.0,2404.18961v1,S2.SS1.89,2404.18961v1_S2.SS1.89
3,1,(Graph structured) AND (visual Question Answer...,3,0.0,2303.05977v2,S5.T4,2303.05977v2_S5.T4
4,1,(Graph structured) AND (visual Question Answer...,4,0.0,1609.05600v2,S5.T2,1609.05600v2_S5.T2
...,...,...,...,...,...,...,...
1605,10,FL AND decentralised,156,0.0,2406.06679,Pt0.A2.T6,2406.06679_Pt0.A2.T6
1606,10,FL AND decentralised,157,0.0,2211.15217,S6.T1,2211.15217_S6.T1
1607,10,FL AND decentralised,158,0.0,2410.10089v1,S4.T5,2410.10089v1_S4.T5
1608,10,FL AND decentralised,159,0.0,2409.08933v1,S2.T1,2409.08933v1_S2.T1


In [18]:
import numpy as np

## Normalized Discounted Cumulative Gain (NDCG)
# each query_id and table_id has a response value for every model
# we want to use a mean of the response values for each table_id
llm_res.drop(columns=['model', 'index', 'file_id', 'table_id'], inplace=True)
llm_res = llm_res.groupby(['id', 'query_id']).mean().reset_index()

altair_res.drop(columns=['query', 'threshold', 'file_id', 'table_id'], inplace=True)

# get a df with all
df = pd.merge(llm_res, altair_res, on=['id', 'query_id'])

df

Unnamed: 0,id,query_id,score,relevant,order
0,1609.05600v2_S5.T1,1,0.075000,0.00,0
1,1609.05600v2_S5.T1,2,0.050000,0.00,52
2,1609.05600v2_S5.T1,3,0.225000,0.25,14
3,1609.05600v2_S5.T1,4,0.250000,0.25,1
4,1609.05600v2_S5.T1,5,0.025000,0.00,70
...,...,...,...,...,...
1605,2410.13042v1_Sx1.T0a,6,0.442500,0.50,12
1606,2410.13042v1_Sx1.T0a,7,0.025000,0.00,155
1607,2410.13042v1_Sx1.T0a,8,0.033333,0.00,135
1608,2410.13042v1_Sx1.T0a,9,0.033333,0.00,154


In [19]:

def dcg(query_id, k):
    query = df[df['query_id'] == query_id].sort_values(by='order', ascending=False)
    query = query[:k]
    dgc = []
    for i in range(1, min(k, len(query)) + 1):
        dgc.append(query['score'].iloc[i-1] / np.log2(i + 1))
    return sum(dgc)

def idcg(query_id, k):
    query = df[df['query_id'] == query_id].sort_values(by='score', ascending=False)
    query = query[:k]
    idcg = []
    for i in range(1, min(k, len(query)) + 1):
        idcg.append(query['score'].iloc[i-1] / np.log2(i + 1))
    return sum(idcg)

def ndcg(query_id, k):
    ideal_dcg = idcg(query_id, k)
    if ideal_dcg == 0:
        return 0
    return dcg(query_id, k) / ideal_dcg

    

In [20]:
    
print(f'NDCG for all queries is {df["query_id"].apply(lambda x: ndcg(x, 50)).mean()}')


NDCG for all queries is 0.24377055951924012
