In [116]:
import pandas as pd
import numpy as np
from functools import partial
from typing import Tuple

In [2]:
# Load the relevance judgments (qrels) from a CSV file in the working directory.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests the CSV
# stores its index as an unnamed first column; index_col=0 would drop it -- confirm.
qrels = pd.read_csv('./qrels.csv')

In [3]:
# Load the ranked retrieval output of every system from a CSV file in the working directory.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests the CSV
# stores its index as an unnamed first column; index_col=0 would drop it -- confirm.
sys_results = pd.read_csv('./system_results.csv')

In [4]:
# Show the judgments frame: one row per (query_id, doc_id) pair with a relevance grade.
qrels

Unnamed: 0,query_id,doc_id,relevance
0,1,9090,3
1,1,6850,2
2,1,9574,2
3,1,8709,1
4,1,9684,1
...,...,...,...
115,10,7346,3
116,10,8840,2
117,10,3258,2
118,10,5175,1


In [13]:
# Show the run frame: one row per (system_number, query_number, doc_number) with its rank and score.
sys_results

Unnamed: 0,system_number,query_number,doc_number,rank_of_doc,score
0,1,1,6567,1,5.0743
1,1,1,9652,2,4.4829
2,1,1,9684,3,4.3478
3,1,1,7844,4,4.3268
4,1,1,9584,5,4.2160
...,...,...,...,...,...
29299,6,10,5809,496,1.0113
29300,6,10,5762,497,1.0113
29301,6,10,5696,498,1.0113
29302,6,10,5618,499,1.0113


In [48]:
def filter_on_kwargs(data:pd.DataFrame, **kwargs) -> pd.DataFrame:
    """Return the rows of ``data`` where every named column equals the given value.

    Each keyword argument becomes a ``<column> == <value>`` condition, and the
    conditions are AND-ed together into a single ``DataFrame.query`` expression.

    Args:
        data: frame to filter.
        **kwargs: column name -> value to match. String values are quoted so they
            are compared as literals instead of being read as column names.

    Returns:
        A new frame with the matching rows. With no keyword arguments there is
        nothing to filter on, so a copy of ``data`` is returned.
    """
    if not kwargs:
        # An empty expression would make DataFrame.query raise.
        return data.copy()

    conditions = []
    for column, value in kwargs.items():
        # Only strings need quoting; str() keeps numbers and lists in the form
        # the query parser already accepted.
        literal = repr(value) if isinstance(value, str) else str(value)
        conditions.append(column + " == " + literal)
    return data.query(' & '.join(conditions))

In [89]:
def get_number_of_unique_rows(data:pd.DataFrame, col_name:str) -> int:
    """Count the distinct values in one column of ``data``.

    Args:
        data: frame holding the column.
        col_name: name of the column to inspect.

    Returns:
        Number of distinct values. Missing values (NaN) count as one distinct
        value in total, rather than once per occurrence.
    """
    # dropna=False keeps NaN in the count, but as a single value.
    return int(data[col_name].nunique(dropna=False))

def get_row_number(system_nr:int, query_nr:int, nr_queries:int) -> int:
    """Map a 1-based (system, query) pair to its 0-based row in the results table.

    Rows are grouped by system: the first ``nr_queries`` rows belong to system 1,
    the next ``nr_queries`` to system 2, and so on.
    """
    rows_before_system = (system_nr - 1) * nr_queries
    offset_in_system = query_nr - 1
    return rows_before_system + offset_in_system

# Size of the evaluation grid: how many distinct query ids and system ids occur in the runs.
# NOTE(review): get_row_number uses these counts as if they were the largest ids, so this
# presumably assumes ids run 1..N without gaps -- confirm against the data.
nr_queries = get_number_of_unique_rows(sys_results, 'query_number')
nr_systems = get_number_of_unique_rows(sys_results, 'system_number')

def create_result_df(nr_systems:int, nr_queries:int) -> pd.DataFrame:
    """Build the empty results table with one row per (system, query) pair.

    Rows are grouped by system and ordered by query within each system, i.e.
    the row for system ``s`` and query ``q`` (both 1-based) sits at position
    ``(s - 1) * nr_queries + (q - 1)``.

    Args:
        nr_systems: number of systems; ids 1..nr_systems are generated.
        nr_queries: number of queries; ids 1..nr_queries are generated.

    Returns:
        A frame with integer ``system_number`` / ``query_number`` columns and
        one float column per metric, all initialised to 0.0.
    """
    col_names = ['system_number', 'query_number', 'P@10', 'R@50', 'r-precision', 'AP', 'nDCG@10', 'nDCG@20']
    nr_rows = nr_systems * nr_queries

    # Build the id columns whole instead of writing cell by cell into a
    # downcast (int8) column, which cannot hold ids above 127.
    columns = {
        'system_number': [system for system in range(1, nr_systems + 1) for _ in range(nr_queries)],
        'query_number': [query for _ in range(nr_systems) for query in range(1, nr_queries + 1)],
    }
    for metric in col_names[2:]:
        columns[metric] = np.zeros(nr_rows)

    result_df = pd.DataFrame(columns, columns=col_names)
    # Empty lists would otherwise produce object columns; keep the ids integral.
    result_df['system_number'] = result_df['system_number'].astype('int64')
    result_df['query_number'] = result_df['query_number'].astype('int64')
    return result_df

# Allocate the results table (one row per system/query pair) that the metric functions write into.
result_df = create_result_df(nr_systems, nr_queries)

In [110]:
def calculate_precision_at_10(sys_results:pd.DataFrame, qrels:pd.DataFrame, system_number:int,
                             query_number:int) -> float:
    """Precision at cutoff 10 for one system on one query.

    The first ten rows that ``sys_results`` holds for the system/query pair are
    taken in the order they appear in the frame (no sort on rank is applied).
    Every document listed in ``qrels`` for the query counts as relevant,
    whatever its grade.

    Returns:
        Number of relevant documents among those rows, divided by 10.
    """
    system_run = filter_on_kwargs(sys_results, system_number=system_number,
                                  query_number=query_number)
    judged = filter_on_kwargs(qrels, query_id=query_number)

    top_ten = list(system_run['doc_number'])[:10]
    relevant_ids = set(judged['doc_id'])

    hits = sum(1 for doc_id in top_ten if doc_id in relevant_ids)
    return hits / 10

def calculate_all_precision_at_10(sys_results:pd.DataFrame, qrels:pd.DataFrame, nr_systems:int,
                                 nr_queries:int, result_df:pd.DataFrame):
    """Fill the 'P@10' column of ``result_df`` in place.

    Visits every system id in 1..nr_systems and every query id in
    1..nr_queries, storing each score in the row given by ``get_row_number``.
    """
    for system_id in range(1, nr_systems + 1):
        for query_id in range(1, nr_queries + 1):
            target_row = get_row_number(system_id, query_id, nr_queries)
            result_df.at[target_row, 'P@10'] = calculate_precision_at_10(
                sys_results, qrels, system_id, query_id)

# Fill the 'P@10' column of result_df in place.
calculate_all_precision_at_10(sys_results, qrels, nr_systems, nr_queries, result_df)

In [111]:
def calculate_recall_at_50(sys_results:pd.DataFrame, qrels:pd.DataFrame, system_number:int, 
                             query_number:int) -> float:
    """Recall at cutoff 50 for one system on one query.

    The first fifty rows that ``sys_results`` holds for the system/query pair
    are taken in the order they appear in the frame. Every document listed in
    ``qrels`` for the query counts as relevant, whatever its grade.

    Returns:
        Fraction of the query's relevant documents found in those rows. Each
        relevant document counts at most once, so a duplicated row cannot
        distort the ratio. Returns 0.0 when the query has no judged documents,
        instead of dividing by zero.
    """
    retrieved_docs = list(filter_on_kwargs(sys_results, system_number=system_number, 
                                           query_number=query_number)['doc_number'])
    relevant_docs = set(filter_on_kwargs(qrels, query_id=query_number)['doc_id'])

    if not relevant_docs:
        return 0.0

    # Set intersection makes the numerator and denominator both count
    # distinct documents.
    found = relevant_docs.intersection(retrieved_docs[:50])
    return len(found) / len(relevant_docs)

def calculate_all_recall_at_50(sys_results:pd.DataFrame, qrels:pd.DataFrame, nr_systems:int,
                                 nr_queries:int, result_df:pd.DataFrame):
    """Fill the 'R@50' column of ``result_df`` in place.

    Covers every (system, query) pair with ids 1..nr_systems and
    1..nr_queries; the destination row comes from ``get_row_number``.
    """
    system_ids = range(1, nr_systems + 1)
    query_ids = range(1, nr_queries + 1)
    for system_id in system_ids:
        for query_id in query_ids:
            score = calculate_recall_at_50(sys_results, qrels, system_id, query_id)
            destination = get_row_number(system_id, query_id, nr_queries)
            result_df.at[destination, 'R@50'] = score

# Fill the 'R@50' column of result_df in place.
calculate_all_recall_at_50(sys_results, qrels, nr_systems, nr_queries, result_df)

In [112]:
def calculate_R_precision(sys_results:pd.DataFrame, qrels:pd.DataFrame, system_number:int, 
                          query_number:int) -> float:
    """R-precision for one system on one query.

    R is the number of distinct documents listed in ``qrels`` for the query.
    The first R rows that ``sys_results`` holds for the system/query pair are
    taken in the order they appear in the frame.

    Returns:
        Number of relevant documents among those R rows, divided by R.
        Returns 0.0 when the query has no judged documents (R == 0), instead
        of dividing by zero.
    """
    retrieved_docs = list(filter_on_kwargs(sys_results, system_number=system_number, 
                                           query_number=query_number)['doc_number'])
    relevant_docs = set(filter_on_kwargs(qrels, query_id=query_number)['doc_id'])
    R = len(relevant_docs)

    if R == 0:
        return 0.0

    TP = sum(1 for doc in retrieved_docs[:R] if doc in relevant_docs)
    return TP / R

def calculate_all_R_precision(sys_results:pd.DataFrame, qrels:pd.DataFrame, nr_systems:int,
                              nr_queries:int, result_df:pd.DataFrame):
    """Fill the 'r-precision' column of ``result_df`` in place.

    Walks system ids 1..nr_systems and, for each, query ids 1..nr_queries,
    writing each score to the row given by ``get_row_number``.
    """
    system_id = 1
    while system_id <= nr_systems:
        for query_id in range(1, nr_queries + 1):
            score = calculate_R_precision(sys_results, qrels, system_id, query_id)
            result_df.at[get_row_number(system_id, query_id, nr_queries), 'r-precision'] = score
        system_id += 1

# Fill the 'r-precision' column of result_df in place.
calculate_all_R_precision(sys_results, qrels, nr_systems, nr_queries, result_df)

In [114]:
def calculate_AP(sys_results:pd.DataFrame, qrels:pd.DataFrame, system_number:int, 
                 query_number:int) -> float:
    """Average precision for one system on one query.

    Walks all rows that ``sys_results`` holds for the system/query pair in the
    order they appear in the frame. At each row whose document is listed in
    ``qrels`` for the query, the precision up to that position is added to a
    running sum. The sum is divided by R, the number of distinct judged
    documents for the query.

    Returns:
        The average precision. Returns 0.0 when the query has no judged
        documents (R == 0), instead of dividing by zero.
    """
    retrieved_docs = list(filter_on_kwargs(sys_results, system_number=system_number, 
                                           query_number=query_number)['doc_number'])
    relevant_docs = set(filter_on_kwargs(qrels, query_id=query_number)['doc_id'])

    R = len(relevant_docs)
    if R == 0:
        return 0.0

    precision_sum = 0.0
    hits = 0
    for rank, doc in enumerate(retrieved_docs, start=1):
        if doc in relevant_docs:
            hits += 1
            # Precision at this rank: relevant documents seen so far / rank.
            precision_sum += hits / rank
    return precision_sum / R

def calculate_all_AP(sys_results:pd.DataFrame, qrels:pd.DataFrame, nr_systems:int,
                              nr_queries:int, result_df:pd.DataFrame):
    """Fill the 'AP' column of ``result_df`` in place.

    Scores every (system, query) pair with ids 1..nr_systems and
    1..nr_queries and stores each value in the row given by ``get_row_number``.
    """
    for system_id in range(1, nr_systems + 1):
        for query_id in range(1, nr_queries + 1):
            average_precision = calculate_AP(sys_results, qrels, system_id, query_id)
            row_index = get_row_number(system_id, query_id, nr_queries)
            result_df.at[row_index, 'AP'] = average_precision

# Fill the 'AP' column of result_df in place.
calculate_all_AP(sys_results, qrels, nr_systems, nr_queries, result_df)

In [123]:
def _dcg(gains) -> float:
    """Discounted cumulative gain of a ranked list of gains.

    The gain at rank 1 is taken as is; the gain at rank i >= 2 is divided by
    log2(i).
    """
    total = 0.0
    for rank, gain in enumerate(gains, start=1):
        total += gain if rank == 1 else gain / np.log2(rank)
    return float(total)


def calculate_nDCG(sys_results:pd.DataFrame, qrels:pd.DataFrame, system_number:int, 
                   query_number:int) -> Tuple[float, float]:
    """Normalised DCG at cutoffs 10 and 20 for one system on one query.

    The gain of a retrieved document is its ``relevance`` value in ``qrels``
    for the query, or 0 when it is not listed there. The first twenty rows
    that ``sys_results`` holds for the system/query pair are taken in the
    order they appear in the frame. The DCG at each cutoff is divided by the
    ideal DCG at the same cutoff, i.e. the DCG of the query's judged
    documents sorted by descending relevance.

    Returns:
        ``(nDCG@10, nDCG@20)``. A cutoff whose ideal DCG is 0 (no judged
        documents, or all grades 0) yields 0.0.
    """
    retrieved_docs = list(filter_on_kwargs(sys_results, system_number=system_number, 
                                           query_number=query_number)['doc_number'])
    qrels_subset = filter_on_kwargs(qrels, query_id=query_number)

    relevance_by_doc = dict(zip(qrels_subset['doc_id'], qrels_subset['relevance']))

    gains = [relevance_by_doc.get(doc, 0) for doc in retrieved_docs[:20]]
    ideal_gains = sorted(relevance_by_doc.values(), reverse=True)

    scores = []
    for cutoff in (10, 20):
        ideal_dcg = _dcg(ideal_gains[:cutoff])
        # Without this division the value is a raw DCG, not comparable across queries.
        scores.append(_dcg(gains[:cutoff]) / ideal_dcg if ideal_dcg > 0 else 0.0)
    return scores[0], scores[1]

def calculate_all_nDCG(sys_results:pd.DataFrame, qrels:pd.DataFrame, nr_systems:int,
                       nr_queries:int, result_df:pd.DataFrame):
    """Fill the 'nDCG@10' and 'nDCG@20' columns of ``result_df`` in place.

    For every (system, query) pair with ids 1..nr_systems and 1..nr_queries,
    the two values returned by ``calculate_nDCG`` are written to the row given
    by ``get_row_number``.
    """
    for system_id in range(1, nr_systems + 1):
        for query_id in range(1, nr_queries + 1):
            scores = calculate_nDCG(sys_results, qrels, system_id, query_id)
            at_10, at_20 = scores
            row_index = get_row_number(system_id, query_id, nr_queries)
            result_df.at[row_index, 'nDCG@10'] = at_10
            result_df.at[row_index, 'nDCG@20'] = at_20

# Fill the 'nDCG@10' and 'nDCG@20' columns of result_df in place.
calculate_all_nDCG(sys_results, qrels, nr_systems, nr_queries, result_df)

In [124]:
def print_result_df(result_df:pd.DataFrame, nr_systems:int, nr_queries:int, file_name:str='ir_eval.csv'):
    """Write the results table to a CSV file, with a mean row after each system.

    The file starts with a header of the column names. For each system, one
    line per query follows (ids as integers, metrics rounded to 3 decimals),
    then a line ``<system>,mean,<metric means...>`` averaging that system's
    queries.

    Args:
        result_df: results table laid out as ``get_row_number`` expects. Rows
            are looked up both by label and by position, so the frame is
            expected to have the default 0..n-1 index.
        nr_systems: number of systems to write.
        nr_queries: number of queries per system.
        file_name: path of the file to (over)write.
    """
    with open(file_name, 'w') as f:
        f.write(','.join(result_df.columns)+'\n')
        for ii in range(nr_systems):
            for jj in range(nr_queries):
                row_nr = get_row_number(ii+1, jj+1, nr_queries)
                row = result_df.loc[row_nr, :]
                # The row is indexed by column name, so positions go through .iloc;
                # plain row[0] on a label index is deprecated in recent pandas.
                line = str(int(row.iloc[0])) + ',' + str(int(row.iloc[1])) + ','

                rest_of_line = ','.join(map(str, [round(x, 3) for x in row.iloc[2:]]))
                line += rest_of_line + '\n'
                f.write(line)

            # Summary line: column-wise mean over this system's block of rows.
            line = str(ii+1) + "," + "mean" + ","
            row_nr = get_row_number(ii+1, 1, nr_queries)
            relevant_stats = np.array(result_df.iloc[row_nr:row_nr+nr_queries, 2:])
            means = np.mean(relevant_stats, axis=0)

            rest_of_line = ','.join(map(str, [round(x, 3) for x in means]))
            line += rest_of_line + '\n'
            f.write(line)
                
# Write the per-query rows and per-system mean rows to the default file, 'ir_eval.csv'.
print_result_df(result_df, nr_systems, nr_queries)