In [125]:
import requests
import csv
from ltr.data import CorpusApi, Config
import numpy as np

In [126]:
queries = CorpusApi.getValidationQueriesAsDict()

In [127]:
evaluationMap = {}

with open('data/validation/docv2_trec2020_qrels.txt', 'r') as txt:
    txtReader = csv.reader(txt, delimiter=' ')
    for line in txtReader:
        queryMap = evaluationMap.get(line[0], {})
        forwardIndexList = queryMap.get(line[3],[])
        forwardIndexList.append(line[2])
        queryMap[line[3]] = forwardIndexList
        reverseIndexMap = queryMap.get('reverse', {})
        reverseIndexMap[line[2]] = line[3]
        queryMap['reverse'] = reverseIndexMap
        
        evaluationMap[line[0]] = queryMap
        

In [128]:
from typing import List


def reciprocalRank(queryId: int, evalList: List) -> float:
    
    rank = 1
    
    lookupMap = evaluationMap[queryId].get('reverse')
    
    for docId in evalList:
        v = lookupMap.get(docId)
        if v is None or int(v) == 0:
            rank += 1
        else:
            break

    return 1/rank if rank <= 100 else 0
    

In [129]:
def averagePrecision(queryId: int, evalList: List) -> float:
    
    lookupMap = evaluationMap[queryId].get('reverse')
    
    foundDocs = 0
    
    sum = 0
    
    for i, docId in enumerate(evalList):
        v = lookupMap.get(docId)
        if v is not None and int(v) != 0:
            foundDocs += 1
            sum += (foundDocs / (i + 1))
            
    
    return sum / len(evalList)

In [148]:
from sklearn.metrics import ndcg_score, dcg_score

def normalizedDiscountedCumulativeGain(queryId: int, evalList: List) -> float:
    
    lookupMap = evaluationMap[queryId].get('reverse')
    
    # create gain list
    gainList = [int(lookupMap.get(docId, 0)) for docId in evalList]
    #print(gainList)
    
    # create optimal gain list
    keys = list(evaluationMap[queryId].keys()).copy()
    
    if 'reverse' in keys:
        keys.remove('reverse')
    
    if 'rr' in keys:
        keys.remove('rr')
    
    if 'ap' in keys:
        keys.remove('ap')
    
    if 'ndcg' in keys:
        keys.remove('ndcg')
    
    if 'ncg' in keys:
        keys.remove('ncg')
    
    keys = [int(idx) for idx in keys]
    
    optimalGainList = []
    for key in sorted(keys, reverse=True):
        gains = [key for value in range(0,len(evaluationMap[queryId][str(key)]))]
        optimalGainList.extend(gains)
    
    optimalGainList = optimalGainList[:len(evalList)]
    
    # Relevance scores in Ideal order
    true_relevance = np.asarray([[v for v in optimalGainList]])
  
    # Relevance scores in output order
    relevance_score = np.asarray([[v for v in gainList]])
    
    return ndcg_score(true_relevance, relevance_score)

In [146]:


def normalizedCumulativeGain(queryId: int, evalList: List) -> float:
    
    lookupMap = evaluationMap[queryId].get('reverse')
    
    # create gain list
    gainList = [int(lookupMap.get(docId, 0)) for docId in evalList]
    #print(gainList)
    
    # create optimal gain list
    keys = list(evaluationMap[queryId].keys()).copy()
    
    if 'reverse' in keys:
        keys.remove('reverse')
    
    if 'rr' in keys:
        keys.remove('rr')
    
    if 'ap' in keys:
        keys.remove('ap')
    
    if 'ndcg' in keys:
        keys.remove('ndcg')
    
    if 'ncg' in keys:
        keys.remove('ncg')
    
    keys = [int(idx) for idx in keys]
    
    optimalGainList = []
    for key in sorted(keys, reverse=True):
        gains = [key for value in range(0,len(evaluationMap[queryId][str(key)]))]
        optimalGainList.extend(gains)
    
    optimalGainList = optimalGainList[:len(evalList)]
    

    return sum(gainList) / sum(optimalGainList)

In [149]:
for key in list(evaluationMap.keys())[:]:
    try:
        # get query
        query = queries[key]
        
        # get result from solr
        request = {
            "fields": "id",
            "limit": 100,
            "params": {
                "qf": "title headings body",
                "defType": "dismax",
                "q": query
            }
        }
        
        response = requests.post(f'http://localhost:8983/solr/thesis-ltr/select', json=request)
        
        queryResult = [doc['id'] for doc in response.json()["response"]["docs"]]
        
        
        # calculate metrics
        # Reciprocal Rank
        rr = reciprocalRank(key, queryResult)
        evaluationMap[key]['rr'] = rr
        print(f'RR: {rr}')
        # Normalized Discounted Cumulative Gains (NDCG)
        ndcg = normalizedDiscountedCumulativeGain(key, queryResult[:10])
        evaluationMap[key]['ndcg'] = ndcg
        print(f'NDCG: {ndcg}')
        # Normalized Cumulative Gains (NCG)
        ncg = normalizedCumulativeGain(key, queryResult)
        evaluationMap[key]['ncg'] = ncg
        print(f'NCG: {ncg}')
        # Average Precision (AP)
        ap = averagePrecision(key, queryResult)
        evaluationMap[key]['ap'] = ap
        print(f'AP: {ap}')
        
        print(f'processed {key} query: {query}')
    except KeyError as err:
        # ignore the queries that are not in the validation queries list
        pass

RR: 0
[[3 3 3 1 0 0 0 0 0 0]]
[[0 0 0 0 0 0 0 0 0 0]]
4.543559338088346
6.823465818787766
NDCG: 0.6658726604269177
NCG: 0.0
AP: 0.0
processed 42255 query: average salary for dental hygienist in nebraska
RR: 0.3333333333333333
[[3 3 3 3 3 3 3 3 3 3]]
[[0 0 3 3 0 0 0 0 0 0]]
13.630678014265037
13.630678014265037
NDCG: 0.9999999999999999
NCG: 0.2716049382716049
AP: 0.022007575843370254
processed 47210 query: average wedding dress alteration cost
RR: 0.3333333333333333
[[3 3 3 3 3 3 2 2 2 2]]
[[0 0 1 0 0 0 0 0 0 0]]
12.05576275289244
12.391784982164108
NDCG: 0.972883468382052
NCG: 0.04054054054054054
AP: 0.003952020202020202
processed 67316 query: can fever cause miscarriage early pregnancy
RR: 0.2
[[3 3 2 2 2 2 2 2 2 2]]
[[0 0 0 0 3 2 3 0 0 0]]
9.776441414610087
10.718048429748148
NDCG: 0.9121475312124349
NCG: 0.07894736842105263
AP: 0.010152380952380951
processed 135802 query: definition of laudable
RR: 0.125
[[3 2 2 1 1 1 1 1 1 1]]
[[0 0 0 0 0 0 0 1 0 0]]
6.118474599460944
7.67448909165

In [143]:
bm25_rr = 0
bm25_ap = 0
bm25_ndcg = 0
bm25_ncg = 0
counter = 0
for key in list(evaluationMap.keys()):
    try:
        bm25_rr += evaluationMap[key]['rr']
        bm25_ap += evaluationMap[key]['ap']
        bm25_ndcg += evaluationMap[key]['ndcg']
        bm25_ncg += evaluationMap[key]['ncg']
        counter += 1
    except KeyError:
        pass

In [144]:
bm25_rr = bm25_rr/counter
bm25_ap = bm25_ap/counter
bm25_ndcg = bm25_ndcg/counter
bm25_ncg = bm25_ncg/counter

print(f'exp_bm25: rr:{bm25_rr} / ap:{bm25_ap} / ndcg:{bm25_ndcg} / ncg:{bm25_ncg}')


exp_bm25: rr:0.49137667887667874 / ap:0.017599704162294193 / ndcg:0.915999761028869 / ncg:0.22533136694798403
