In [1]:
import requests
import csv
from ltr.data import CorpusApi, Config

In [2]:
queries = CorpusApi.getValidationQueriesAsDict()

In [3]:
evaluationMap = {}

with open('data/validation/docv2_trec2020_qrels.txt', 'r') as txt:
    txtReader = csv.reader(txt, delimiter=' ')
    for line in txtReader:
        queryMap = evaluationMap.get(line[0], {})
        forwardIndexList = queryMap.get(line[3],[])
        forwardIndexList.append(line[2])
        queryMap[line[3]] = forwardIndexList
        reverseIndexMap = queryMap.get('reverse', {})
        reverseIndexMap[line[2]] = line[3]
        queryMap['reverse'] = reverseIndexMap
        
        evaluationMap[line[0]] = queryMap
        

In [4]:
from typing import List


def reciprocalRank(queryId: int, evalList: List) -> float:
    
    rank = 1
    
    lookupMap = evaluationMap[queryId].get('reverse')
    
    for docId in evalList:
        v = lookupMap.get(docId)
        if v is None or int(v) == 0:
            rank += 1
        else:
            break

    return 1/rank if rank <= 100 else 0
    

In [5]:
def averagePrecision(queryId: int, evalList: List) -> float:
    
    lookupMap = evaluationMap[queryId].get('reverse')
    
    foundDocs = 0
    
    sum = 0
    
    for i, docId in enumerate(evalList):
        v = lookupMap.get(docId)
        if v is not None and int(v) != 0:
            foundDocs += 1
            sum += (foundDocs / (i + 1))
            
    
    return sum / len(evalList)

In [6]:
for key in list(evaluationMap.keys())[:]:
    try:
        # get query
        query = queries[key]
        
        # get result from solr
        request = {
            "fields": "id",
            "limit": 100,
            "params": {
                "qf": "title headings body",
                "defType": "dismax",
                "q": query
            }
        }
        
        response = requests.post(f'http://localhost:8983/solr/thesis-ltr/select', json=request)
        
        queryResult = [doc['id'] for doc in response.json()["response"]["docs"]]
        
        
        # calculate metrics
        # Reciprocal Rank
        rr = reciprocalRank(key, queryResult)
        evaluationMap[key]['rr'] = rr
        print(f'RR: {rr}')
        # Normalized Discounted Cumulative Gains (NDCG)
        # Normalized Cumulative Gains (NCG)
        # Average Precision (AP)
        ap = averagePrecision(key, queryResult)
        evaluationMap[key]['ap'] = ap
        print(f'AP: {ap}')
        
        print(f'processed {key} query: {query}')
    except KeyError as err:
        # ignore the queries that are not in the validation queries list
        pass

JSONDecodeError: Expecting value: line 1 column 1 (char 0)