In [1]:
cd ..

/Users/mig/Desktop/pitt/projects/Group-Project-2140/cord_ir


In [2]:
from search.elastic_index_reader import IndexReader
from tqdm.notebook import trange, tqdm
import math

In [3]:
import xml.etree.ElementTree as ET
queryTree = ET.parse('../data/2020-07-16/eval/topics-rnd5.xml')

In [4]:
queryRoot = queryTree.getroot()
queries = []
for child in queryRoot:
    query = {
        'queryNo': child.attrib['number'],
        'query': child.find('query').text,
        'question': child.find('question').text,
        'narrative': child.find('narrative').text
    }
    queries.append(query)

In [5]:
reader = IndexReader()
for query in queries:
    query['query_tokens'] = [t['token'] for t in reader.tokenize(query['query'])['tokens']]
    query['question_tokens'] = [t['token'] for t in reader.tokenize(query['question'])['tokens']]
    query['narrative_tokens'] = [t['token'] for t in reader.tokenize(query['narrative'])['tokens']]



In [6]:
def getRetrievalResults(queries, field):
    results = {}
    for query in tqdm(queries):
        res = reader.search("cord_test", query[field], size=200, fields=[], highlight=False)
        results[query['queryNo']] = res['hits']['hits']
    return results

In [7]:
queryFields = ['query', 'question', 'narrative']
results = {f: getRetrievalResults(queries, f) for f in queryFields}

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

In [8]:
# load the judgments
judgments = {q['queryNo']: [] for q in queries}
with open('../data/2020-07-16/eval/qrels-covid_d5_j0.5-5.txt', 'r') as qrels:
    for line in qrels:
        [topicId, iteration, cordId, judgment] = line.strip('\n').split(' ')
        judgments[topicId].append({
            'iteration': iteration,
            'cordId': cordId,
            'judgment': judgment
        })

In [9]:
for field in queryFields:
    with open('../data/2020-07-16/eval/%s-result.txt'%field, 'w') as res_file:
        fieldResults = results[field]
        for query in tqdm(queries):
            for (index, result) in enumerate(fieldResults[query['queryNo']]):
                line = ' '.join([query['queryNo'], 'Q0', result['_id'], str(index + 1), str(result['_score']), field])
                res_file.write(line + '\n')

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

In [10]:
# use trec_eval to get evaluation
!../data/2020-07-16/eval/trec_eval ../data/2020-07-16/eval/qrels-covid_d5_j0.5-5.txt ../data/2020-07-16/eval/query-result.txt
!../data/2020-07-16/eval/trec_eval ../data/2020-07-16/eval/qrels-covid_d5_j0.5-5.txt ../data/2020-07-16/eval/question-result.txt
!../data/2020-07-16/eval/trec_eval ../data/2020-07-16/eval/qrels-covid_d5_j0.5-5.txt ../data/2020-07-16/eval/narrative-result.txt

runid                 	all	query
num_q                 	all	50
num_ret               	all	10000
num_rel               	all	26664
num_rel_ret           	all	3774
map                   	all	0.0938
gm_map                	all	0.0542
Rprec                 	all	0.1514
bpref                 	all	0.1417
recip_rank            	all	0.8236
iprec_at_recall_0.00  	all	0.8466
iprec_at_recall_0.10  	all	0.4202
iprec_at_recall_0.20  	all	0.1483
iprec_at_recall_0.30  	all	0.0284
iprec_at_recall_0.40  	all	0.0000
iprec_at_recall_0.50  	all	0.0000
iprec_at_recall_0.60  	all	0.0000
iprec_at_recall_0.70  	all	0.0000
iprec_at_recall_0.80  	all	0.0000
iprec_at_recall_0.90  	all	0.0000
iprec_at_recall_1.00  	all	0.0000
P_5                   	all	0.6640
P_10                  	all	0.6180
P_15                  	all	0.6067
P_20                  	all	0.5840
P_30                  	all	0.5640
P_100                 	all	0.4620
P_200                 	all	0.3774
P_500                 	all	0.1510
P_1000                	

In [18]:
# calculate in Python
ndcg_eval_at = [3, 5, 10, 15, 20, 25, 30]
recall_eval_at = [5, 10, 20, 50, 100, 150]
def getEvaluation(results, judgments):
    queryEval = []
    for queryNo in results:
        # doc to relevant score
        relMap = {}
        # number of docs in each relevant score
        relCount = {}
        totalRel = 0
        for j in judgments[queryNo]:
            score = int(j['judgment'])
            relMap[j['cordId']] = score
            relCount[score] = relCount.get(score, 0) + 1
            if score > 0:
                totalRel += 1
        truePositive = 0
        falsePositive = 0
        precisions = []
        reciprocalRank = 0
        cumulativeGain = 0
        discountedCumulativeGain = 0
        idealDCG = 0
        dcgAt = {}
        idcgAt = {}
        ndcgAt = {}
        index = 1
        recallAt = {}
        for score in sorted(relCount.keys(), reverse=True):
            for i in range(relCount[score]):
                idealDCG += (2 ** score - 1) / (math.log2(1 + index))
                if index in ndcg_eval_at:
                    idcgAt[index] = idealDCG
                index += 1
        for (index, doc) in enumerate(results[queryNo]):
            docId = doc['_id']
            cumulativeGain += relMap.get(docId, 0)
            discountedCumulativeGain += (2 ** (relMap.get(docId, 0)) - 1) / (math.log2(1 + (1 + i)))
            if relMap.get(docId, 0) > 0:
                truePositive += 1
                # recall increase
                precisions.append(truePositive / (truePositive + falsePositive))
                if reciprocalRank == 0:
                    reciprocalRank = 1 / (index+1)
            else:
                falsePositive += 1
            if index + 1 in ndcg_eval_at and (index + 1) in idcgAt:
                dcgAt[index + 1] = discountedCumulativeGain
                ndcgAt[index + 1] = dcgAt[index + 1] / idcgAt[index + 1]
            if index + 1 in recall_eval_at:
                recallAt[index + 1] = truePositive / totalRel
        queryEval.append({
            'AveragePrecision': sum(precisions) / totalRel,
            'ReciprocalRank': reciprocalRank,
            'CG': cumulativeGain,
            'DCG': discountedCumulativeGain,
            'IDCG': idealDCG,
            'nDCG': discountedCumulativeGain / idealDCG,
            'nDCGAt': ndcgAt,
            'recallAt': recallAt
        })
    return {
        'MeanAveragePrecisions': sum(map(lambda e: e['AveragePrecision'], queryEval)) / len(queryEval),
        'MeanReciprocalRank': sum(map(lambda e: e['ReciprocalRank'], queryEval)) / len(queryEval),
        'AverageNDCG': sum(map(lambda e: e['nDCG'], queryEval)) / len(queryEval),
        'AverageNDCGAt': {k: sum(map(lambda e: e['nDCGAt'][k], queryEval)) / len(queryEval) for k in ndcg_eval_at},
        'AverageRecallAt': {k: sum(map(lambda e: e['recallAt'][k], queryEval)) / len(queryEval) for k in recall_eval_at}
    }
            

In [25]:
getEvaluation(results['query'], judgments)

{'MeanAveragePrecisions': 0.0938413425805633,
 'MeanReciprocalRank': 0.8136031746031747,
 'AverageNDCG': 0.14186232674604926,
 'AverageNDCGAt': {3: 0.13364055103725428,
  5: 0.1418463555798589,
  10: 0.1680738131354743,
  15: 0.18464160353165723,
  20: 0.19572503874359487,
  25: 0.20166118601071548,
  30: 0.21199265201748066},
 'AverageRecallAt': {5: 0.007571936093659915,
  10: 0.014347370761125028,
  20: 0.025675868223038946,
  50: 0.05593293689518463,
  100: 0.09603243791764829,
  150: 0.12785061256462518}}

In [26]:
getEvaluation(results['question'], judgments)

{'MeanAveragePrecisions': 0.05846350714788746,
 'MeanReciprocalRank': 0.6930461107519932,
 'AverageNDCG': 0.10352932326145607,
 'AverageNDCGAt': {3: 0.09875194087296979,
  5: 0.11636770250055294,
  10: 0.13835908558004478,
  15: 0.1488085799127936,
  20: 0.15100867206756832,
  25: 0.15061045685942806,
  30: 0.15584868010479463},
 'AverageRecallAt': {5: 0.005727560253085045,
  10: 0.011274666322520223,
  20: 0.019611381677786005,
  50: 0.04029976874013658,
  100: 0.06874504517191328,
  150: 0.09178983181898402}}

In [27]:
getEvaluation(results['narrative'], judgments)

{'MeanAveragePrecisions': 0.0529449798520697,
 'MeanReciprocalRank': 0.6434796152314557,
 'AverageNDCG': 0.10658332315533475,
 'AverageNDCGAt': {3: 0.09286698794380803,
  5: 0.09907587042444932,
  10: 0.12023696099287039,
  15: 0.1276837496201119,
  20: 0.1381839021514124,
  25: 0.1423471423387641,
  30: 0.14730348405985721},
 'AverageRecallAt': {5: 0.005375328395812915,
  10: 0.00957656146663195,
  20: 0.01846469816821065,
  50: 0.03979425169564572,
  100: 0.06755924917514011,
  150: 0.08948080605363223}}

In [15]:
# ML Part 
from search.ml_rank import MlRanker
from search.data_loader import DataLoader
ranker = MlRanker("../data/models/ranker.joblib",
                  '../data/models/tfidf.joblib',
                  "../data/models/docMatrix.joblib")
loader = DataLoader('../data/2020-07-16')
loader.load_metadata_mappings(loader.load_metadata())

In [23]:
def getMlRetrievalResults(queries, field):
    results = {}
    for query in tqdm(queries):
        res = reader.search("cord_test", query[field], size=200, fields=[], highlight=False)
        results[query['queryNo']] = ranker.rank(query[field], res['hits']['hits'], loader)
#         results[query['queryNo']] = ranker.whole_rank(query[field], loader, size=3000)
    return results
queryFields = ['query', 'question', 'narrative']
mlResults = {f: getMlRetrievalResults(queries, f) for f in queryFields}

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

In [24]:
getEvaluation(mlResults['query'], judgments)

{'MeanAveragePrecisions': 0.09757809150047829,
 'MeanReciprocalRank': 0.881008658008658,
 'AverageNDCG': 0.14178101344522018,
 'AverageNDCGAt': {3: 0.14302845188015995,
  5: 0.158370727880377,
  10: 0.17428973383519267,
  15: 0.1945154717154688,
  20: 0.20581931959190186,
  25: 0.21639762168844115,
  30: 0.22037453186492},
 'AverageRecallAt': {5: 0.009001492257500602,
  10: 0.015139868167777562,
  20: 0.02737418700208618,
  50: 0.05841848937216098,
  100: 0.103302815911794,
  150: 0.13235492029123022}}

In [20]:
getEvaluation(mlResults['question'], judgments)

{'MeanAveragePrecisions': 0.06621321900034868,
 'MeanReciprocalRank': 0.8006899766899765,
 'AverageNDCG': 0.10347918538377215,
 'AverageNDCGAt': {3: 0.11170886826845788,
  5: 0.12544917661570973,
  10: 0.15328750164543398,
  15: 0.16889057798533888,
  20: 0.17184359025591953,
  25: 0.1764679535807171,
  30: 0.18365823864491582},
 'AverageRecallAt': {5: 0.007314020315806555,
  10: 0.013494300831302835,
  20: 0.023226331710781273,
  50: 0.046963292725972015,
  100: 0.0776216140792589,
  150: 0.09850249844731726}}

In [21]:
getEvaluation(mlResults['narrative'], judgments)

{'MeanAveragePrecisions': 0.06307821579867884,
 'MeanReciprocalRank': 0.8210077922077922,
 'AverageNDCG': 0.10657398655766023,
 'AverageNDCGAt': {3: 0.10778962176910373,
  5: 0.11831311120595311,
  10: 0.13638539344543232,
  15: 0.14841490601202692,
  20: 0.15941636614788024,
  25: 0.1663676594616727,
  30: 0.17559742792853972},
 'AverageRecallAt': {5: 0.0065700356764308255,
  10: 0.011818517187814705,
  20: 0.02164875588135949,
  50: 0.04761132386965276,
  100: 0.0807414685336504,
  150: 0.10000227808062155}}