In [1]:
# Imports

import time
import pandas as pd
import numpy as np
import pyterrier as pt

In [2]:
# Start PyTerrier once per kernel; the terrier-prf package is requested at boot time

TERRIER_BOOT_PACKAGES = ["com.github.terrierteam:terrier-prf:-SNAPSHOT"]

if not pt.started():
    pt.init(boot_packages=TERRIER_BOOT_PACKAGES)

PyTerrier 0.8.0 has loaded Terrier 5.6 (built by craigmacdonald on 2021-09-17 13:27)



In [3]:
# Dataset handle used below to obtain the index, the "test-2019" topics and the qrels
dataset = pt.get_dataset("trec-deep-learning-passages")

### Index Statistics

In [4]:
# Obtain the dataset's 'terrier_stemmed' index and print its collection statistics
index_stemmed = dataset.get_index('terrier_stemmed')
stemmed_index = pt.IndexFactory.of(index_stemmed)
collection_stats = stemmed_index.getCollectionStatistics()
print(collection_stats.toString())

Number of documents: 8841823
Number of terms: 1170682
Number of postings: 215238456
Number of fields: 1
Number of tokens: 288759529
Field names: [text]
Positions:   false



### BM25 Baseline: Topic-Wise Analysis

In [5]:
# BM25 retriever over the stemmed index; verbose=True shows a per-query progress bar
BM25_baseline_stemmed = pt.BatchRetrieve(index_stemmed, wmodel="BM25", verbose=True)

Reference: [per-query passage ranking](https://pyterrier.readthedocs.io/en/latest/experiments.html#per-query-effectiveness)

In [6]:
# perquery=True makes the experiment return one row per (name, qid, measure)
# instead of averaged scores, which the topic-wise analysis below relies on.
# Topics without qrels come back with a NaN value (see the warning this cell
# emits); they are filtered out in the next cell.

# perf_counter() is a monotonic clock intended for measuring elapsed time, so
# the reported duration cannot be skewed by system clock adjustments.
start_time = time.perf_counter()

result_per_query = pt.Experiment(
    [BM25_baseline_stemmed],
    dataset.get_topics("test-2019"),
    dataset.get_qrels("test-2019"),
    eval_metrics=["recip_rank", "ndcg_cut_10", "map"],
    perquery=True,
)

print(f'Time taken : {time.perf_counter() - start_time}')

17:43:58.613 [main] WARN org.terrier.applications.batchquerying.TRECQuery - trec.encoding is not set; resorting to platform default (windows-1252). Retrieval may be platform dependent. Recommend trec.encoding=UTF-8


BR(BM25): 100%|██████████| 200/200 [00:16<00:00, 12.02q/s]


Time taken : 21.85712242126465


  warn(f'{backfill_count} topic(s) not found in qrels. Scores for these topics are given as NaN and should not contribute to averages.')


In [7]:
# Drop the rows whose metric value is missing
result_per_query = result_per_query.dropna(subset=["value"])

In [8]:
# Per-query MAP, lowest-scoring topics first
map_per_query = result_per_query.loc[result_per_query["measure"] == 'map']
map_per_query.sort_values(by="value")

Unnamed: 0,name,qid,measure,value
96,BR(BM25),1121709,map,0.000239
6,BR(BM25),1063750,map,0.003193
93,BR(BM25),443396,map,0.005157
12,BR(BM25),489204,map,0.055242
39,BR(BM25),962179,map,0.058701
24,BR(BM25),527433,map,0.064043
78,BR(BM25),1106007,map,0.094742
27,BR(BM25),1037798,map,0.109533
48,BR(BM25),451602,map,0.122208
66,BR(BM25),833860,map,0.139674


In [9]:
# "test-2019" topics (columns: qid, query); preview the first rows
topics = dataset.get_topics("test-2019")
topics.head()

17:44:20.499 [main] WARN org.terrier.applications.batchquerying.TRECQuery - trec.encoding is not set; resorting to platform default (windows-1252). Retrieval may be platform dependent. Recommend trec.encoding=UTF-8


Unnamed: 0,qid,query
0,1108939,what slows down the flow of blood
1,1112389,what is the county for grand rapids mn
2,792752,what is ruclip
3,1119729,what do you do when you have a nosebleed from ...
4,1105095,where is sugar lake lodge located


In [10]:
# "test-2019" relevance judgements (columns: qid, docno, label); preview the first rows
qrels = dataset.get_qrels("test-2019")
qrels.head()

Unnamed: 0,qid,docno,label
0,19335,1017759,0
1,19335,1082489,0
2,19335,109063,0
3,19335,1160863,0
4,19335,1160871,0


In [11]:
# Run the BM25 retriever over every topic and preview the ranked passages
BM25_baseline_results = BM25_baseline_stemmed(topics)
BM25_baseline_results.head()

BR(BM25): 100%|██████████| 200/200 [00:13<00:00, 14.97q/s]


Unnamed: 0,qid,docid,docno,rank,score,query
0,1108939,4069373,4069373,0,36.189054,what slows down the flow of blood
1,1108939,4744533,4744533,1,35.865644,what slows down the flow of blood
2,1108939,7454708,7454708,2,34.213639,what slows down the flow of blood
3,1108939,7724054,7724054,3,33.891239,what slows down the flow of blood
4,1108939,841975,841975,4,33.763758,what slows down the flow of blood


### Find the worst performing topics

In [12]:
# Start the collection of worst-performing qids with the five lowest nDCG@10 topics

worst_perf_qids = []
worst_ndcg_cut_10 = (
    result_per_query
    .loc[result_per_query["measure"] == 'ndcg_cut_10']
    .sort_values(by="value")
    .head(5)
)
worst_perf_qids.append(worst_ndcg_cut_10['qid'].tolist())
worst_ndcg_cut_10

Unnamed: 0,name,qid,measure,value
41,BR(BM25),962179,ndcg_cut_10,0.0
98,BR(BM25),1121709,ndcg_cut_10,0.0
95,BR(BM25),443396,ndcg_cut_10,0.069431
50,BR(BM25),451602,ndcg_cut_10,0.122273
8,BR(BM25),1063750,ndcg_cut_10,0.142811


In [13]:
# Add the five topics with the lowest reciprocal rank
worst_recip_rank = (
    result_per_query
    .loc[result_per_query["measure"] == 'recip_rank']
    .sort_values(by="value")
    .head(5)
)
worst_perf_qids.append(worst_recip_rank['qid'].tolist())
worst_recip_rank

Unnamed: 0,name,qid,measure,value
97,BR(BM25),1121709,recip_rank,0.002874
40,BR(BM25),962179,recip_rank,0.025
94,BR(BM25),443396,recip_rank,0.125
46,BR(BM25),148538,recip_rank,0.2
49,BR(BM25),451602,recip_rank,0.333333


In [14]:
# Add the five topics with the lowest MAP
worst_map = (
    result_per_query
    .loc[result_per_query["measure"] == 'map']
    .sort_values(by="value")
    .head(5)
)
worst_perf_qids.append(worst_map['qid'].tolist())
worst_map

Unnamed: 0,name,qid,measure,value
96,BR(BM25),1121709,map,0.000239
6,BR(BM25),1063750,map,0.003193
93,BR(BM25),443396,map,0.005157
12,BR(BM25),489204,map,0.055242
39,BR(BM25),962179,map,0.058701


In [15]:
# Union of the five worst topics under each measure, with duplicates removed.
# The lists are flattened by hand rather than with np.array(...).flatten(),
# which only flattens when every per-measure list has the same length.
# dict.fromkeys keeps first-seen order, so the displayed list is identical on
# every run (the iteration order of a set of strings is not).
flat_qids = []
for entry in worst_perf_qids:
    # Entries are per-measure lists on the first run, but bare qids if this
    # cell is executed again after worst_perf_qids has been flattened.
    if isinstance(entry, (list, tuple)):
        flat_qids.extend(entry)
    else:
        flat_qids.append(entry)
worst_perf_qids = list(dict.fromkeys(flat_qids))
worst_perf_qids

['443396', '451602', '1121709', '962179', '489204', '148538', '1063750']

In [16]:
# Look up the query text of the chosen qid; search() below takes the text, not the qid
target_qid = "962179"
topics.loc[topics["qid"] == target_qid]

Unnamed: 0,qid,query
76,962179,when was the salvation army founded


### Cross-check relevance of top-ranked passages
Reference: [passage ranking results for a specific query](https://github.com/terrier-org/cikm2021tutorial/blob/main/notebooks/notebook1.ipynb)

In [17]:
# Run the single query through BM25 and keep the first 100 rows of the ranking
baselineRanks_perQuery = BM25_baseline_stemmed.search("when was the salvation army founded").head(100)
baselineRanks_perQuery

BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 15.87q/s]


Unnamed: 0,qid,docid,docno,rank,score,query
0,1,8689054,8689054,0,38.471748,when was the salvation army founded
1,1,4511501,4511501,1,38.190918,when was the salvation army founded
2,1,4511504,4511504,2,37.620726,when was the salvation army founded
3,1,8689056,8689056,3,37.273490,when was the salvation army founded
4,1,5773189,5773189,4,36.879666,when was the salvation army founded
...,...,...,...,...,...,...
95,1,7428148,7428148,95,28.872857,when was the salvation army founded
96,1,4606387,4606387,96,28.859203,when was the salvation army founded
97,1,181089,181089,97,28.833306,when was the salvation army founded
98,1,5427064,5427064,98,28.731822,when was the salvation army founded


In [18]:
# Relevance judgements (qid, docno, label) recorded for query 962179
is_required_query = qrels["qid"] == "962179"
required_query_relevance = qrels.loc[is_required_query, ["qid", "docno", "label"]]

In [19]:
# Judged passages for query 962179 whose relevance label is greater than 2
requiredQueryQrels = qrels.loc[qrels["qid"] == "962179", ["qid", "docno", "label"]]
isMostRelevant = requiredQueryQrels["label"].gt(2)
mostRelavantPassages = requiredQueryQrels.loc[isMostRelevant]
mostRelavantPassages

Unnamed: 0,qid,docno,label
5351,962179,2329692,3
5352,962179,2329693,3
5356,962179,2329697,3
5358,962179,2329699,3
5360,962179,2329701,3
5363,962179,2978864,3
5365,962179,2978866,3
5387,962179,3705165,3
5394,962179,3896632,3
5406,962179,4511499,3


In [20]:
# Report, for each highly relevant passage, whether BM25 placed it in the kept ranking,
# then summarise how many of them were retrieved

retrieved_docnos = set(baselineRanks_perQuery["docno"])
listOfDocNo = mostRelavantPassages["docno"].tolist()

revCounter = 0
for doc in listOfDocNo:
    if doc in retrieved_docnos:
        revCounter += 1
        print(f'doc present: {doc}')
    else:
        print(f'doc absent: {doc}')
print(f'{revCounter}/{len(listOfDocNo)} of the most relevant passages in top 100 ranked passages')

doc absent: 2329692
doc absent: 2329693
doc present: 2329697
doc absent: 2329699
doc absent: 2329701
doc absent: 2978864
doc present: 2978866
doc absent: 3705165
doc absent: 3896632
doc present: 4511499
doc absent: 4606386
doc present: 4606387
doc absent: 536176
doc absent: 5653659
doc absent: 5919340
doc absent: 5919342
doc absent: 6898289
doc present: 6980697
doc absent: 8785367
doc absent: 8785371
5/20 of the most relevant passages in top 100 ranked passages


### Term Importance

In [21]:
def calc_important_terms(query, top_k=5, retriever=None):
    """Print how much each query term contributes to the top-ranked documents.

    The full query is searched first and its ``top_k`` highest-ranked documents
    are kept. Each whitespace-separated term is then searched on its own, and
    the scores that the term-only search assigns to those same documents are
    printed together with their average.

    The average is taken over the number of documents actually kept for the
    full query, so a document missing from a term's result list counts as 0.
    NOTE(review): a document may be missing because the term does not match it
    or because the term-only result list is cut off before reaching it -
    confirm the retriever's result depth if that distinction matters.

    Args:
        query: Query text; terms are obtained by splitting on whitespace.
        top_k: Number of top documents of the full query to analyse.
        retriever: Object with a ``search(text)`` method returning a DataFrame
            with ``docid`` and ``score`` columns. Defaults to the notebook's
            ``BM25_baseline_stemmed``.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")
    if retriever is None:
        # Looked up at call time so the notebook-level retriever stays the default.
        retriever = BM25_baseline_stemmed

    # split() without an argument ignores repeated, leading and trailing
    # whitespace, so no empty "term" is ever sent to the retriever.
    terms = query.split()
    res = retriever.search(query).head(top_k)
    docids = list(res['docid'])
    # Fewer than top_k documents may come back; average over what was retrieved.
    n_docs = len(docids)

    print('total result:')
    print(res)
    for term in terms:
        print()
        print(term, ":")
        term_result = retriever.search(term)
        top = term_result[term_result['docid'].isin(docids)]
        avg_score = sum(top['score']) / n_docs if n_docs else 0.0
        print('avg score:', avg_score)
        print(top)

In [22]:
# Term-importance breakdown for a selection of the poorly performing queries
hard_queries = [
    'why did the us volunterilay enter ww1',
    'definition declaratory judgement',
    'does legionella pneumophila cause pneumonia',
    'when was the salvation army founded',
]
for hard_query in hard_queries:
    calc_important_terms(hard_query)

BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 23.80q/s]


total result:
  qid    docid    docno  rank      score  \
0   1  4788295  4788295     0  30.340022   
1   1  2997653  2997653     1  29.795791   
2   1  4337527  4337527     2  29.564491   
3   1  6093907  6093907     3  27.676235   
4   1  2981566  2981566     4  27.031930   

                                   query  
0  why did the us volunterilay enter ww1  
1  why did the us volunterilay enter ww1  
2  why did the us volunterilay enter ww1  
3  why did the us volunterilay enter ww1  
4  why did the us volunterilay enter ww1  

why :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 198.70q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

did :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 499.80q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

the :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 498.37q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

us :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 996.27q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

volunterilay :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 499.68q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

enter :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 24.38q/s]


avg score: 2.1080811877964374
    qid    docid    docno  rank      score  query
559   1  4337527  4337527   559  10.540406  enter

ww1 :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 83.23q/s]


avg score: 21.163387540816554
   qid    docid    docno  rank      score query
0    1  2997653  2997653     0  24.219973   ww1
3    1  4788295  4788295     3  23.049556   ww1
29   1  2981566  2981566    29  20.358123   ww1
51   1  6093907  6093907    51  19.165201   ww1
52   1  4337527  4337527    52  19.024085   ww1


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 19.11q/s]


total result:
  qid    docid    docno  rank      score                             query
0   1  8612905  8612905     0  38.268905  definition declaratory judgement
1   1   955702   955702     1  30.340646  definition declaratory judgement
2   1   955706   955706     2  28.190404  definition declaratory judgement
3   1  1262137  1262137     3  27.946515  definition declaratory judgement
4   1  1100639  1100639     4  27.837080  definition declaratory judgement

definition :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 20.41q/s]


avg score: 0.0
Empty DataFrame
Columns: [qid, docid, docno, rank, score, query]
Index: []

declaratory :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 142.95q/s]


avg score: 4.725226192568324
   qid    docid    docno  rank      score        query
16   1  8612905  8612905    16  23.626131  declaratory

judgement :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 40.00q/s]

avg score: 19.77587836643682
    qid    docid    docno  rank      score      query
4     1   955702   955702     4  22.302855  judgement
8     1   955706   955706     8  22.178974  judgement
33    1  1262137  1262137    33  20.269292  judgement
62    1  1100639  1100639    62  19.485498  judgement
477   1  8612905  8612905   477  14.642774  judgement



BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 12.35q/s]


total result:
  qid    docid    docno  rank      score  \
0   1  3344828  3344828     0  67.590115   
1   1  1381477  1381477     1  67.283411   
2   1  3830857  3830857     2  67.283411   
3   1  3174840  3174840     3  64.836880   
4   1  1735360  1735360     4  62.984521   

                                         query  
0  does legionella pneumophila cause pneumonia  
1  does legionella pneumophila cause pneumonia  
2  does legionella pneumophila cause pneumonia  
3  does legionella pneumophila cause pneumonia  
4  does legionella pneumophila cause pneumonia  

does :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 500.33q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

legionella :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 62.50q/s]


avg score: 20.29739926874832
    qid    docid    docno  rank      score       query
9     1  3344828  3344828     9  24.348731  legionella
80    1  1381477  1381477    80  20.035820  legionella
81    1  3830857  3830857    81  20.035820  legionella
109   1  3174840  3174840   109  18.598533  legionella
110   1  1735360  1735360   110  18.468093  legionella

pneumophila :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 166.69q/s]


avg score: 21.593699697799185
   qid    docid    docno  rank      score        query
6    1  3344828  3344828     6  23.716360  pneumophila
16   1  1381477  1381477    16  21.883570  pneumophila
18   1  3830857  3830857    18  21.883570  pneumophila
25   1  3174840  3174840    25  20.313734  pneumophila
27   1  1735360  1735360    27  20.171264  pneumophila

cause :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 14.09q/s]


avg score: 0.0
Empty DataFrame
Columns: [qid, docid, docno, rank, score, query]
Index: []

pneumonia :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 35.71q/s]


avg score: 14.59963273743576
    qid    docid    docno  rank      score      query
7     1  3174840  3174840     7  19.080014  pneumonia
78    1  1381477  1381477    78  18.193548  pneumonia
79    1  3830857  3830857    79  18.193548  pneumonia
214   1  1735360  1735360   214  17.531054  pneumonia


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 15.63q/s]


total result:
  qid    docid    docno  rank      score                                query
0   1  8689054  8689054     0  38.471748  when was the salvation army founded
1   1  4511501  4511501     1  38.190918  when was the salvation army founded
2   1  4511504  4511504     2  37.620726  when was the salvation army founded
3   1  8689056  8689056     3  37.273490  when was the salvation army founded
4   1  5773189  5773189     4  36.879666  when was the salvation army founded

when :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 1000.55q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

was :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 999.60q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

the :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 496.48q/s]


avg score: 0.0
Empty DataFrame
Columns: [docid, docno, rank, score, qid, query]
Index: []

salvation :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 40.00q/s]


avg score: 21.063656212203824
   qid    docid    docno  rank      score      query
0    1  8689054  8689054     0  23.299275  salvation
4    1  5773189  5773189     4  22.335078  salvation
40   1  4511501  4511501    40  20.006730  salvation
48   1  8689056  8689056    48  19.935902  salvation
53   1  4511504  4511504    53  19.741296  salvation

army :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 31.25q/s]


avg score: 5.943412119774716
    qid    docid    docno  rank      score query
33    1  8689054  8689054    33  15.172473  army
151   1  5773189  5773189   151  14.544588  army

founded :


BR(BM25): 100%|██████████| 1/1 [00:00<00:00, 18.52q/s]

avg score: 0.0
Empty DataFrame
Columns: [qid, docid, docno, rank, score, query]
Index: []



