# Load Dataset

Connect to google drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

Pickle file locations in my drive
<br>/content/drive/MyDrive/robust04/docs.pkl      containing documents
<br>/content/drive/MyDrive/robust04/queries.pkl   containing queries
<br>/content/drive/MyDrive/robust04/qrels.pkl     containing query relevance judgments
<br> For more detail of the datasets, please refer to https://ir-datasets.com/trec-robust04.html

<br>Note that the official website only offers a .tar file.

In [None]:
# queries
queries_df = pd.read_pickle("/content/drive/MyDrive/robust04/queries.pkl")

In [None]:
# documents
docs_df = pd.read_pickle("/content/drive/MyDrive/robust04/docs.pkl")

In [None]:
# query relevance
qrels_df = pd.read_pickle("/content/drive/MyDrive/robust04/qrels.pkl")

In [None]:
# Example
queries_df.head(2)

Unnamed: 0,query_id,title,description,narrative
0,301,International Organized Crime,Identify organizations that participate in int...,A relevant document must as a minimum identify...
1,302,Poliomyelitis and Post-Polio,Is the disease of Poliomyelitis (polio) under ...,Relevant documents should contain data or outb...


In [None]:
docs_df.head(1)

Unnamed: 0,doc_id,text,marked_up_doc
0,FBIS3-1,"\n\nPOLITICIANS, PARTY PREFERENCES \n\n Sum...","<TEXT>\nPOLITICIANS, PARTY PREFERENCES \n\n ..."


In [None]:
docs_df.shape

(528155, 3)

# Haystack





## Import

In [None]:
!pip install beir
!pip install tensorflow-text
!pip install farm-haystack
!pip install --upgrade pip
!pip install git+https://github.com/deepset-ai/haystack.git
!pip install tensorflow

In [None]:
from typing import List
import requests

from haystack import Document
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import RAGenerator, DensePassageRetriever
from haystack.utils import fetch_archive_from_http, print_answers, print_documents

from haystack.document_stores import FAISSDocumentStore
from haystack import Document

from haystack.modeling.evaluation.squad import compute_f1 as calculate_f1_str
from haystack.modeling.evaluation.squad import compute_exact as calculate_em_str

In [None]:
from haystack.retriever.dense import EmbeddingRetriever
from haystack.pipelines import DocumentSearchPipeline

In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
import pprint

## Load into Document Store

In [None]:
# optional if we have already the document store 
# run this code if writing new document
# documents: List[Document] = []
# for index, row in docs_df.iterrows():
#   # if index == 10:
#   #   break
#   documents.append(Document(content=row['text'], meta={"name": row['doc_id'] or ""}))


In [None]:
# document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", similarity="cosine", return_embedding=True)

In [None]:
# document_store.delete_documents()

# document_store.write_documents(documents)
# document_store.get_document_count()

Writing Documents:   0%|          | 0/528155 [00:00<?, ?it/s]

522815

In [None]:
!cp -R /content/drive/MyDrive/robust04/robust04 /content/robust04 

In [None]:
!cp -R /content/drive/MyDrive/robust04/robust04.json /content/robust04.json 

In [None]:
!cp -R /content/drive/MyDrive/robust04/FAISS_DS/faiss_document_store.db /content/faiss_document_store.db

In [None]:
# !cp -R /content/faiss_document_store.db /content/drive/MyDrive/robust04/FAISS_DS/
# !cp -R /content/robust04.json /content/drive/MyDrive/robust04/FAISS_DS/robust04.json
# !cp -R /content/robust04 /content/drive/MyDrive/robust04/FAISS_DS/robust04

In [None]:
document_store = FAISSDocumentStore.load("robust04", "robust04.json")
#document_store = FAISSDocumentStore.load("robust04")

In [None]:
# Smoke test: check that the document store loaded successfully by querying it
# with an all-zero embedding vector.
# NOTE(review): 768 is presumably the embedding dimension of the stored index — confirm.
document_store.query_by_embedding( query_emb = np.zeros(768))

## Retriever

In [None]:
# Embedding-based retriever bound to the FAISS document store loaded above.
# The model is referenced by its hub name and loaded in sentence-transformers format.
retriever = EmbeddingRetriever(
   document_store=document_store,
   scale_score = False,  # presumably keeps raw similarity scores unscaled — confirm in the Haystack docs
   embedding_model="flax-sentence-embeddings/all_datasets_v3_mpnet-base",
   model_format="sentence_transformers",
   batch_size = 128
)

In [None]:
# document_store.update_embeddings(retriever, update_existing_embeddings=False)
# #document_store = FAISSDocumentStore.load("robust04")
# # pick one of the two lines

Updating Embedding:   0%|          | 0/522815 [00:00<?, ? docs/s]

In [None]:
# document_store.save(index_path = "robust04") # only save after embedding

In [None]:
pipeline = DocumentSearchPipeline(retriever = retriever)

## Example retrieval (optional, no need to run)

In [None]:
index = 0
query = queries_df['title'][index]

In [None]:
print(query)

International Organized Crime


In [None]:
result = pipeline.run(
             query=query, 
             params={"Retriever": {"top_k": 100}}
             )

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
result

In [None]:
print(result["documents"][0].meta)
print(result["documents"][1].meta)
print(result["documents"][2].meta)
print(result["documents"][3].meta)
print(result["documents"][99].meta, result["documents"][99].score)

{'name': 'FBIS3-41285', 'vector_id': '40656'}
{'name': 'FBIS3-42315', 'vector_id': '41644'}
{'name': 'FBIS4-57987', 'vector_id': '117598'}
{'name': 'FBIS4-43552', 'vector_id': '103436'}
{'name': 'FBIS4-63104', 'vector_id': '122625'} 0.49457774


In [None]:
temp_dic = {}
for i in range(100):
  temp_dic[result["documents"][i].meta["name"]] = result["documents"][i].score

In [None]:
result["documents"][99].meta["name"]
result["documents"][99].score

0.49457774

In [None]:
result["documents"]

# Evaluation 

In [None]:
# https://github.com/cvangysel/pytrec_eval

In [None]:
!pip install pytrec_eval 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytrec_eval
  Downloading pytrec_eval-0.5.tar.gz (15 kB)
Building wheels for collected packages: pytrec-eval
  Building wheel for pytrec-eval (setup.py) ... [?25l[?25hdone
  Created wheel for pytrec-eval: filename=pytrec_eval-0.5-cp37-cp37m-linux_x86_64.whl size=269163 sha256=4dd4a390f517f91d3a0055d01622ff5ae24d56ad41dc4753accc39abcb7df220
  Stored in directory: /root/.cache/pip/wheels/42/96/77/0829b8b2606f90f61ba10a51277629d2b615604e122ee932f4
Successfully built pytrec-eval
Installing collected packages: pytrec-eval
Successfully installed pytrec-eval-0.5


In [None]:
import pytrec_eval
import json

qrel = {
    'q1': {
        'd1': 0,
        'd2': 1,
        'd3': 0,
    },
    'q2': {
        'd2': 1,
        'd3': 1,
    },
}

run = {
    'q1': {
        'd1': 1.0,
        'd2': 0.0,
        'd3': 1.5,
    },
    'q2': {
        'd1': 1.5,
        'd2': 0.2,
        'd3': 0.5,
    }
}

evaluator = pytrec_eval.RelevanceEvaluator(
    qrel, {'map', 'ndcg','recall_100', 'P_100'})
# metrics can be found:  https://github.com/usnistgov/trec_eval/blob/de6a29f8ba9312c73f978aa9739695aa8ebf48eb/measures.c

print(json.dumps(evaluator.evaluate(run), indent=1))

{
 "q1": {
  "map": 0.3333333333333333,
  "P_100": 0.01,
  "recall_100": 1.0,
  "ndcg": 0.5
 },
 "q2": {
  "map": 0.5833333333333333,
  "P_100": 0.02,
  "recall_100": 1.0,
  "ndcg": 0.6934264036172708
 }
}


In [None]:
# must run
query_ids = qrels_df['query_id'].unique()

In [None]:
query_ids

array(['301', '302', '303', '304', '305', '306', '307', '308', '309',
       '310', '311', '312', '313', '314', '315', '316', '317', '318',
       '319', '320', '321', '322', '323', '324', '325', '326', '327',
       '328', '329', '330', '331', '332', '333', '334', '335', '336',
       '337', '338', '339', '340', '341', '342', '343', '344', '345',
       '346', '347', '348', '349', '350', '351', '352', '353', '354',
       '355', '356', '357', '358', '359', '360', '361', '362', '363',
       '364', '365', '366', '367', '368', '369', '370', '371', '372',
       '373', '374', '375', '376', '377', '378', '379', '380', '381',
       '382', '383', '384', '385', '386', '387', '388', '389', '390',
       '391', '392', '393', '394', '395', '396', '397', '398', '399',
       '400', '401', '402', '403', '404', '405', '406', '407', '408',
       '409', '410', '411', '412', '413', '414', '415', '416', '417',
       '418', '419', '420', '421', '422', '423', '424', '425', '426',
       '427', '428',

In [None]:
# make qrel into double layer dic: {query_id: {doc_id: relevance}}
# must run
# One groupby pass replaces re-filtering the whole qrels frame for every query.
# sort=False keeps the queries in order of first appearance, i.e. the same order
# as qrels_df['query_id'].unique(). The loop variable is no longer called `id`,
# so the builtin stays usable in later cells.
qrels_dic = {
    query_id: pd.Series(judged.relevance.values, index=judged.doc_id).to_dict()
    for query_id, judged in qrels_df.groupby("query_id", sort=False)
}

In [None]:
#queries_df.head(100)

In [None]:
query_dic = pd.Series(queries_df.title.values,index=queries_df.query_id).to_dict()

In [None]:
query_dic[str(301)]

'International Organized Crime'

In [None]:
# baseline result: dense-retrieval run in pytrec_eval format,
# {query_id: {doc_name: score}}
top_k = 20  # single source of truth for the retrieval depth

retrieval_dic = {}
for query_id in query_ids:
  query = query_dic[query_id]
  print(query)
  print(query_id)
  print("----------------- retrieval for id above")

  result = pipeline.run(
             query=query,
             params={"Retriever": {"top_k": top_k}}
  )

  # Iterate over what was actually returned instead of indexing 0..19, so a
  # query with fewer than top_k hits no longer raises IndexError. float() accepts
  # both numpy scalars and plain Python floats (.astype only exists on the former).
  retrieval_dic[query_id] = {
      doc.meta["name"]: float(doc.score) for doc in result["documents"]
  }

  print("----------------- retrieval for id above done")

In [None]:
evaluator = pytrec_eval.RelevanceEvaluator(
    qrels_dic, {'map', 'ndcg','recall_20', 'P_20'})
result = evaluator.evaluate(retrieval_dic)
print(result)

{'301': {'map': 0.036127672913110774, 'P_20': 0.85, 'recall_20': 0.03794642857142857, 'ndcg': 0.09691289870696555}, '302': {'map': 0.15722470340117398, 'P_20': 0.6, 'recall_20': 0.18461538461538463, 'ndcg': 0.3137981387355305}, '303': {'map': 0.0, 'P_20': 0.0, 'recall_20': 0.0, 'ndcg': 0.0}, '304': {'map': 0.005612244897959184, 'P_20': 0.1, 'recall_20': 0.01020408163265306, 'ndcg': 0.03579785577438517}, '305': {'map': 0.05224489795918367, 'P_20': 0.15, 'recall_20': 0.08571428571428572, 'ndcg': 0.16956387242007173}, '306': {'map': 0.006632412000944956, 'P_20': 0.2, 'recall_20': 0.012048192771084338, 'ndcg': 0.03960350451342723}, '307': {'map': 0.008609221466364324, 'P_20': 0.2, 'recall_20': 0.01904761904761905, 'ndcg': 0.05111102291442516}, '308': {'map': 0.2857142857142857, 'P_20': 0.1, 'recall_20': 0.5, 'ndcg': 0.490300956513237}, '309': {'map': 0.0, 'P_20': 0.0, 'recall_20': 0.0, 'ndcg': 0.0}, '310': {'map': 0.0641025641025641, 'P_20': 0.15, 'recall_20': 0.23076923076923078, 'ndcg': 

In [None]:
pprint.pprint(result)

In [None]:
## calculate average
# Macro-average of the four metrics over all queries in query_ids.
# The *_avg names hold running sums until they are divided by count in the final print.
count = 0
P_20_avg = 0
map_avg = 0
ndcg_avg = 0
recall_20_avg = 0
for query_id in query_ids:
  per_query = result[query_id]
  P_20 = per_query["P_20"]
  # Named map_score rather than `map` so the builtin stays usable in later cells.
  map_score = per_query["map"]
  ndcg = per_query["ndcg"]
  recall_20 = per_query["recall_20"]

  # Report queries for which every metric is zero.
  if P_20 == 0.0 and map_score == 0.0 and ndcg == 0.0 and recall_20 == 0.0:
    print("this query has no value: ", query_id)

  count = count + 1
  P_20_avg = P_20_avg + P_20
  map_avg = map_avg + map_score
  ndcg_avg = ndcg_avg + ndcg
  recall_20_avg = recall_20_avg + recall_20

if count:
  print("P_20: ", P_20_avg/count, "\nmap: ", map_avg/count, "\nndcg:",
        ndcg_avg/count, "\nrecall_20:", recall_20_avg/count )
else:
  # Avoid a ZeroDivisionError when there is nothing to average.
  print("no queries to average")

this query has no value:  303
this query has no value:  309
this query has no value:  316
this query has no value:  325
this query has no value:  348
this query has no value:  367
this query has no value:  376
this query has no value:  378
this query has no value:  409
this query has no value:  433
this query has no value:  437
this query has no value:  447
this query has no value:  620
this query has no value:  652
this query has no value:  665
this query has no value:  690
P_20:  0.3271084337349397 
map:  0.12639341317949004 
ndcg: 0.2434801936268124 
recall_20: 0.1838325609132651


# Find explainable features

## Entity matching from existing tools

In [None]:
!pip install tagme

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tagme
  Downloading tagme-0.1.3-py2.py3-none-any.whl (8.2 kB)
Installing collected packages: tagme
Successfully installed tagme-0.1.3


In [None]:
import tagme

# personal token
tagme.GCUBE_TOKEN = "f83a241f-c321-4473-ae3c-0dd2151382fd-843339462"

In [None]:
rels = tagme.relatedness_title(("Barack Obama",  "Presidency of Barack Obama"))
score = rels.relatedness[0].rel
print(score)

0.6184030771255493


### REL API

In [None]:
## example
import json
import requests


IP_ADDRESS = "https://rel-entity-linker.d4science.org/"
MY_GCUBE_TOKEN = "f83a241f-c321-4473-ae3c-0dd2151382fd-843339462"

document = {
    "text": "Schumacher won the race in Indianapolis",
    "spans": []
}

API_result = requests.post("{}".format(IP_ADDRESS), data=json.dumps(document),
                            headers={'gcube-token': MY_GCUBE_TOKEN, 'Content-Type': 'application/json'})

if API_result.status_code == 200:
    print(API_result.json())
else:
    print(API_result.status_code)

[[0, 10, 'Schumacher', 'Michael_Schumacher', 0.9985688878422279, 0.9996414184570312, 'PER'], [27, 12, 'Indianapolis', 'Indianapolis', 0.9986363930690854, 0.9992759823799133, 'LOC']]


In [None]:
import json
import requests
def REL_entity_linking(text, token=None, timeout=60):
  """Annotate `text` with the hosted REL entity-linking service.

  Args:
    text: Raw text to annotate.
    token: D4Science gcube token. When omitted, the GCUBE_TOKEN environment
      variable is used, falling back to the legacy token embedded below.
    timeout: Seconds to wait for the service before giving up, so one stalled
      request cannot hang a long annotation loop indefinitely.

  Returns:
    On HTTP 200, the decoded JSON body. In this notebook's sample output that is
    a list of entries like
    [start, length, mention, wiki_title, score, score, type].
    On any other status, the integer HTTP status code.

  Raises:
    requests.exceptions.RequestException: on connection errors or timeout.
  """
  import os

  IP_ADDRESS = "https://rel-entity-linker.d4science.org/"
  if token is None:
    # NOTE(review): the literal fallback only keeps existing runs working;
    # rotate this token and supply it via GCUBE_TOKEN instead of committing it.
    token = os.environ.get("GCUBE_TOKEN", "f83a241f-c321-4473-ae3c-0dd2151382fd-843339462")

  document = {
    "text": text,
    "spans": []
  }
  print(document)
  API_result = requests.post("{}".format(IP_ADDRESS), data=json.dumps(document),
                            headers={'gcube-token': token, 'Content-Type': 'application/json'},
                            timeout=timeout)

  if API_result.status_code == 200:
    # Decode the body once and reuse it for both the log line and the return value.
    entities = API_result.json()
    print(entities)
    return entities
  else:
    print(API_result.status_code)
    return API_result.status_code

In [None]:
REL_entity_linking("obama")

{'text': 'obama', 'spans': []}
[[0, 5, 'obama', 'Barack_Obama', 0.9988559949036256, 0.9934688806533813, 'PER']]


[[0,
  5,
  'obama',
  'Barack_Obama',
  0.9988559949036256,
  0.9934688806533813,
  'PER']]

### WAT

In [None]:
import json
import requests

MY_GCUBE_TOKEN = "f83a241f-c321-4473-ae3c-0dd2151382fd-843339462"

class WATAnnotation:
    """A single entity annotation produced by WAT.

    Built from one element of the ``annotations`` list in a WAT response.
    """

    def __init__(self, d):
        # Character span of the mention: start is inclusive, end is exclusive.
        self.start = d['start']
        self.end = d['end']

        # Annotation accuracy.
        self.rho = d['rho']

        # Spot-entity probability, nested inside the debug explanation.
        prior_details = d['explanation']['prior_explanation']
        self.prior_prob = prior_details['entity_mention_probability']

        # The annotated surface text.
        self.spot = d['spot']

        # Wikipedia entity information.
        self.wiki_id = d['id']
        self.wiki_title = d['title']

    def json_dict(self):
        """Return the annotation as a plain, JSON-serializable dictionary."""
        exported = ('wiki_title', 'wiki_id', 'start', 'end', 'rho', 'prior_prob')
        return {field: getattr(self, field) for field in exported}


def wat_entity_linking(text):
    """Annotate `text` with the WAT entity linker.

    Sends a GET request to the WAT tagging endpoint and wraps every element of
    the response's 'annotations' list in a WATAnnotation.
    """
    tagger_method = "spotter:includeUserHint=true:includeNamedEntity=true:includeNounPhrase=true,prior:k=50,filter-valid,centroid:rescore=true,topk:k=5,voting:relatedness=lm,ranker:model=0046.model,confidence:model=pruner-wiki.linear"
    query_params = [
        ("gcube-token", MY_GCUBE_TOKEN),
        ("text", text),
        ("lang", 'en'),
        ("tokenizer", "nlp4j"),
        ('debug', 9),
        ("method", tagger_method),
    ]

    reply = requests.get('https://wat.d4science.org/wat/tag/tag', params=query_params)

    annotations = []
    for raw_annotation in reply.json()['annotations']:
        annotations.append(WATAnnotation(raw_annotation))
    return annotations


def print_wat_annotations(wat_annotations):
    """Pretty-print annotations as a JSON list, using each one's json_dict()."""
    as_dicts = []
    for annotation in wat_annotations:
        as_dicts.append(annotation.json_dict())
    rendered = json.dumps(as_dicts, indent=4)
    print(rendered)

def wat_entity_relateness(id1, id2):
  """Issue a token-authenticated GET carrying two entity ids and return the raw response.

  Request shape:
  https://wat.d4science.org/wat/tag/tag?gcube-token=<your Service Authorization Token>&ids=534366&ids=20082093

  NOTE(review): this targets the tagging endpoint (/wat/tag/tag), not
  /wat/relatedness/graph — confirm that this is the intended URL.
  """
  endpoint = 'https://wat.d4science.org/wat/tag/tag'
  query_params = [
      ("gcube-token", MY_GCUBE_TOKEN),
      ("ids", id1),
      ("ids", id2),
  ]
  return requests.get(endpoint, params=query_params)


# Request shape: https://wat.d4science.org/wat/relatedness/graph?ids=534366&ids=20082093
def wat_entity_relateness_2(id1, id2):
  """GET the WAT relatedness graph for two entity ids and return the raw response.

  No gcube token is sent with this request.
  """
  id_params = [("ids", id1), ("ids", id2)]
  return requests.get("https://wat.d4science.org/wat/relatedness/graph", params=id_params)


In [None]:
# #wat_entity_relateness(id1=534366,id2=20082093)
# wat_entity_relateness_2(id1="534366",id2="20082093")
# #534366&ids=20082093
# respone = wat_entity_relateness_2(id1="534366",id2="20082093")

In [None]:
# json_data = json.loads(respone.text)

In [None]:
# wat_annotations= wat_entity_linking("Michael Schumacher")
# print_wat_annotations(wat_annotations)

[
    {
        "wiki_title": "Michael_Schumacher",
        "wiki_id": 20396,
        "start": 0,
        "end": 18,
        "rho": 0.18746185894297396,
        "prior_prob": 0.9994329458463284
    }
]


## Our approach for re-ranking with KG

In [None]:
def evaluate(qrels_dic, rerank_retrieval_dic):
  """Score a run against the qrels with pytrec_eval.

  Computes map, ndcg, recall_20 and P_20, pretty-prints the evaluator's
  output and returns it.
  """
  measures = {'map', 'ndcg','recall_20', 'P_20'}
  scorer = pytrec_eval.RelevanceEvaluator(qrels_dic, measures)
  per_query_scores = scorer.evaluate(rerank_retrieval_dic)
  pprint.pprint(per_query_scores)
  return per_query_scores

In [None]:
# create json object from dictionary
import json
def save_dic(result, name, path="/content/drive/MyDrive/robust04/results/"):
  """Serialize `result` to JSON and write it to `<path>/<name>`.

  Args:
    result: JSON-serializable object, e.g. the per-query metric dictionary.
    name: File name inside `path`; converted with str() so non-string ids work.
    path: Target directory; defaults to the Drive results folder and is
      created if it does not exist yet.

  Raises:
    TypeError: if `result` is not JSON-serializable.
    OSError: if the directory or file cannot be created or written.
  """
  import os

  # Serialize first, so a non-serializable result never truncates an existing file.
  temp_result = json.dumps(result)

  os.makedirs(path, exist_ok=True)
  path_final = os.path.join(path, str(name))

  # The context manager closes the handle even if the write fails.
  with open(path_final, "w") as f:
    f.write(temp_result)

In [None]:
# Rebuild the evaluation lookups so this section can be run on its own:
#   qrels_dic: {query_id: {doc_id: relevance}}
#   query_dic: {query_id: title}
# One groupby pass replaces re-filtering the whole qrels frame for every query.
# sort=False keeps the queries in order of first appearance, and the loop
# variable is not called `id`, so the builtin stays usable in later cells.
qrels_dic = {
    query_id: pd.Series(judged.relevance.values, index=judged.doc_id).to_dict()
    for query_id, judged in qrels_df.groupby("query_id", sort=False)
}

query_dic = pd.Series(queries_df.title.values,index=queries_df.query_id).to_dict()

In [None]:
list_id = qrels_df['query_id'].unique()

### Main body for calculating relatedness 


In [None]:
# Re-rank the top-k dense-retrieval hits of every query by entity relatedness.
#
# For each query:
#   1. link the query text to entities with REL;
#   2. retrieve top_k documents and link each document's text with REL;
#   3. for every query entity, average its tagme relatedness to all entities of
#      the document; the document's score is the sum of these averages;
#   4. store the scores, re-evaluate the run so far, and checkpoint the
#      evaluation to disk under the query id.
# Queries without linked entities, or for which any linking call failed, are skipped.

top_k = 20  # retrieval depth; the document loop follows what is actually returned

# (query_title, doc_title) -> relatedness. The same entity pairs recur across
# documents, so caching them avoids repeated network calls to tagme.
_relatedness_cache = {}


def _linked_entities(text):
  """Return the REL annotations for `text`, or None when linking failed."""
  try:
    entities = REL_entity_linking(text)
  except Exception:  # not a bare except: Ctrl-C must still stop the loop
    return None
  # REL_entity_linking returns the integer HTTP status code on a non-200 reply.
  return entities if isinstance(entities, list) else None


def _title_relatedness(query_title, doc_title):
  """Return the tagme relatedness of two Wikipedia titles, or 0 when unavailable."""
  pair = (query_title, doc_title)
  if pair in _relatedness_cache:
    return _relatedness_cache[pair]
  try:
    rels = tagme.relatedness_title(pair)
    score = rels.relatedness[0].rel
  except Exception:
    return 0  # failures are not cached, so the pair is retried next time
  if score is None:
    score = 0
  _relatedness_cache[pair] = score
  return score


rerank_retrieval_dic = {}
for query_id in list_id:
  print("------------")
  print("------", query_id)
  print("------------")
  query = query_dic[query_id]
  print(query)

  query_entities = _linked_entities(query)
  err = query_entities is None
  if not err:
    print(query_entities)
  print("err: ", err)

  if err or len(query_entities) == 0:
    continue

  retrieved = pipeline.run(
           query=query,
           params={"Retriever": {"top_k": top_k}}
  )
  temp_dic = {}
  print("------------------------- pair to pair: start:  ", query_id)
  doc_ent_err = False
  for rank, hit in enumerate(retrieved["documents"]):
    print("====================   ", rank, "   =============================")
    doc_entities = _linked_entities(hit.content)
    if doc_entities is None:
      doc_ent_err = True
      break

    per_query_entity = []
    for query_entity in query_entities:
      # Index 3 of a REL annotation is the linked Wikipedia title. Scores for ALL
      # entities of the document are collected before averaging; resetting the
      # list inside the inner loop would keep only the last pair.
      pair_scores = [
          _title_relatedness(query_entity[3], doc_entity[3])
          for doc_entity in doc_entities
      ]
      # A document without linked entities contributes 0 instead of failing.
      average_rel = sum(pair_scores) / len(pair_scores) if pair_scores else 0.0
      per_query_entity.append(average_rel)

    temp_dic[hit.meta["name"]] = sum(per_query_entity)

  print("---------------- pair to pair: end")

  if not doc_ent_err:
    rerank_retrieval_dic[query_id] = temp_dic

    # Checkpoint: evaluation of all queries processed so far, saved under this id.
    result = evaluate(qrels_dic, rerank_retrieval_dic)
    save_dic(result, query_id)


------------
------ 665
------------
poverty Africa sub-Sahara
{'text': 'poverty Africa sub-Sahara', 'spans': []}
[]
[]
err:  False
------------
------ 666
------------
Thatcher resignation impact
{'text': 'Thatcher resignation impact', 'spans': []}
[[0, 8, 'Thatcher', 'Margaret_Thatcher', 0.9986165623324692, 0.9922820925712585, 'PER']]
[[0, 8, 'Thatcher', 'Margaret_Thatcher', 0.9986165623324692, 0.9922820925712585, 'PER']]
err:  False


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   666
{'text': '\n\n\nNovember 29, 1990, Thursday, Home Edition \n\n\n\n\nTHATCHER BIDS QUEEN FAREWELL, BECOMES A BACKBENCHER; \n\n\nBRITAIN: SHE STILL REPRESENTS HER DISTRICT IN THE HOUSE OF COMMONS. FOR THE \nFUTURE, THERE IS TALK OF A BOOK, LECTURE TOURS AND THE HOUSE OF LORDS. \n\n\n\n\nThe crowd cheered, Margaret Thatcher raised her hand in a familiar wave and \nthen she was gone, out the gates of Buckingham Palace and into history. \n\n\nThatcher, prime minister for more than 11 years, had formally resigned and \nreturned her "letters patent" to Queen Elizabeth II and bade the monarch \nfarewell in a 45-minute audience Wednesday morning. \n\n\nShe had already made her final appearance as prime minister at the House of \nCommons and said goodby to her staff at 10 Downing St., her office and \nresidence, and at Conservative Party headquarters. \n\n\nWishing her successor well, she declared in an emotional statement on the steps \nof 10

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   669
[[52, 5, 'ISLAM', 'Islam', 0.9984227366642731, 0.6216033697128296, 'PER'], [129, 7, 'ISLAMIC', 'Islamism', 0.9981323538066712, 0.9502958059310913, 'MISC'], [188, 7, 'ALGERIA', 'Algeria', 0.998492020849007, 0.9944047927856445, 'LOC'], [284, 8, "Algiers'", 'Algiers', 0.9985892189246018, 0.6017681956291199, 'LOC'], [332, 11, 'Middle East', 'Middle_East', 0.9992199567391175, 0.9849947392940521, 'LOC'], [405, 7, 'Algeria', 'Algeria', 0.9984905112234225, 0.9999411106109619, 'LOC'], [578, 6, 'France', 'France', 0.9986743101165702, 0.9996194839477539, 'LOC'], [640, 7, 'Islamic', 'Islam', 0.9988229217441214, 0.9920891523361206, 'MISC'], [749, 7, 'Iranian', 'Iran', 0.9984479336220434, 0.9931426048278809, 'MISC'], [775, 7, 'Islamic', 'Islam', 0.998784963995071, 0.9985660910606384, 'MISC'], [839, 5, 'Islam', 'Islam', 0.9986943944986811, 0.9958807229995728, 'MISC'], [973, 5, 'Islam', 'Islam', 0.9990217467256157, 0.9932049512863159, 'MISC'], [1095

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   670
{'text': "\n\n\nApril 24, 1989, Monday, Home Edition \n\n\n\n\nFOR WANT OF A VOTE . . . \n\n\n\n\nFifty-seven voters forced Julie Korenstein, The Times' choice for reelection to \nthe Los Angeles Board of Education, into a runoff in the general election in \nJune. Although nearly 70,000 voters marked ballots on April 11, a handful of \nvotes denied Korenstein the majority she needed for outright victory. The \nresults ought to persuade Californians everywhere that it matters a great deal \nwhether they go to the polls. \n\n\nNearly 305,000 voters were eligible to vote in the Korenstein election, but \nsomething less than 25% actually did so. Maybe the non-voters didn't care about \nthe quality of instruction in the public schools or how much the teachers will \nget paid, the current controversies before the board. \n\n\nUnfortunately, the low voter turnout shouldn't be all that surprising. It is \npart of a national trend. \n\n\nLast

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   671
{'text': '\n\n\nSeptember 18, 1990, Tuesday, Home Edition \n\n\n\n\nSALVATION ARMY FIGHTS MINIMUM WAGE ORDER \n\n\n\n\nThe Salvation Army gives its clients "soup, soap and salvation," but the \nfederal government says it isn\'t enough. \n\n\nThe Labor Department claims that people to whom the Salvation Army ministers -- \ngiving them room, board and up to about $20 a week for helping at the centers \n-- are employees who should be entitled to a minimum wage under the Fair Labor \nStandards Act. \n\n\nBut Army officials said the payments could doom the centers and have said the \ncompany will not comply with the demand. \n\n\n', 'spans': []}
[[100, 14, 'Salvation Army', 'The_Salvation_Army', 0.9983543775696914, 0.8827670812606812, 'ORG'], [218, 16, 'Labor Department', 'United_States_Department_of_Labor', 0.9984037701968616, 0.9048447608947754, 'ORG'], [266, 14, 'Salvation Army', 'The_Salvation_Army', 0.9983556109198416, 0.775283157825

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   673
{'text': '\n\n\nFebruary 7, 1989, Tuesday, Home Edition \n\n\n\n\nFESTIVE SOVIET CEREMONY MARKS AFGHAN WITHDRAWAL \n\n\n\n\nThe Soviet Union marked its military withdrawal from Afghanistan on Monday with \na festive if premature ceremony characterized more by a sense of relief than of \ntriumph or satisfaction. \n\n\nA battalion of 300 paratroopers who had left their base near Kabul three days \nearlier rolled onto Soviet territory on the north bank of the Amu Darya River \nat 11:37 a.m. local time, marking the beginning of the withdrawal\'s final \nphase. \n\n\nAs they crossed the Friendship Bridge linking the two countries here in Termez, \nthey passed a billboard bearing the message, "The Soviet Armed Forces Are a \nSchool of Hardening and Courage." \n\n\nBut for all the talk of the army having fulfilled its "internationalist duty" \nin nine years of propping up the revolution in Afghanistan, the highest-ranking \nofficial on hand

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   674
{'text': "\n930903\n\n\nFT  03 SEP 93 / Greenpeace takes on the Goliaths\n\n\nTHIS MORNING Greenpeace, the pressure group, will ask the Court of Appeal to\nhalt radioactive tests at the Thorp nuclear reprocessing plant in Cumbria\nuntil September 14, when the High Court will judge whether the tests are\nlegal.\nIn a separate action, Greenpeace has taken ICI, the chemicals group, to\ncourt for water pollution. Yesterday that case was adjourned until later in\nthe month for Greenpeace to assemble more laboratory evidence. The group\nregards the ICI action as a test case on water regulation and says that if\nit is successful more prosecutions of chemical companies may follow.\nThe two highly-publicised cases mark a new policy by one of Britain's\nlargest environmental pressure groups to bring legal actions itself, rather\nthan wait for government regulatory authorities to do so. Those watching\nthis drama of David and Goliath may wonder

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   675
{'text': '\n\n\nJuly 19, 1989, Wednesday, Orange County Edition \n\n\n\n\nGENE WOJCIECHOWSKI: THIS ONE-MAN TEAM IS NOT ALONE THIS TIME \n\n\n\n\nIt wasn\'t the finest two minutes and 11 seconds of Frank Wattles\' life, but it \nwill have to do for now. Look at it this way: at least there was someone else \nin the pool, which is a bit of a change for this loneliest of swimmers. \n\n\nFresh from a flu bout, limping noticeably on a sprained ankle, Wattles, a \nmember of the modest Mission Viejo Aquatics Club, climbed atop the starting \nplatform for the first event of the 1989 U.S. Olympic Festival -- the 200-meter \nindividual medley -- and proceeded to finish seventh in a race he considers his \nspecialty. As debuts go, it wasn\'t exactly what he had in mind. \n\n\nThen again, Wattles\' swim career hasn\'t gone exactly as planned. At 18, he \nfinds himself swimming against conventional wisdom, the result, in part, of a \nnasty departu

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   677
{'text': "\n920523\n\n\nFT  23 MAY 92 / Pisa's lean times may be over: After 800 years, tower's tilt\nmust be stopped\n\n\nTHE WORLD'S toughest corset is being fitted on a very special customer, the\nLeaning Tower of Pisa.\nThis week, workmen began to place 18 thin steel bands round the base of the\nwhite marble tower as an emergency measure, to contain the stress on the\n14,530 tonne structure. In its 800-year history as a leaning tower, this is\nthe first time such direct assistance has been necessary.\nThe emergency measures do not stop here. The group of 14 international\nexperts overseeing the fate of the tower this week agreed to a scheme to\nstabilise the monument's famous incline. 'We are going ahead with the\nplacement of 600 tons of lead slabs at the base of the tower,' says Prof\nMichele Jamiolkowski of Turin Polytechnic, head of the experts' commission.\nThe lead will be placed on the north side, opposite the tilt, will c

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   680
{'text': '\n\n\nDecember 10, 1989, Sunday, Home Edition \n\n\n\n\nSCHOOLS FAIL LATINOS IN ANY TONGUE; \n\n\nEDUCATION: THE \'CRISIS\' OF TOO FEW SPANISH-SPEAKING TEACHERS IN L. A. IS \nGROUNDED IN YEARS OF INSTITUTIONAL NEGLECT. \n\n\n\n\nIt\'s hard to be sympathetic with the Los Angeles Unified School District\'s \nexpressed frustrations in coping with 170,000 limited- and non-English-speaking \nstudents (90% of whom are Latino). I cannot help asking, whose fault is it? \n\n\nThe crisis in our schools did not come about by osmosis; it was the result of \ndemographic changes that happened openly, and it was predictable. \n\n\nAbout a decade ago, Mexico\'s economic slide and wars in Central America created \na flood of immigrants here. Add to this surges in Asian and Middle Eastern \nimmigration, and the result is overcrowded and chaotic inner-city schools \nstaffed by overworked people who cannot communicate with the students. \n\n\n

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   682
{'text': '\n\n\nAugust 26, 1989, Saturday, Orange County Edition \n\n\n\n\nGRADS PUT IN GOOD WORD FOR LESSONS IN ENGLISH \n\n\n\n\nSummer classes are finally over for these 25 boisterous pupils. \n\n\nAfter months of intense lessons, they are looking forward to a little fun. But \nnot necessarily at Splash Mountain or on the beach. \n\n\nThese students are more likely to prefer sipping some freshly brewed tea, \nstrolling through a park or just staying indoors and engaging in some \nold-fashioned conversation. \n\n\nOn Friday, the students -- 25 Vietnamese-American senior citizens -- graduated \nwith distinction from summer English classes sponsored by the Vietnamese \nCommunity of Orange County Inc. in Santa Ana. \n\n\nThe average age of the students was 70, and the oldest student was 83. \n\n\nThe program is one of only two in Orange County that specialize in teaching \nEnglish to elderly Southeast Asian refugees. The classes also 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   683
{'text': '\n\nLanguage:  English \nArticle Type:CSO \n\n [Commentary by Egon T. Lansky, a journalist and political \nanalyst who formerly served as Czechoslovak Foreign Ministry \nspokesman and ambassador to the Council of Europe: "After the \nTragedy: A Look at Year One in the Two Republics"] \n  [Text] Despite being one of the staunchest defenders of \nCzechoslovakia until the very last minutes of its existence, I \nmust admit that the majority of people in both successor states \nseem to have adapted to the new situation amazingly quickly. \n  Maybe that is always so with majorities. Life simply goes \non, \nand most people cannot afford to do anything but try to hang on \nthe best they can or else face the real existential dangers that \nseem to be lurking everywhere. \n  Passivity in the face of political decisions handed down \nfrom \nabove, such as the unpopular split of the Czechoslovak \nfederation, is, after all, what peopl

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   685
{'text': "\n940323\n\n\nFT  23 MAR 94 / World News in Brief: Spielberg film scoops the Oscar pool\n\n\nHolocaust drama Schindler's List swept the board at the Academy Awards in\nLos Angeles, winning seven Oscars for Steven Spielberg, including best\ndirector and best film. It was Spielberg's first Oscar success after a\nstring of box-office hits. British animator Nick Park (left) won the Oscar\nfor best animated short film for his 30-minute feature The Wrong Trousers.\nTom Hanks won the best actor award for his role in Philadelphia, the first\nmainstream film dealing with Aids, and Holly Hunter was named best actress\nfor her portrayal of a mute in The Piano.\n\n", 'spans': []}
[[47, 9, 'Spielberg', 'Steven_Spielberg', 0.9991836574641262, 0.9971561431884766, 'PER'], [86, 9, 'Holocaust', 'The_Holocaust', 0.9991025590884546, 0.7241600155830383, 'MISC'], [142, 14, 'Academy Awards', 'Academy_Award_for_Best_Actress', 0.9981641636945239, 0

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   686
{'text': "\n921118\n\n\nFT  18 NOV 92 / Argentina acts to stabilise the peso\n\n\nARGENTINA plans to take currency convertibility a step further in response\nto last week's speculation, which drove the peso below parity with the US\ndollar, by allowing banks and individuals to broaden hard currency holdings.\nThe central bank president, Mr Roque Fernandez, announced on Monday evening\nthat individuals could open dollar current accounts and banks could meet\ncentral bank reserve requirements in pesos or dollars. However, wages and\ntaxes would still be paid in pesos.\nThe hope is that this will strengthen confidence in the peso and encourage a\ndecline in interest rates - which reached 100 per cent a year, in dollar\nterms, on the interbank market on Monday. Yesterday, interest rates settled\ndown to 35-40 per cent, while share prices climbed 7 per cent and the peso\nrose above its maximum official rate of one peso to the dollar.\nThe

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   687
{'text': "\n940302\n\n\nFT  02 MAR 94 / A million miles from Beirut: Michael Cassell meets one of a\ngrowing contingent of overseas inward investors in Belfast\n\n\n'I thought I was heading for some place like Beirut,' insists Mr Tom\nJohnson, who three years ago moved his family from sleepy Shelbyville,\nIndiana, to the uncertain streets of Belfast. 'My preconceptions turned out\nto be a million miles away from reality.'\nMr Johnson, managing director of Ryobi Aluminium Casting, a Japanese-owned\nautomotive castings manufacturer, is one of the small but growing number of\nexpatriate businessmen sent to Northern Ireland to manage companies. He\nleaves shortly for another job in the US, where he will stoutly defend the\nreputation of a place he says is damaged by distorted reporting and too\noften viewed through a prism of ignorance.\nHe says: 'We've made good friends. My daughter has picked up the accent,\nthough I don't know how it 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   688
{'text': '\n\n\nJune 8, 1990, Friday, Home Edition \n\n\n\n\nN.Y. STRIFE CAPTURES MINORITY INTEREST \n\n\n\n\nBlacks and members of other minority groups paid more attention to articles \nabout racial tensions in New York City than any other news story in the last \nmonth, and most believe that the press has done a mediocre job at best of \ncovering the issue, according to a monthly survey by the Times Mirror Co. \n\n\nAmong nonwhites, 38% said that they were "very closely" following accounts of \nthe New York trial of two young white men convicted of charges related to the \nkilling of a black teen-ager in the Bensonhurst section of Brooklyn and of the \nboycott of Korean-owned stores in Brooklyn by some blacks. Only 13% of whites \nsaid that they were giving those stories "very close" attention. \n\n\nThe racial disparity in how much attention those stories received was a \ndeparture from the usual pattern that has been found by th

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   698
{'text': '\n\nBFN \n\n  [Text] Johannesburg May 3 SAPA-- \n\n\n------------------------------------------------------------------------------- \n|Socio-Economic Indicators on South Africa                                    | \n------------------------------------------------------------------------------- \n|area (square kilometres)              |1,223,201                             | \n------------------------------------------------------------------------------- \n|population (000s) (1993)              |40,715.7                              | \n------------------------------------------------------------------------------- \n|population growth (per cent)          |2.44                                  | \n------------------------------------------------------------------------------- \n|literacy rate (per cent)              |61.4                                  | \n----------------------------------------------------------------

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

------------------------- pair to pair: start:   700
{'text': '\n\n\nMarch 20, 1990, Tuesday, Home Edition \n\n\n\n\nBRIEFLY \n\n\n\n\nUnocal Chairman Backs Boost in Gas Tax: Richard Stegemeier said Congress should \nconsider hiking gasoline taxes as a way to pay for cleaning up the environment \nor upgrading transportation. "It\'s well known that gasoline taxes in the United \nStates are ridiculously low compared to the rest of the world," he said at the \nannual meeting of the American Institute of Chemical Engineers in Orlando, Fla. \nHe noted that U.S. motorists pay about $1 a gallon, inclusive of tax, compared \nto $3 in Europe and Asia. \n\n\n', 'spans': []}
[[59, 6, 'Unocal', 'Unocal_Corporation', 0.998694112566087, 0.9435524940490723, 'ORG'], [123, 8, 'Congress', 'United_States_Congress', 0.9988817120260197, 0.9995494484901428, 'ORG'], [293, 6, 'United', 'United_States', 0.9984841283860907, 0.9397889375686646, 'LOC'], [403, 40, 'American Institute of Chemical Engineers', 'Ameri

# Baseline result

In [None]:
# The baseline result was printed earlier in the Evaluation section; it is copied here directly.
baseline = {'301': {'P_20': 0.85,
         'map': 0.036127672913110774,
         'ndcg': 0.09691289870696555,
         'recall_20': 0.03794642857142857},
 '302': {'P_20': 0.6,
         'map': 0.15722470340117398,
         'ndcg': 0.3137981387355305,
         'recall_20': 0.18461538461538463},
 '303': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '304': {'P_20': 0.1,
         'map': 0.005612244897959184,
         'ndcg': 0.03579785577438517,
         'recall_20': 0.01020408163265306},
 '305': {'P_20': 0.15,
         'map': 0.05224489795918367,
         'ndcg': 0.16956387242007173,
         'recall_20': 0.08571428571428572},
 '306': {'P_20': 0.2,
         'map': 0.006632412000944956,
         'ndcg': 0.03960350451342723,
         'recall_20': 0.012048192771084338},
 '307': {'P_20': 0.2,
         'map': 0.008609221466364324,
         'ndcg': 0.05111102291442516,
         'recall_20': 0.01904761904761905},
 '308': {'P_20': 0.1,
         'map': 0.2857142857142857,
         'ndcg': 0.490300956513237,
         'recall_20': 0.5},
 '309': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '310': {'P_20': 0.15,
         'map': 0.0641025641025641,
         'ndcg': 0.19553598071525832,
         'recall_20': 0.23076923076923078},
 '311': {'P_20': 0.5,
         'map': 0.04106122574167687,
         'ndcg': 0.12858485140210288,
         'recall_20': 0.054945054945054944},
 '312': {'P_20': 0.05,
         'map': 0.01515151515151515,
         'ndcg': 0.07386356006695578,
         'recall_20': 0.09090909090909091},
 '313': {'P_20': 0.8,
         'map': 0.1535290277225761,
         'ndcg': 0.29589226740614055,
         'recall_20': 0.17204301075268819},
 '314': {'P_20': 0.15,
         'map': 0.03264790764790765,
         'ndcg': 0.13165773496860286,
         'recall_20': 0.06818181818181818},
 '315': {'P_20': 0.05,
         'map': 0.0007855459544383346,
         'ndcg': 0.014663450130273634,
         'recall_20': 0.014925373134328358},
 '316': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '317': {'P_20': 0.2,
         'map': 0.11211407639979068,
         'ndcg': 0.2821282532486612,
         'recall_20': 0.2857142857142857},
 '318': {'P_20': 0.05,
         'map': 0.004424778761061947,
         'ndcg': 0.027595434250284354,
         'recall_20': 0.008849557522123894},
 '319': {'P_20': 0.3,
         'map': 0.009768432259781742,
         'ndcg': 0.05439267438636195,
         'recall_20': 0.03529411764705882},
 '320': {'P_20': 0.1,
         'map': 0.053418803418803416,
         'ndcg': 0.18726753773110685,
         'recall_20': 0.3333333333333333},
 '321': {'P_20': 0.9,
         'map': 0.08866995073891626,
         'ndcg': 0.18691635928899442,
         'recall_20': 0.08866995073891626},
 '322': {'P_20': 0.15,
         'map': 0.031862745098039214,
         'ndcg': 0.12067867486650025,
         'recall_20': 0.08823529411764706},
 '323': {'P_20': 0.15,
         'map': 0.03961748633879781,
         'ndcg': 0.13058640061417123,
         'recall_20': 0.04918032786885246},
 '324': {'P_20': 1.0,
         'map': 0.12422360248447205,
         'ndcg': 0.23767981925642803,
         'recall_20': 0.12422360248447205},
 '325': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '326': {'P_20': 0.35,
         'map': 0.12657004830917873,
         'ndcg': 0.28245331377336713,
         'recall_20': 0.15217391304347827},
 '327': {'P_20': 0.15,
         'map': 0.20661157024793386,
         'ndcg': 0.39603354998136564,
         'recall_20': 0.2727272727272727},
 '328': {'P_20': 0.05,
         'map': 0.125,
         'ndcg': 0.2529427027676571,
         'recall_20': 0.125},
 '329': {'P_20': 0.5,
         'map': 0.23417197648290083,
         'ndcg': 0.4184414428811389,
         'recall_20': 0.2857142857142857},
 '330': {'P_20': 0.7,
         'map': 0.20337771512113614,
         'ndcg': 0.37072400368592234,
         'recall_20': 0.23333333333333334},
 '331': {'P_20': 0.9,
         'map': 0.0832071404461628,
         'ndcg': 0.17966436277979364,
         'recall_20': 0.08450704225352113},
 '332': {'P_20': 0.15,
         'map': 0.002099737532808399,
         'ndcg': 0.021396023193251235,
         'recall_20': 0.011811023622047244},
 '333': {'P_20': 0.3,
         'map': 0.05827505827505828,
         'ndcg': 0.18260041690468962,
         'recall_20': 0.09230769230769231},
 '334': {'P_20': 0.05,
         'map': 0.012345679012345678,
         'ndcg': 0.07075576072108061,
         'recall_20': 0.1111111111111111},
 '335': {'P_20': 0.9,
         'map': 0.26025330201378605,
         'ndcg': 0.4166867191489228,
         'recall_20': 0.2727272727272727},
 '336': {'P_20': 0.1,
         'map': 0.06547619047619048,
         'ndcg': 0.18934070931207542,
         'recall_20': 0.16666666666666666},
 '337': {'P_20': 0.5,
         'map': 0.07818371890952536,
         'ndcg': 0.2063712479917783,
         'recall_20': 0.10752688172043011},
 '338': {'P_20': 0.1,
         'map': 0.225,
         'ndcg': 0.3973220069685352,
         'recall_20': 0.5},
 '339': {'P_20': 0.2,
         'map': 0.09357142857142857,
         'ndcg': 0.264575352348175,
         'recall_20': 0.4},
 '340': {'P_20': 0.55,
         'map': 0.14274159817638077,
         'ndcg': 0.2885728017534164,
         'recall_20': 0.15942028985507245},
 '341': {'P_20': 0.4,
         'map': 0.07223517223517223,
         'ndcg': 0.20125729653454202,
         'recall_20': 0.10256410256410256},
 '342': {'P_20': 0.05,
         'map': 0.008333333333333333,
         'ndcg': 0.05059568297461706,
         'recall_20': 0.05},
 '343': {'P_20': 0.3,
         'map': 0.017574135597391412,
         'ndcg': 0.07119748344186676,
         'recall_20': 0.023255813953488372},
 '344': {'P_20': 0.2,
         'map': 0.5366666666666666,
         'ndcg': 0.7395987090715346,
         'recall_20': 0.8},
 '345': {'P_20': 0.15,
         'map': 0.021929824561403508,
         'ndcg': 0.10186332246068573,
         'recall_20': 0.07894736842105263},
 '346': {'P_20': 0.2,
         'map': 0.009011815252416757,
         'ndcg': 0.057431089195282864,
         'recall_20': 0.0380952380952381},
 '347': {'P_20': 0.1,
         'map': 0.0033891547049441787,
         'ndcg': 0.027438655559359523,
         'recall_20': 0.013157894736842105},
 '348': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '349': {'P_20': 0.2,
         'map': 0.028881278538812786,
         'ndcg': 0.11703523404776589,
         'recall_20': 0.0547945205479452},
 '350': {'P_20': 0.25,
         'map': 0.0421403855659911,
         'ndcg': 0.1480296327885957,
         'recall_20': 0.07352941176470588},
 '351': {'P_20': 0.85,
         'map': 0.3391154049339034,
         'ndcg': 0.5008641070340024,
         'recall_20': 0.3541666666666667},
 '352': {'P_20': 0.25,
         'map': 0.009917408697896502,
         'ndcg': 0.05467516207230389,
         'recall_20': 0.02032520325203252},
 '353': {'P_20': 0.35,
         'map': 0.01978622470425749,
         'ndcg': 0.08441372675262517,
         'recall_20': 0.05737704918032787},
 '354': {'P_20': 0.1,
         'map': 0.004616805170821791,
         'ndcg': 0.027439966306286947,
         'recall_20': 0.00554016620498615},
 '355': {'P_20': 0.35,
         'map': 0.09658730158730158,
         'ndcg': 0.2467817945309062,
         'recall_20': 0.15555555555555556},
 '356': {'P_20': 0.1,
         'map': 0.06722689075630252,
         'ndcg': 0.19791922947023236,
         'recall_20': 0.11764705882352941},
 '357': {'P_20': 0.65,
         'map': 0.040877724309096856,
         'ndcg': 0.11737704847622943,
         'recall_20': 0.04814814814814815},
 '358': {'P_20': 0.15,
         'map': 0.017401960784313726,
         'ndcg': 0.08908674881026615,
         'recall_20': 0.058823529411764705},
 '359': {'P_20': 0.1,
         'map': 0.02806122448979592,
         'ndcg': 0.11012677421250541,
         'recall_20': 0.07142857142857142},
 '360': {'P_20': 0.75,
         'map': 0.09213103965246544,
         'ndcg': 0.2036705922307034,
         'recall_20': 0.09933774834437085},
 '361': {'P_20': 0.2,
         'map': 0.15947712418300652,
         'ndcg': 0.363535515068368,
         'recall_20': 0.4444444444444444},
 '362': {'P_20': 0.25,
         'map': 0.03062678062678063,
         'ndcg': 0.13128345349885726,
         'recall_20': 0.1282051282051282},
 '363': {'P_20': 0.25,
         'map': 0.10343329915698335,
         'ndcg': 0.28716287971177107,
         'recall_20': 0.3125},
 '364': {'P_20': 0.65,
         'map': 0.31779142426201246,
         'ndcg': 0.50692195703122,
         'recall_20': 0.37142857142857144},
 '365': {'P_20': 0.4,
         'map': 0.18420547706261992,
         'ndcg': 0.3655542427172996,
         'recall_20': 0.22857142857142856},
 '366': {'P_20': 0.25,
         'map': 0.033691617024950354,
         'ndcg': 0.12312899797860138,
         'recall_20': 0.050505050505050504},
 '367': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '368': {'P_20': 0.7,
         'map': 0.21385258906956114,
         'ndcg': 0.3727554586013727,
         'recall_20': 0.22950819672131148},
 '369': {'P_20': 0.3,
         'map': 0.37912087912087916,
         'ndcg': 0.580093920016424,
         'recall_20': 0.46153846153846156},
 '370': {'P_20': 0.5,
         'map': 0.014774303361695556,
         'ndcg': 0.061059457332334074,
         'recall_20': 0.02976190476190476},
 '371': {'P_20': 0.05,
         'map': 0.00326797385620915,
         'ndcg': 0.03709674194804795,
         'recall_20': 0.058823529411764705},
 '372': {'P_20': 0.9,
         'map': 0.36297185540883015,
         'ndcg': 0.515998108125258,
         'recall_20': 0.3673469387755102},
 '373': {'P_20': 0.05,
         'map': 0.030303030303030304,
         'ndcg': 0.10249700605553073,
         'recall_20': 0.030303030303030304},
 '374': {'P_20': 0.65,
         'map': 0.056782057971504346,
         'ndcg': 0.14698700125373076,
         'recall_20': 0.06372549019607843},
 '375': {'P_20': 0.5,
         'map': 0.08171841982406997,
         'ndcg': 0.22043726310188536,
         'recall_20': 0.125},
 '376': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '377': {'P_20': 0.25,
         'map': 0.06558185404339249,
         'ndcg': 0.20883383287856186,
         'recall_20': 0.1282051282051282},
 '378': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '379': {'P_20': 0.15,
         'map': 0.13671875,
         'ndcg': 0.30717011859114,
         'recall_20': 0.1875},
 '380': {'P_20': 0.15,
         'map': 0.34523809523809523,
         'ndcg': 0.5306972923267992,
         'recall_20': 0.42857142857142855},
 '381': {'P_20': 0.25,
         'map': 0.06381609661408423,
         'ndcg': 0.22284881168941081,
         'recall_20': 0.17857142857142858},
 '382': {'P_20': 0.6,
         'map': 0.49901673950717007,
         'ndcg': 0.6636644719517895,
         'recall_20': 0.5454545454545454},
 '383': {'P_20': 0.55,
         'map': 0.062330990926881324,
         'ndcg': 0.16396960264281557,
         'recall_20': 0.07534246575342465},
 '384': {'P_20': 0.25,
         'map': 0.024660633484162895,
         'ndcg': 0.11323196347136413,
         'recall_20': 0.09803921568627451},
 '385': {'P_20': 0.65,
         'map': 0.12000639579298951,
         'ndcg': 0.2648715100763174,
         'recall_20': 0.1511627906976744},
 '386': {'P_20': 0.2,
         'map': 0.11228070175438597,
         'ndcg': 0.2901899672570435,
         'recall_20': 0.21052631578947367},
 '387': {'P_20': 0.1,
         'map': 0.004117647058823529,
         'ndcg': 0.03530117695159417,
         'recall_20': 0.023529411764705882},
 '388': {'P_20': 0.1,
         'map': 0.02287581699346405,
         'ndcg': 0.08650777217424489,
         'recall_20': 0.0392156862745098},
 '389': {'P_20': 0.1,
         'map': 0.001823949246629659,
         'ndcg': 0.01908504987461707,
         'recall_20': 0.010309278350515464},
 '390': {'P_20': 0.2,
         'map': 0.009328358208955223,
         'ndcg': 0.056536612235097365,
         'recall_20': 0.029850746268656716},
 '391': {'P_20': 0.6,
         'map': 0.044515609070606116,
         'ndcg': 0.1271738677978696,
         'recall_20': 0.06741573033707865},
 '392': {'P_20': 0.55,
         'map': 0.06752878929349516,
         'ndcg': 0.1924349178443162,
         'recall_20': 0.10476190476190476},
 '393': {'P_20': 0.5,
         'map': 0.08397685421406698,
         'ndcg': 0.23382897401328084,
         'recall_20': 0.14084507042253522},
 '394': {'P_20': 0.1,
         'map': 0.07563025210084033,
         'ndcg': 0.2101123610403062,
         'recall_20': 0.11764705882352941},
 '395': {'P_20': 0.15,
         'map': 0.0037146857754715426,
         'ndcg': 0.030468889845020695,
         'recall_20': 0.014084507042253521},
 '396': {'P_20': 0.65,
         'map': 0.2156577885391445,
         'ndcg': 0.3687999188833989,
         'recall_20': 0.22033898305084745},
 '397': {'P_20': 0.5,
         'map': 0.259440813362382,
         'ndcg': 0.4319338251109782,
         'recall_20': 0.37037037037037035},
 '398': {'P_20': 0.15,
         'map': 0.003847549909255898,
         'ndcg': 0.033088648668383916,
         'recall_20': 0.020689655172413793},
 '399': {'P_20': 0.2,
         'map': 0.01906318082788671,
         'ndcg': 0.08093208806238239,
         'recall_20': 0.0392156862745098},
 '400': {'P_20': 0.75,
         'map': 0.09865119325119326,
         'ndcg': 0.21899795942346403,
         'recall_20': 0.12},
 '401': {'P_20': 0.8,
         'map': 0.04958112531603243,
         'ndcg': 0.1265938269419964,
         'recall_20': 0.05333333333333334},
 '402': {'P_20': 0.55,
         'map': 0.10819329975579976,
         'ndcg': 0.25125092913317654,
         'recall_20': 0.1375},
 '403': {'P_20': 0.7,
         'map': 0.6038909503195218,
         'ndcg': 0.7498445063529312,
         'recall_20': 0.6666666666666666},
 '404': {'P_20': 0.15,
         'map': 0.003816058745636211,
         'ndcg': 0.0315804111279314,
         'recall_20': 0.02112676056338028},
 '405': {'P_20': 0.4,
         'map': 0.10377853819274707,
         'ndcg': 0.2617051153445634,
         'recall_20': 0.21052631578947367},
 '406': {'P_20': 0.15,
         'map': 0.10256410256410256,
         'ndcg': 0.25513336540843,
         'recall_20': 0.23076923076923078},
 '407': {'P_20': 0.1,
         'map': 0.003232062055591467,
         'ndcg': 0.03252895151481945,
         'recall_20': 0.029411764705882353},
 '408': {'P_20': 0.1,
         'map': 0.004943502824858757,
         'ndcg': 0.03456628763697352,
         'recall_20': 0.01694915254237288},
 '409': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '410': {'P_20': 0.3,
         'map': 0.041721611721611714,
         'ndcg': 0.14570991059143737,
         'recall_20': 0.09230769230769231},
 '411': {'P_20': 0.3,
         'map': 0.1228615520282187,
         'ndcg': 0.3077069517198418,
         'recall_20': 0.2222222222222222},
 '412': {'P_20': 0.4,
         'map': 0.0458076701979141,
         'ndcg': 0.14528682254746758,
         'recall_20': 0.06504065040650407},
 '413': {'P_20': 0.1,
         'map': 0.002740226525392766,
         'ndcg': 0.03078152672420554,
         'recall_20': 0.028985507246376812},
 '414': {'P_20': 0.2,
         'map': 0.048005698005698004,
         'ndcg': 0.17355409721001513,
         'recall_20': 0.10256410256410256},
 '415': {'P_20': 0.45,
         'map': 0.0625,
         'ndcg': 0.16011711667459383,
         'recall_20': 0.0661764705882353},
 '416': {'P_20': 0.35,
         'map': 0.08702831559974418,
         'ndcg': 0.22646574872262898,
         'recall_20': 0.16666666666666666},
 '417': {'P_20': 0.5,
         'map': 0.1049114774114774,
         'ndcg': 0.24782484938198593,
         'recall_20': 0.13333333333333333},
 '418': {'P_20': 0.25,
         'map': 0.03635057471264368,
         'ndcg': 0.11965237701037502,
         'recall_20': 0.04310344827586207},
 '419': {'P_20': 0.3,
         'map': 0.3364583333333333,
         'ndcg': 0.5223477542185565,
         'recall_20': 0.375},
 '420': {'P_20': 0.7,
         'map': 0.33002253737547854,
         'ndcg': 0.5311456245302797,
         'recall_20': 0.42424242424242425},
 '421': {'P_20': 0.05,
         'map': 0.001095290251916758,
         'ndcg': 0.015211366269120943,
         'recall_20': 0.012048192771084338},
 '422': {'P_20': 0.95,
         'map': 0.12272251756196474,
         'ndcg': 0.23898073169021466,
         'recall_20': 0.125},
 '423': {'P_20': 0.05,
         'map': 0.023809523809523808,
         'ndcg': 0.08685094548235438,
         'recall_20': 0.047619047619047616},
 '424': {'P_20': 0.1,
         'map': 0.001485194467650608,
         'ndcg': 0.018361683989584644,
         'recall_20': 0.011695906432748537},
 '425': {'P_20': 0.95,
         'map': 0.11514705351492989,
         'ndcg': 0.22799127158894042,
         'recall_20': 0.11728395061728394},
 '426': {'P_20': 0.75,
         'map': 0.06802328839695114,
         'ndcg': 0.1635716226731666,
         'recall_20': 0.07425742574257425},
 '427': {'P_20': 0.3,
         'map': 0.09881578947368422,
         'ndcg': 0.24100742924686824,
         'recall_20': 0.12},
 '428': {'P_20': 0.35,
         'map': 0.04397001303780964,
         'ndcg': 0.13672184690996123,
         'recall_20': 0.059322033898305086},
 '429': {'P_20': 0.3,
         'map': 0.3389277389277389,
         'ndcg': 0.5710794693994037,
         'recall_20': 0.5454545454545454},
 '430': {'P_20': 0.25,
         'map': 0.6593406593406593,
         'ndcg': 0.8251703408000219,
         'recall_20': 0.8333333333333334},
 '431': {'P_20': 0.95,
         'map': 0.1373250795604332,
         'ndcg': 0.26405845385463034,
         'recall_20': 0.14615384615384616},
 '432': {'P_20': 0.05,
         'map': 0.0021008403361344537,
         'ndcg': 0.02738855579742861,
         'recall_20': 0.03571428571428571},
 '433': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '434': {'P_20': 0.95,
         'map': 0.050967128749826084,
         'ndcg': 0.12550631457661224,
         'recall_20': 0.05475504322766571},
 '435': {'P_20': 0.25,
         'map': 0.013646609800455956,
         'ndcg': 0.07245145324830304,
         'recall_20': 0.042735042735042736},
 '436': {'P_20': 0.8,
         'map': 0.07323571719340555,
         'ndcg': 0.1788416879346157,
         'recall_20': 0.08888888888888889},
 '437': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '438': {'P_20': 0.35,
         'map': 0.01213854723513987,
         'ndcg': 0.06345859484708757,
         'recall_20': 0.04046242774566474},
 '439': {'P_20': 0.05,
         'map': 0.00022831050228310504,
         'ndcg': 0.006107187382200578,
         'recall_20': 0.0045662100456621},
 '440': {'P_20': 0.35,
         'map': 0.10149911816578483,
         'ndcg': 0.2462218678130332,
         'recall_20': 0.12962962962962962},
 '441': {'P_20': 0.35,
         'map': 0.3696078431372549,
         'ndcg': 0.5550658849022543,
         'recall_20': 0.4117647058823529},
 '442': {'P_20': 0.25,
         'map': 0.01619881995351332,
         'ndcg': 0.07818298952630386,
         'recall_20': 0.05319148936170213},
 '443': {'P_20': 0.35,
         'map': 0.04973757179639532,
         'ndcg': 0.15397828515627912,
         'recall_20': 0.06862745098039216},
 '444': {'P_20': 0.15,
         'map': 0.10364145658263305,
         'ndcg': 0.26624501086740066,
         'recall_20': 0.17647058823529413},
 '445': {'P_20': 0.6,
         'map': 0.11403195700733901,
         'ndcg': 0.253573722927346,
         'recall_20': 0.1935483870967742},
 '446': {'P_20': 0.4,
         'map': 0.030102202324424544,
         'ndcg': 0.11045474180258966,
         'recall_20': 0.04938271604938271},
 '447': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '448': {'P_20': 0.1,
         'map': 0.013457556935817806,
         'ndcg': 0.06837830348656608,
         'recall_20': 0.043478260869565216},
 '449': {'P_20': 0.1,
         'map': 0.006467661691542289,
         'ndcg': 0.046115648735190924,
         'recall_20': 0.029850746268656716},
 '450': {'P_20': 0.35,
         'map': 0.00947843870198819,
         'ndcg': 0.047644858929617624,
         'recall_20': 0.023890784982935155},
 '601': {'P_20': 0.1, 'map': 0.3, 'ndcg': 0.3124164813082815, 'recall_20': 0.4},
 '602': {'P_20': 0.35,
         'map': 0.03228940024052806,
         'ndcg': 0.1265501180082825,
         'recall_20': 0.08333333333333333},
 '603': {'P_20': 0.5,
         'map': 0.4691726370035193,
         'ndcg': 0.6352306396590041,
         'recall_20': 0.625},
 '604': {'P_20': 0.25,
         'map': 0.625,
         'ndcg': 0.8543876388123582,
         'recall_20': 0.625},
 '605': {'P_20': 0.4,
         'map': 0.0655232884399551,
         'ndcg': 0.18103194390176758,
         'recall_20': 0.1111111111111111},
 '606': {'P_20': 0.05,
         'map': 0.07142857142857142,
         'ndcg': 0.2447095535131568,
         'recall_20': 0.07142857142857142},
 '607': {'P_20': 0.35,
         'map': 0.37222866260299414,
         'ndcg': 0.6127817729887999,
         'recall_20': 0.6363636363636364},
 '608': {'P_20': 0.15,
         'map': 0.03796296296296296,
         'ndcg': 0.10607547408421127,
         'recall_20': 0.1111111111111111},
 '609': {'P_20': 0.3, 'map': 0.2, 'ndcg': 0.3560049480895638, 'recall_20': 0.2},
 '610': {'P_20': 0.1,
         'map': 0.10555555555555556,
         'ndcg': 0.26657146955363803,
         'recall_20': 0.3333333333333333},
 '611': {'P_20': 0.65,
         'map': 0.2034014604842936,
         'ndcg': 0.42625679872337274,
         'recall_20': 0.2653061224489796},
 '612': {'P_20': 0.35,
         'map': 0.20339635854341734,
         'ndcg': 0.37686922609919227,
         'recall_20': 0.4117647058823529},
 '613': {'P_20': 0.55,
         'map': 0.16515806714492567,
         'ndcg': 0.3541882555256686,
         'recall_20': 0.23404255319148937},
 '614': {'P_20': 0.05,
         'map': 0.005555555555555555,
         'ndcg': 0.030384841286538733,
         'recall_20': 0.03333333333333333},
 '615': {'P_20': 0.05,
         'map': 0.004629629629629629,
         'ndcg': 0.03501196618119346,
         'recall_20': 0.08333333333333333},
 '616': {'P_20': 0.25,
         'map': 0.049507916131792895,
         'ndcg': 0.16901055765084075,
         'recall_20': 0.12195121951219512},
 '617': {'P_20': 0.55,
         'map': 0.0863597716504746,
         'ndcg': 0.1993269662930805,
         'recall_20': 0.16176470588235295},
 '618': {'P_20': 0.15,
         'map': 0.03318903318903319,
         'ndcg': 0.12207536418927363,
         'recall_20': 0.1111111111111111},
 '619': {'P_20': 0.65,
         'map': 0.4653303046160189,
         'ndcg': 0.6690778828257522,
         'recall_20': 0.6190476190476191},
 '620': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '621': {'P_20': 0.8,
         'map': 0.29356276341342813,
         'ndcg': 0.5482740801100179,
         'recall_20': 0.3137254901960784},
 '622': {'P_20': 0.2,
         'map': 0.01504534016996629,
         'ndcg': 0.10057923048229654,
         'recall_20': 0.06779661016949153},
 '623': {'P_20': 0.2,
         'map': 0.02719298245614035,
         'ndcg': 0.09398932479819548,
         'recall_20': 0.10526315789473684},
 '624': {'P_20': 0.25,
         'map': 0.13055555555555556,
         'ndcg': 0.2521117197490096,
         'recall_20': 0.2777777777777778},
 '625': {'P_20': 0.6,
         'map': 0.41580428467683367,
         'ndcg': 0.5703236279608787,
         'recall_20': 0.4444444444444444},
 '626': {'P_20': 0.3,
         'map': 0.4523809523809524,
         'ndcg': 0.665072116495427,
         'recall_20': 0.5},
 '627': {'P_20': 0.05,
         'map': 0.003968253968253968,
         'ndcg': 0.034380101046999886,
         'recall_20': 0.03571428571428571},
 '628': {'P_20': 0.3,
         'map': 0.15668498168498168,
         'ndcg': 0.31119934614423467,
         'recall_20': 0.23076923076923078},
 '629': {'P_20': 0.2,
         'map': 0.1425438596491228,
         'ndcg': 0.33934722080228114,
         'recall_20': 0.21052631578947367},
 '630': {'P_20': 0.1,
         'map': 0.29166666666666663,
         'ndcg': 0.5394967322782511,
         'recall_20': 0.5},
 '631': {'P_20': 0.45,
         'map': 0.04776116014674368,
         'ndcg': 0.19611383740823177,
         'recall_20': 0.0782608695652174},
 '632': {'P_20': 0.95,
         'map': 0.26172386053507146,
         'ndcg': 0.3930780604469424,
         'recall_20': 0.2676056338028169},
 '633': {'P_20': 0.5,
         'map': 0.16230923331987163,
         'ndcg': 0.2999137435024016,
         'recall_20': 0.2127659574468085},
 '634': {'P_20': 0.45,
         'map': 0.6116314241314241,
         'ndcg': 0.8079068364235671,
         'recall_20': 0.8181818181818182},
 '635': {'P_20': 0.75,
         'map': 0.711536146398457,
         'ndcg': 0.8381316003888836,
         'recall_20': 0.7894736842105263},
 '636': {'P_20': 0.35,
         'map': 0.27587086116497883,
         'ndcg': 0.47332565472899274,
         'recall_20': 0.5},
 '637': {'P_20': 0.3,
         'map': 0.15454545454545454,
         'ndcg': 0.3714109032107298,
         'recall_20': 0.2727272727272727},
 '638': {'P_20': 0.1,
         'map': 0.02,
         'ndcg': 0.11531520973080929,
         'recall_20': 0.05714285714285714},
 '639': {'P_20': 0.4,
         'map': 0.17813176406926404,
         'ndcg': 0.37036243845397654,
         'recall_20': 0.25},
 '640': {'P_20': 0.65,
         'map': 0.2578638415847718,
         'ndcg': 0.4309130873603626,
         'recall_20': 0.3023255813953488},
 '641': {'P_20': 0.3,
         'map': 0.07961754202355706,
         'ndcg': 0.22216808892796924,
         'recall_20': 0.12244897959183673},
 '642': {'P_20': 0.1,
         'map': 0.004451566951566952,
         'ndcg': 0.03565582318219758,
         'recall_20': 0.05128205128205128},
 '643': {'P_20': 0.4,
         'map': 0.17272167487684728,
         'ndcg': 0.35646248486591087,
         'recall_20': 0.27586206896551724},
 '644': {'P_20': 0.4,
         'map': 0.04703320123902831,
         'ndcg': 0.14708561954129312,
         'recall_20': 0.14285714285714285},
 '645': {'P_20': 0.7,
         'map': 0.29500650352577334,
         'ndcg': 0.4777291779769327,
         'recall_20': 0.4827586206896552},
 '646': {'P_20': 0.2,
         'map': 0.34444444444444444,
         'ndcg': 0.6845610276029979,
         'recall_20': 0.4},
 '647': {'P_20': 0.5,
         'map': 0.23186026936026935,
         'ndcg': 0.4587440747836439,
         'recall_20': 0.30303030303030304},
 '648': {'P_20': 0.35,
         'map': 0.05744568348446465,
         'ndcg': 0.17839249958443526,
         'recall_20': 0.12280701754385964},
 '649': {'P_20': 0.75,
         'map': 0.36647429147429145,
         'ndcg': 0.540270172537978,
         'recall_20': 0.40540540540540543},
 '650': {'P_20': 0.05,
         'map': 0.0017301038062283738,
         'ndcg': 0.019848293450906808,
         'recall_20': 0.029411764705882353},
 '651': {'P_20': 0.2,
         'map': 0.03837719298245614,
         'ndcg': 0.17371909822192488,
         'recall_20': 0.10526315789473684},
 '652': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '653': {'P_20': 0.9,
         'map': 0.21514973428131323,
         'ndcg': 0.3831518013672633,
         'recall_20': 0.24},
 '654': {'P_20': 0.2,
         'map': 0.02356853672643146,
         'ndcg': 0.10611452401869832,
         'recall_20': 0.07017543859649122},
 '655': {'P_20': 0.2,
         'map': 0.4336734693877551,
         'ndcg': 0.6458795850910839,
         'recall_20': 0.5714285714285714},
 '656': {'P_20': 0.65,
         'map': 0.11427216690374585,
         'ndcg': 0.2506981337148574,
         'recall_20': 0.11711711711711711},
 '657': {'P_20': 0.3,
         'map': 0.24131944444444442,
         'ndcg': 0.5480532049342898,
         'recall_20': 0.375},
 '658': {'P_20': 0.3,
         'map': 0.061747506019951846,
         'ndcg': 0.25979252075085874,
         'recall_20': 0.10526315789473684},
 '659': {'P_20': 0.25,
         'map': 0.26644736842105265,
         'ndcg': 0.5977852547459992,
         'recall_20': 0.3125},
 '660': {'P_20': 0.6,
         'map': 0.3884277428920287,
         'ndcg': 0.6373866805279771,
         'recall_20': 0.42857142857142855},
 '661': {'P_20': 0.6,
         'map': 0.3028020418693793,
         'ndcg': 0.5989494698414626,
         'recall_20': 0.35294117647058826},
 '662': {'P_20': 0.5,
         'map': 0.1677878365184867,
         'ndcg': 0.3637238865303859,
         'recall_20': 0.29411764705882354},
 '663': {'P_20': 0.6,
         'map': 0.3362680623938715,
         'ndcg': 0.5121133118166936,
         'recall_20': 0.5454545454545454},
 '664': {'P_20': 0.35,
         'map': 0.6666666666666666,
         'ndcg': 0.8778174563276556,
         'recall_20': 0.7777777777777778},
 '665': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '666': {'P_20': 0.05,
         'map': 0.037037037037037035,
         'ndcg': 0.14126697285982898,
         'recall_20': 0.3333333333333333},
 '667': {'P_20': 0.2,
         'map': 0.05684224909475985,
         'ndcg': 0.1838468639837311,
         'recall_20': 0.0975609756097561},
 '668': {'P_20': 0.1,
         'map': 0.03571428571428571,
         'ndcg': 0.18660818666147996,
         'recall_20': 0.05555555555555555},
 '669': {'P_20': 0.05,
         'map': 0.003105590062111801,
         'ndcg': 0.026026248043798075,
         'recall_20': 0.043478260869565216},
 '670': {'P_20': 0.2,
         'map': 0.13425925925925927,
         'ndcg': 0.3213737412291047,
         'recall_20': 0.16666666666666666},
 '671': {'P_20': 0.4,
         'map': 0.07296485260770975,
         'ndcg': 0.1905493874646097,
         'recall_20': 0.11428571428571428},
 '673': {'P_20': 0.3,
         'map': 0.19925002533698186,
         'ndcg': 0.5066834840963695,
         'recall_20': 0.2608695652173913},
 '674': {'P_20': 0.2,
         'map': 0.07430555555555554,
         'ndcg': 0.209182658908149,
         'recall_20': 0.2},
 '675': {'P_20': 0.35,
         'map': 0.1641970121381886,
         'ndcg': 0.36649196968462744,
         'recall_20': 0.28},
 '676': {'P_20': 0.4,
         'map': 0.08880504582785495,
         'ndcg': 0.23233252078869177,
         'recall_20': 0.16326530612244897},
 '677': {'P_20': 0.35,
         'map': 0.7372134038800705,
         'ndcg': 0.8721718973368122,
         'recall_20': 0.7777777777777778},
 '678': {'P_20': 0.25,
         'map': 0.14077958053027304,
         'ndcg': 0.3107855071273682,
         'recall_20': 0.2631578947368421},
 '679': {'P_20': 0.3,
         'map': 0.9484126984126983,
         'ndcg': 0.9838048780283076,
         'recall_20': 1.0},
 '680': {'P_20': 0.35,
         'map': 0.16569809941520466,
         'ndcg': 0.3366099289329639,
         'recall_20': 0.2916666666666667},
 '681': {'P_20': 0.75,
         'map': 0.23575123242770302,
         'ndcg': 0.3613426968974881,
         'recall_20': 0.3},
 '682': {'P_20': 0.15,
         'map': 0.06341463414634146,
         'ndcg': 0.1418482216904787,
         'recall_20': 0.07317073170731707},
 '683': {'P_20': 0.7,
         'map': 0.11037582616868136,
         'ndcg': 0.26551597813928157,
         'recall_20': 0.13592233009708737},
 '684': {'P_20': 0.2,
         'map': 0.025203252032520322,
         'ndcg': 0.10662529986732404,
         'recall_20': 0.0975609756097561},
 '685': {'P_20': 0.15,
         'map': 0.03310128785241907,
         'ndcg': 0.13315357555211155,
         'recall_20': 0.23076923076923078},
 '686': {'P_20': 0.45,
         'map': 0.16873278792178756,
         'ndcg': 0.42161369442361535,
         'recall_20': 0.28125},
 '687': {'P_20': 0.65,
         'map': 0.1117321938822953,
         'ndcg': 0.2618296539556969,
         'recall_20': 0.14942528735632185},
 '688': {'P_20': 0.1,
         'map': 0.0017319636884854277,
         'ndcg': 0.022667531580386643,
         'recall_20': 0.012422360248447204},
 '689': {'P_20': 0.05,
         'map': 0.00980392156862745,
         'ndcg': 0.03044193392795993,
         'recall_20': 0.058823529411764705},
 '690': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '691': {'P_20': 0.2,
         'map': 0.10793650793650793,
         'ndcg': 0.2523565775059176,
         'recall_20': 0.19047619047619047},
 '692': {'P_20': 0.35,
         'map': 0.13350790458633596,
         'ndcg': 0.4534518632509047,
         'recall_20': 0.25925925925925924},
 '693': {'P_20': 0.3,
         'map': 0.16704014939309053,
         'ndcg': 0.27120548898491903,
         'recall_20': 0.2222222222222222},
 '694': {'P_20': 0.3,
         'map': 0.023606203845174433,
         'ndcg': 0.10631445551715113,
         'recall_20': 0.06818181818181818},
 '695': {'P_20': 0.15,
         'map': 0.025842696629213482,
         'ndcg': 0.08795405072779656,
         'recall_20': 0.033707865168539325},
 '696': {'P_20': 0.45,
         'map': 0.18463405040979197,
         'ndcg': 0.3911054043177229,
         'recall_20': 0.2903225806451613},
 '697': {'P_20': 0.25,
         'map': 0.02778134284016637,
         'ndcg': 0.15654225447607237,
         'recall_20': 0.1},
 '698': {'P_20': 0.15,
         'map': 0.19285714285714287,
         'ndcg': 0.3785761654096044,
         'recall_20': 0.3},
 '699': {'P_20': 0.75,
         'map': 0.1939385456353363,
         'ndcg': 0.34907197617505237,
         'recall_20': 0.2112676056338028},
 '700': {'P_20': 0.4,
         'map': 0.04886077372919478,
         'ndcg': 0.1650861889994992,
         'recall_20': 0.06666666666666667}}

# Our approach results


In [None]:
# The JSON result files from earlier runs are stored on Drive; their contents are pasted directly into the cells below.

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id, for queries 301-303.
# Values were pasted from a JSON results file saved on Drive.
dict_1_2_3 = {"301": {"map": 0.036821968370681614, "P_20": 0.85, "recall_20": 0.03794642857142857, "ndcg": 0.09740414423137281}, "302": {"map": 0.11148638955974036, "P_20": 0.6, "recall_20": 0.18461538461538463, "ndcg": 0.2543233058997546}, "303": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id, for queries 305-307.
dict_5_6_7 = {"305": {"map": 0.02857142857142857, "P_20": 0.15, "recall_20": 0.08571428571428572, "ndcg": 0.12028083009827595}, "306": {"map": 0.002871333820129001, "P_20": 0.2, "recall_20": 0.012048192771084338, "ndcg": 0.0227538914261022}, "307": {"map": 0.011478696741854637, "P_20": 0.2, "recall_20": 0.01904761904761905, "ndcg": 0.058482735788172646}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id, for queries 310-319.
dict_10_to_19 = {"310": {"map": 0.034690799396681744, "P_20": 0.15, "recall_20": 0.23076923076923078, "ndcg": 0.15525079801898536}, "311": {"map": 0.025712908885985805, "P_20": 0.5, "recall_20": 0.054945054945054944, "ndcg": 0.09094507352686222}, "312": {"map": 0.008264462809917356, "P_20": 0.05, "recall_20": 0.09090909090909091, "ndcg": 0.057841952007295125}, "313": {"map": 0.1386863479592931, "P_20": 0.8, "recall_20": 0.17204301075268819, "ndcg": 0.28587071087029675}, "314": {"map": 0.017477405635300373, "P_20": 0.15, "recall_20": 0.06818181818181818, "ndcg": 0.09282211306064267}, "315": {"map": 0.0037313432835820895, "P_20": 0.05, "recall_20": 0.014925373134328358, "ndcg": 0.02729385859344783}, "316": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}, "317": {"map": 0.1619047619047619, "P_20": 0.2, "recall_20": 0.2857142857142857, "ndcg": 0.32161012053185706}, "318": {"map": 0.004424778761061947, "P_20": 0.05, "recall_20": 0.008849557522123894, "ndcg": 0.027595434250284354}, "319": {"map": 0.008175283330765028, "P_20": 0.3, "recall_20": 0.03529411764705882, "ndcg": 0.05307558104216706}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id, for queries 321-323.
dict_21_22_23 = {"321": {"map": 0.07128663720383895, "P_20": 0.9, "recall_20": 0.08866995073891626, "ndcg": 0.1678339389403317}, "322": {"map": 0.020022624434389142, "P_20": 0.15, "recall_20": 0.08823529411764706, "ndcg": 0.09696855577943257}, "323": {"map": 0.016295862607338018, "P_20": 0.15, "recall_20": 0.04918032786885246, "ndcg": 0.07770199656909488}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id.
# Despite the name, this is not a full range: it holds only queries
# 326, 331, 338, 341, 348, 351, 353 and 356.
dict_26_56 = {"326": {"map": 0.04951036459987867, "P_20": 0.35, "recall_20": 0.15217391304347827, "ndcg": 0.16276757531660327}, "331": {"map": 0.07861162148106728, "P_20": 0.9, "recall_20": 0.08450704225352113, "ndcg": 0.1769636072324848}, "338": {"map": 0.0402046783625731, "P_20": 0.1, "recall_20": 0.5, "ndcg": 0.18222438178904452}, "341": {"map": 0.059044733502937846, "P_20": 0.4, "recall_20": 0.10256410256410256, "ndcg": 0.1868591139150484}, "348": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}, "351": {"map": 0.3025304910052395, "P_20": 0.85, "recall_20": 0.3541666666666667, "ndcg": 0.4832221485097297}, "353": {"map": 0.047293780900338284, "P_20": 0.35, "recall_20": 0.05737704918032787, "ndcg": 0.14197451506158573}, "356": {"map": 0.03137254901960784, "P_20": 0.1, "recall_20": 0.11764705882352941, "ndcg": 0.11709456735443448}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id.
# Despite the name, this is not a full range: it holds only queries
# 374, 398, 400, 401, 404, 406, 409, 410 and 414.
dict_74_14 = {"374": {"map": 0.03851695993154743, "P_20": 0.65, "recall_20": 0.06372549019607843, "ndcg": 0.1164107204977484}, "398": {"map": 0.013103448275862068, "P_20": 0.15, "recall_20": 0.020689655172413793, "ndcg": 0.0635685702405867}, "400": {"map": 0.09755302605890843, "P_20": 0.75, "recall_20": 0.12, "ndcg": 0.20594521597898743}, "401": {"map": 0.047615071489684496, "P_20": 0.8, "recall_20": 0.05333333333333334, "ndcg": 0.1252478219070122}, "404": {"map": 0.003471333048797838, "P_20": 0.15, "recall_20": 0.02112676056338028, "ndcg": 0.030737097813716467}, "406": {"map": 0.07948717948717948, "P_20": 0.15, "recall_20": 0.23076923076923078, "ndcg": 0.231007815839662}, "409": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}, "410": {"map": 0.060948043184885295, "P_20": 0.3, "recall_20": 0.09230769230769231, "ndcg": 0.1862248159004148}, "414": {"map": 0.03638583638583638, "P_20": 0.2, "recall_20": 0.10256410256410256, "ndcg": 0.13972055528237556}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id.
# Holds a non-contiguous subset of queries between 423 and 652 (e.g. 423, 433,
# 434, ..., 651, 652), not the full range. The literal spans two lines.
dict_423_652={"423": {"map": 0.047619047619047616, "P_20": 0.05, "recall_20": 0.047619047619047616, "ndcg": 0.13765549174170919}, "433": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}, "434": {"map": 0.052539563679609706, "P_20": 0.95, "recall_20": 0.05475504322766571, "ndcg": 0.12712522815053665}, "443": {"map": 0.0379781814686659, "P_20": 0.35, "recall_20": 0.06862745098039216, "ndcg": 0.13971574859258265}, "447": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}, "450": {"map": 0.010511445323732013, "P_20": 0.35, "recall_20": 0.023890784982935155, "ndcg": 0.052847384694494154}, "601": {"map": 0.2333333333333333, "P_20": 0.1, "recall_20": 0.4, "ndcg": 0.24696084675730837}, "605": {"map": 0.055210761460761454, "P_20": 0.4, "recall_20": 0.1111111111111111, "ndcg": 0.18082567649709633}, "612": {"map": 0.18357637172516064, "P_20": 0.35, "recall_20": 0.4117647058823529, "ndcg": 0.4347231951233608}, "613": {"map": 0.12110893227420466, "P_20": 0.55, "recall_20": 0.23404255319148937, "ndcg": 0.27212241901985434}, "614": {"map": 0.0018518518518518517, "P_20": 0.05, "recall_20": 0.03333333333333333, "ndcg": 0.020080623662185392}, "615": {"map": 0.004629629629629629, "P_20": 0.05, "recall_20": 0.08333333333333333, "ndcg": 0.03501196618119346}, "618": {"map": 0.01607579058559451, "P_20": 0.15, "recall_20": 0.1111111111111111, "ndcg": 0.08261634815921898}, "619": {"map": 0.44557674990005824, "P_20": 0.65, "recall_20": 0.6190476190476191, "ndcg": 0.6530503312221391}, "620": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}, "621": {"map": 0.29220126885397335, "P_20": 0.8, "recall_20": 0.3137254901960784, "ndcg": 0.5475990539682868}, "625": {"map": 0.35057842436583403, "P_20": 0.6, "recall_20": 0.4444444444444444, "ndcg": 0.5172966998421069}, "627": {"map": 0.03571428571428571, "P_20": 0.05, "recall_20": 0.03571428571428571, "ndcg": 0.11420822357309535}, "628": {"map": 0.1286521592442645, "P_20": 0.3, "recall_20": 0.23076923076923078, "ndcg": 
0.2733743578656636}, "641": {"map": 0.08602941176470587, "P_20": 0.3, "recall_20": 0.12244897959183673, "ndcg": 0.21948649981335258}, "643": {"map": 0.12939383694961787, "P_20": 0.4, "recall_20": 0.27586206896551724, "ndcg": 0.2507523604473632}, "651": {"map": 0.03903508771929825, "P_20": 0.2, "recall_20": 0.10526315789473684, "ndcg": 0.14998694558492834}, "652": {"map": 0.0, "P_20": 0.0, "recall_20": 0.0, "ndcg": 0.0}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) for the single query 660.
dict_660 = {"660": {"map": 0.27603958249966654, "P_20": 0.6, "recall_20": 0.42857142857142855, "ndcg": 0.49228042684498863}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) for the single query 664.
dict_664 = {"664": {"map": 0.358643371878666, "P_20": 0.35, "recall_20": 0.7777777777777778, "ndcg": 0.6884652369971546}}

In [None]:
# Reranking metrics (map, P_20, recall_20, ndcg) keyed by query id.
# Holds a non-contiguous subset of queries between 666 and 700 (e.g. 667, 668,
# 676 and 678 are absent), not the full range.
dict_666_700 = {"666": {"map": 0.020833333333333332, "P_20": 0.05, "recall_20": 0.3333333333333333, "ndcg": 0.11480929472601784}, "669": {"map": 0.002557544757033248, "P_20": 0.05, "recall_20": 0.043478260869565216, "ndcg": 0.024384540174252338}, "670": {"map": 0.05702614379084967, "P_20": 0.2, "recall_20": 0.16666666666666666, "ndcg": 0.24071524740395786}, "671": {"map": 0.05226737186102511, "P_20": 0.4, "recall_20": 0.11428571428571428, "ndcg": 0.16710579158078204}, "673": {"map": 0.08587228941920017, "P_20": 0.3, "recall_20": 0.2608695652173913, "ndcg": 0.2968372976687423}, "674": {"map": 0.095, "P_20": 0.2, "recall_20": 0.2, "ndcg": 0.24445439892209275}, "675": {"map": 0.12482556659027244, "P_20": 0.35, "recall_20": 0.28, "ndcg": 0.30459699340358015}, "677": {"map": 0.6679738562091503, "P_20": 0.35, "recall_20": 0.7777777777777778, "ndcg": 0.7437467031480509}, "680": {"map": 0.1064304193899782, "P_20": 0.35, "recall_20": 0.2916666666666667, "ndcg": 0.24910996422514584}, "682": {"map": 0.05308464849354375, "P_20": 0.15, "recall_20": 0.07317073170731707, "ndcg": 0.13151142364026608}, "683": {"map": 0.1272164728475408, "P_20": 0.7, "recall_20": 0.13592233009708737, "ndcg": 0.2564023125125386}, "685": {"map": 0.048534798534798536, "P_20": 0.15, "recall_20": 0.23076923076923078, "ndcg": 0.15701598444201237}, "686": {"map": 0.12967414529914528, "P_20": 0.45, "recall_20": 0.28125, "ndcg": 0.28873373072324876}, "687": {"map": 0.09408223059338677, "P_20": 0.65, "recall_20": 0.14942528735632185, "ndcg": 0.24564313558517561}, "688": {"map": 0.001932367149758454, "P_20": 0.1, "recall_20": 0.012422360248447204, "ndcg": 0.023137276530642428}, "698": {"map": 0.05111111111111111, "P_20": 0.15, "recall_20": 0.3, "ndcg": 0.1748064944484022}, "700": {"map": 0.026167838081537772, "P_20": 0.4, "recall_20": 0.06666666666666667, "ndcg": 0.09835614301968836}}

In [None]:
def Merge(dict1, dict2):
    """Return a new dict with the entries of ``dict1`` updated by ``dict2``.

    Keys present in both inputs take the value from ``dict2``. Neither input
    is modified, so re-running a merge cell cannot alter the per-batch dicts.

    Args:
        dict1: Base mapping (in this notebook: query id -> metrics dict).
        dict2: Mapping whose entries are added to, or override, ``dict1``.

    Returns:
        A new ``dict``. The copy is shallow: the values themselves are shared
        with the inputs. Key order is ``dict1``'s keys followed by the keys
        that only appear in ``dict2``.
    """
    merged = dict(dict1)
    merged.update(dict2)
    return merged
     


In [None]:
# Combine the per-batch result dicts into one query-id -> metrics mapping.
# Start from a fresh dict rather than from dict_1_2_3, so that no batch dict
# ends up holding every query's results as a side effect of merging.
temp = {}
for batch_results in (
    dict_1_2_3,
    dict_5_6_7,
    dict_10_to_19,
    dict_21_22_23,
    dict_26_56,
    dict_74_14,
    dict_423_652,
    dict_660,
    dict_664,
    dict_666_700,
):
    temp = Merge(temp, batch_results)

In [None]:
# Print the merged per-query reranking metrics.
pprint.pprint(temp)

{'301': {'P_20': 0.85,
         'map': 0.036821968370681614,
         'ndcg': 0.09740414423137281,
         'recall_20': 0.03794642857142857},
 '302': {'P_20': 0.6,
         'map': 0.11148638955974036,
         'ndcg': 0.2543233058997546,
         'recall_20': 0.18461538461538463},
 '303': {'P_20': 0.0, 'map': 0.0, 'ndcg': 0.0, 'recall_20': 0.0},
 '305': {'P_20': 0.15,
         'map': 0.02857142857142857,
         'ndcg': 0.12028083009827595,
         'recall_20': 0.08571428571428572},
 '306': {'P_20': 0.2,
         'map': 0.002871333820129001,
         'ndcg': 0.0227538914261022,
         'recall_20': 0.012048192771084338},
 '307': {'P_20': 0.2,
         'map': 0.011478696741854637,
         'ndcg': 0.058482735788172646,
         'recall_20': 0.01904761904761905},
 '310': {'P_20': 0.15,
         'map': 0.034690799396681744,
         'ndcg': 0.15525079801898536,
         'recall_20': 0.23076923076923078},
 '311': {'P_20': 0.5,
         'map': 0.025712908885985805,
         'ndcg': 0.09

In [None]:
# Query ids that have reranking results. The cells below iterate over these ids
# for both `baseline` and `temp`, so both averages cover the same set of queries.
id_list = temp.keys()

In [None]:
id_list

dict_keys(['301', '302', '303', '305', '306', '307', '310', '311', '312', '313', '314', '315', '316', '317', '318', '319', '321', '322', '323', '326', '331', '338', '341', '348', '351', '353', '356', '374', '398', '400', '401', '404', '406', '409', '410', '414', '423', '433', '434', '443', '447', '450', '601', '605', '612', '613', '614', '615', '618', '619', '620', '621', '625', '627', '628', '641', '643', '651', '652', '660', '664', '666', '669', '670', '671', '673', '674', '675', '677', '680', '682', '683', '685', '686', '687', '688', '698', '700'])

In [None]:
## calculate average (baseline)
# Mean of each metric over the queries in id_list, read from `baseline`.
# The *_avg variables hold running sums; the mean is only formed in the final
# print, where each sum is divided by count.
count = 0
P_20_avg = 0
map_avg = 0
ndcg_avg = 0
recall_20_avg = 0
# Loop names avoid `id` and `map` so the Python built-ins are not overwritten
# for the rest of the notebook.
for query_id in id_list:
  query_scores = baseline[query_id]
  P_20 = query_scores["P_20"]
  map_score = query_scores["map"]
  ndcg = query_scores["ndcg"]
  recall_20 = query_scores["recall_20"]

  # Report queries whose metrics are all zero; they still count toward the mean.
  if P_20 == 0.0 and map_score == 0.0 and ndcg == 0.0 and recall_20 == 0.0:
    print("this query has no value: ", query_id)

  count += 1
  P_20_avg += P_20
  map_avg += map_score
  ndcg_avg += ndcg
  recall_20_avg += recall_20

print(count)
print("P_20: ", P_20_avg/count, "\nmap: ", map_avg/count, "\nndcg:",
      ndcg_avg/count, "\nrecall_20:", recall_20_avg/count )

this query has no value:  303
this query has no value:  316
this query has no value:  348
this query has no value:  409
this query has no value:  433
this query has no value:  447
this query has no value:  620
this query has no value:  652
78
P_20:  0.31089743589743596 
map:  0.10389907553112365 
ndcg: 0.20867229449328528 
recall_20: 0.1570635726905983


In [None]:
## calculate average reranking
# Mean of each metric over the queries in id_list, read from `temp` (the merged
# reranking results). The *_avg variables hold running sums; the mean is only
# formed in the final print, where each sum is divided by count.
count = 0
P_20_avg = 0
map_avg = 0
ndcg_avg = 0
recall_20_avg = 0
# Loop names avoid `id` and `map` so the Python built-ins are not overwritten
# for the rest of the notebook.
for query_id in id_list:
  query_scores = temp[query_id]
  P_20 = query_scores["P_20"]
  map_score = query_scores["map"]
  ndcg = query_scores["ndcg"]
  recall_20 = query_scores["recall_20"]

  # Report queries whose metrics are all zero; they still count toward the mean.
  if P_20 == 0.0 and map_score == 0.0 and ndcg == 0.0 and recall_20 == 0.0:
    print("this query has no value: ", query_id)

  count += 1
  P_20_avg += P_20
  map_avg += map_score
  ndcg_avg += ndcg
  recall_20_avg += recall_20

print(count)
print("P_20: ", P_20_avg/count, "\nmap: ", map_avg/count, "\nndcg:",
      ndcg_avg/count, "\nrecall_20:", recall_20_avg/count )

this query has no value:  303
this query has no value:  316
this query has no value:  348
this query has no value:  409
this query has no value:  433
this query has no value:  447
this query has no value:  620
this query has no value:  652
78
P_20:  0.31089743589743596 
map:  0.0812620689142913 
ndcg: 0.1789176328226542 
recall_20: 0.1570635726905983


In [None]:
# Show, and count, the queries where the reranking results in `temp` are
# strictly better than `baseline` on both map and ndcg.
count = 0
# `query_id` is used instead of `id` so the built-in is not overwritten.
for query_id in id_list:
  baseline_scores = baseline[query_id]
  reranked_scores = temp[query_id]

  if (reranked_scores["map"] > baseline_scores["map"]
      and reranked_scores["ndcg"] > baseline_scores["ndcg"]):
    print(query_id)
    print("----- baseline --------")
    pprint.pprint(baseline_scores)
    print("---- entity reranking  --")
    pprint.pprint(reranked_scores)
    print("--------------------------------")
    count += 1

print(count)

301
----- baseline --------
{'P_20': 0.85,
 'map': 0.036127672913110774,
 'ndcg': 0.09691289870696555,
 'recall_20': 0.03794642857142857}
---- entity reranking  --
{'P_20': 0.85,
 'map': 0.036821968370681614,
 'ndcg': 0.09740414423137281,
 'recall_20': 0.03794642857142857}
--------------------------------
307
----- baseline --------
{'P_20': 0.2,
 'map': 0.008609221466364324,
 'ndcg': 0.05111102291442516,
 'recall_20': 0.01904761904761905}
---- entity reranking  --
{'P_20': 0.2,
 'map': 0.011478696741854637,
 'ndcg': 0.058482735788172646,
 'recall_20': 0.01904761904761905}
--------------------------------
315
----- baseline --------
{'P_20': 0.05,
 'map': 0.0007855459544383346,
 'ndcg': 0.014663450130273634,
 'recall_20': 0.014925373134328358}
---- entity reranking  --
{'P_20': 0.05,
 'map': 0.0037313432835820895,
 'ndcg': 0.02729385859344783,
 'recall_20': 0.014925373134328358}
--------------------------------
317
----- baseline --------
{'P_20': 0.2,
 'map': 0.11211407639979068,
 'nd

# End
