# Import dependencies


In [50]:

from elasticsearch import Elasticsearch
import pandas as pd
from sentence_transformers import CrossEncoder
from typing import Callable
import torch
import tqdm

# Preprocessing


In [51]:
# Name of the index that is queried throughout the notebook.
INDEX_NAME = "passage_index"

# Client created with the library's default connection settings.
es = Elasticsearch()

# Sanity check: fetch the document stored under id 1 and show the raw response.
doc = es.get(index=INDEX_NAME, id=1)
print(doc)

{'_index': 'passage_index', '_type': '_doc', '_id': '1', '_version': 1, '_seq_no': 1, '_primary_term': 1, 'found': True, '_source': {'content': 'The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science.'}}


In [52]:
# Load the evaluation queries from a headerless TSV, so the columns are labelled 0 and 1
# (column 0 is used as the query id and column 1 as the query text further down).
queries_eval = pd.read_csv(
    "data/queries/queries.eval.tsv",
    sep="\t",
    header=None,
)
queries_eval.head()

Unnamed: 0,0,1
0,786436,what is prescribed to treat thyroid storm
1,9,Refer to the data. Diminishing returns begin ...
2,786450,what is presentation software?
3,524308,treasury routing number
4,33,game called poem who wrote what occasion


In [53]:
# Load the passage collection from a headerless TSV, so the columns are labelled 0 and 1
# (column 1 is read as the passage text by the reranking step).
collection_df = pd.read_csv(
    "data/collection/collection.tsv",
    sep="\t",
    header=None,
)
# Report the number of rows, then preview the first few.
print(len(collection_df))
collection_df.head()

6535846


Unnamed: 0,0,1
0,0,The presence of communication amid scientific ...
1,1,The Manhattan Project and its atomic bomb help...
2,2,Essay on The Manhattan Project - The Manhattan...
3,3,The Manhattan Project was the name for a proje...
4,4,versions of each volume as well as complementa...


In [54]:
# Collect the distinct query ids found in the qrels file: the id is the first
# space-separated field of every line and is kept as a string.
with open("data/qrels/qrels.txt", encoding="utf-8") as qrels_file:
    query_ids = {row.split(' ')[0] for row in qrels_file}

In [55]:
# Keep only the evaluation queries whose id appears in the qrels file.
# Maps query id (column 0) -> query text (column 1); ids are compared as strings
# because query_ids holds the raw string fields read from the qrels file.
queriesToUse = {
    qid: text
    for qid, text in zip(queries_eval[0], queries_eval[1])
    if str(qid) in query_ids
}

# Initial retrieval

In [56]:
# First-stage retrieval: for every selected query, ask Elasticsearch for up to 1000 hits
# and keep only the hit ids, in the order they are returned. Keys are stringified query ids.
# NOTE(review): `q=` is presumably parsed with query-string syntax, so characters such as
# `?` or `:` inside a query may be interpreted as operators - confirm this is intended.
bm25_top1000 = {}
for query_id, query_text in queriesToUse.items():
    response = es.search(
        index=INDEX_NAME,
        q=query_text,
        _source=False,  # only ids are needed here, not the stored passage text
        size=1000,
        request_timeout=60,
    )
    bm25_top1000[str(query_id)] = [hit["_id"] for hit in response["hits"]["hits"]]



# Reranking 

In [57]:
if not torch.cuda.is_available():
    print("Warning: No GPU found. Please add GPU to your notebook")

reranked = {}

# We use a cross-encoder to re-rank the first-stage candidates and improve the ranking quality
cross_encoder = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-2-v2')

# Passage text column, selected once. Rows are addressed by position below, which relies on
# each Elasticsearch document id being the row position of that passage in collection_df.
passages = collection_df[1]

for query_id, doc_ids in tqdm.tqdm(bm25_top1000.items()):
    # The query text is the same for every candidate, so look it up once per query.
    # bm25_top1000 keys are strings while queriesToUse is keyed by integer ids.
    query_text = queriesToUse[int(query_id)]
    # Build the [query, passage] pairs; scalar .iat access avoids materialising a whole
    # row Series for every single candidate.
    model_input = [[query_text, passages.iat[int(doc_id)]] for doc_id in doc_ids]
    # Compute the cross-encoder scores
    scores = cross_encoder.predict(model_input)
    # Order candidates by descending score
    sorted_scores = sorted(zip(scores, doc_ids), key=lambda x: x[0], reverse=True)
    # Store at most the top 1000 document ids
    reranked[query_id] = [doc_id for _, doc_id in sorted_scores[:1000]]

100%|██████████| 43/43 [01:24<00:00,  1.97s/it]


# Evaluation

In [58]:
# Load the relevance judgments: map each query id to the set of passage ids
# that were judged with a relevance greater than zero.
qrelspath = "data/qrels/qrels.txt"

qrels = {}
with open(qrelspath, encoding="utf-8") as qrels_file:
    for row in qrels_file:
        fields = row.split(' ')

        # Field 0 is the query id, field 2 the passage id, field 3 the relevance grade.
        qid, pid, relevance = fields[0], fields[2], int(fields[3])

        # Non-positive grades are not treated as relevant.
        if relevance <= 0:
            continue
        qrels.setdefault(qid, set()).add(pid)

In [59]:
def get_average_precision(system_ranking, ground_truth) -> float:
    """Compute the average precision of one ranked list.

    Args:
        system_ranking: Document ids in ranked order, best first.
        ground_truth: Collection of relevant document ids (must support
            ``in`` and ``len``).

    Returns:
        The sum of precision@k over every rank k that holds a relevant
        document, divided by the total number of relevant documents.
        Returns 0.0 when ``ground_truth`` is empty, instead of dividing by zero.
    """
    if len(ground_truth) == 0:
        return 0.0

    precisions = []
    for rank, doc_id in enumerate(system_ranking, start=1):
        if doc_id in ground_truth:
            # Number of relevant documents seen so far (including this one)
            # over the current rank is precision@rank.
            precisions.append((len(precisions) + 1) / rank)

    return sum(precisions) / len(ground_truth)

In [60]:
def get_reciprocal_rank(system_ranking, ground_truth) -> float:
    """Compute the reciprocal rank of one ranked list.

    Args:
        system_ranking: Document ids in ranked order, best first.
        ground_truth: Collection of relevant document ids (must support ``in``).

    Returns:
        ``1 / rank`` of the first relevant document (ranks start at 1), or
        0.0 when the ranking contains no relevant document.
    """
    for rank, doc_id in enumerate(system_ranking, start=1):
        if doc_id in ground_truth:
            return 1 / rank

    # Always a float, matching the declared return type.
    return 0.0

In [61]:
def get_mean_eval_measure(system_rankings, ground_truths, eval_function: Callable) -> float:
    """Average a per-query evaluation measure over all judged queries.

    Args:
        system_rankings: Mapping of query id -> ranked list of document ids.
        ground_truths: Mapping of query id -> relevant document ids.
        eval_function: Called as ``eval_function(ranking, relevant_ids)`` and
            expected to return a number for a single query.

    Returns:
        The mean of ``eval_function`` over the queries present in both
        mappings. Queries without an entry in ``ground_truths`` are skipped
        and do not count towards the mean. Returns 0.0 when no query can be
        evaluated, instead of dividing by zero.
    """
    results = [
        eval_function(system_rankings[query], ground_truths[query])
        for query in system_rankings
        if query in ground_truths
    ]
    if not results:
        return 0.0
    return sum(results) / len(results)

In [62]:
# Mean Average Precision and Mean Reciprocal Rank of the reranked lists, averaged over
# the queries that have an entry in qrels.
# NOTE(review): the name `map` shadows the built-in `map()` for the rest of the kernel
# session; consider renaming it once it is confirmed that no later cell reads `map`.
map = get_mean_eval_measure(reranked, qrels, get_average_precision)
mrr = get_mean_eval_measure(reranked, qrels, get_reciprocal_rank)

print("MAP: ", map)
print("MRR: ", mrr)

MAP:  0.31684373478505734
MRR:  0.8759689922480619


## From BM25
map = 0.32872575816078825

mrr = 0.7265016684853105