## Imports 

In [25]:
import os
from models.builers.retriever import Retriever

## Getting dataset

In [2]:
from data.dataloader import Data
import configparser

# Load the project settings from config.ini (path is relative to the working directory).
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so fail loudly here instead of handing an empty
# configuration to Data(config).
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini could not be read from the current working directory")
data_handler = Data(config)
# Name of the dataset used throughout the notebook.
dataset = "fiqa"
corpus, queries = data_handler.get_dataset(dataset)


Loading dataset from data/datasets\fiqa
data/datasets\fiqa\corpus.jsonl


In [5]:
# Relevance judgements for the selected dataset, as returned by the data handler.
relevants = data_handler.get_relevants(dataset)

# Preview: the first five entries of the relevance dictionary.
print("First 5 relevants from the dictionary")
for query_id, judgements in list(relevants.items())[:5]:
    print("  ", query_id, ":", judgements)

# Preview: one raw record from the corpus and one from the query set.
print("\nFirst query and document from corpus:")
print("  Corpus[0]:  ", corpus[0])
print("  Query[0]: ", queries[0])

First 5 relevants from the dictionary
   0 : [('18850', 1)]
   4 : [('196463', 1)]
   5 : [('69306', 1)]
   6 : [('560251', 1), ('188530', 1), ('564488', 1)]
   7 : [('411063', 1)]

First query and document from corpus:
  Corpus[0]:   {'_id': '3', 'title': '', 'text': "I'm not saying I don't like the idea of on-the-job training too, but you can't expect the company to do that. Training workers is not their job - they're building software. Perhaps educational systems in the U.S. (or their students) should worry a little about getting marketable skills in exchange for their massive investment in education, rather than getting out with thousands in student debt and then complaining that they aren't qualified to do anything.", 'metadata': {}}
  Query[0]:  {'_id': '0', 'text': 'What is considered a business expense on a business trip?', 'metadata': {}}


## Create or load models

In [6]:
# Number of corpus documents kept in the small working sample.
N_SAMPLE_DOCUMENTS = 10

# Small sample of the corpus; within this notebook it is only referenced by the
# commented-out create_models(...) call in the next cell.
documents = corpus[:N_SAMPLE_DOCUMENTS]

In [20]:
# from models.model_loader_helpers import create_models

# models_to_create = {"TF-IDF": {},
#                     "BM25": {},
#                     "DPR": {},
#                     "Crossencoder": {"n":25},
#                     "KMeans": {"k":4},
#                     "CURE": {"k": 2, "n": 2, "shrinkage_fraction":0.2}}

# create_models(documents=documents, dataset_name=dataset, models=models_to_create, save=True)

from models.model_loader_helpers import load_models

# Models to request from load_models, keyed by model name. The values are
# presumably per-model parameters (empty for these three) - confirm against
# load_models.
models_to_load = {"TF-IDF": {},
                  "BM25": {},
                  "DPR": {}}
# Pass the `dataset` variable set in the data-loading cell rather than repeating
# the literal "fiqa", so the models always match the dataset that was loaded.
models = load_models(dataset, models_to_load)

## Create Queries 

In [21]:
from data.query import Query

# Raw queries from the data handler; indexed below by the keys of `relevants`.
queries_ = data_handler.get_queries() # Get queries in the correct format

# Build one Query object per entry of `relevants`, so every query in the list
# has at least an entry of relevance judgements attached.
# The loop variable is deliberately not called `id`: at notebook top level that
# would shadow the builtin id() for every later cell.
queries = []
for query_id, judgements in relevants.items():
    # Keep only the first element of each judgement (the document id).
    relevant_ids = [judgement[0] for judgement in judgements]
    raw_query = queries_[query_id]
    queries.append(Query(text=raw_query['text'], id=query_id, relevant_document_ids=relevant_ids))

# output an example
queries[0].GetQuery(), queries[0].GetNumberOfRelevantDocuments()

('What is considered a business expense on a business trip?', 1)

## Perform Experiment

In [22]:
def MeanReciprocalRank(relevancies):
    """Return the reciprocal rank of the first relevant result.

    Despite the name, no averaging happens here: the function scores a single
    result list.

    Args:
        relevancies: Iterable of truthy/falsy flags, one per retrieved result,
            in result order.

    Returns:
        ``1 / rank`` of the first truthy flag (ranks start at 1), or ``0`` when
        no flag is truthy.
    """
    first_hit_rank = next(
        (rank for rank, is_relevant in enumerate(relevancies, start=1) if is_relevant),
        None,
    )
    if first_hit_rank is None:
        return 0
    return 1 / first_hit_rank

def Precision(relevancies):
    """Return the fraction of retrieved results that are relevant.

    Args:
        relevancies: Sequence of truthy/falsy flags, one per retrieved result.

    Returns:
        Number of truthy flags divided by the number of flags, or ``0.0`` when
        nothing was retrieved (instead of raising ZeroDivisionError).
    """
    # An empty result list has no relevant hits; avoid dividing by zero.
    if len(relevancies) == 0:
        return 0.0
    return sum(1 for relevancy in relevancies if relevancy) / len(relevancies)

def Recall(relevancies, query: Query):
    """Return the share of reachable relevant documents that were retrieved.

    The denominator is capped at the number of retrieved results, so a result
    list shorter than the query's set of relevant documents can still score 1.0.

    Args:
        relevancies: Sequence of truthy/falsy flags, one per retrieved result.
        query: Object whose ``GetNumberOfRelevantDocuments()`` gives the number
            of documents relevant to the query.

    Returns:
        Relevant hits divided by ``min(len(relevancies), relevant documents)``,
        or ``0.0`` when that denominator is zero (nothing retrieved, or the
        query has no relevant documents) instead of raising ZeroDivisionError.
    """
    hits = sum(1 for relevancy in relevancies if relevancy)
    denominator = min(len(relevancies), query.GetNumberOfRelevantDocuments())
    # Nothing retrievable means nothing recalled; avoid dividing by zero.
    if denominator == 0:
        return 0.0
    return hits / denominator

In [23]:
import time

def TimeFunction(function, **args):
    """Call ``function`` with the given keyword arguments and time the call.

    Args:
        function: Callable to invoke.
        **args: Keyword arguments forwarded unchanged to ``function``.

    Returns:
        A ``(elapsed, output)`` tuple: the difference between two
        ``time.perf_counter()`` readings taken around the call, and whatever
        ``function`` returned.
    """
    started_at = time.perf_counter()
    result = function(**args)
    elapsed_seconds = time.perf_counter() - started_at
    return elapsed_seconds, result

In [26]:
def RetrieveQueryAndGetScore(model: Retriever, query: Query, k: int):
    """Run one query against a model and flag each returned document.

    Args:
        model: Retriever whose ``Lookup(query=..., k=...)`` is called with the
            query text.
        query: Query supplying the text (``GetQuery()``) and the relevance
            check (``IsDocumentRelevant(document)``).
        k: Passed through to ``model.Lookup`` as ``k``.

    Returns:
        A list of booleans, one per document returned by ``Lookup`` and in the
        same order: ``True`` where the query considers the document relevant.
    """
    hits = model.Lookup(query=query.GetQuery(), k=k)
    return [True if query.IsDocumentRelevant(hit) else False for hit in hits]

In [27]:
def MeanReciprocalRank(relevancies):
    """Return the reciprocal rank of the first relevant result.

    NOTE: this redefinition shadows the identical function defined in an
    earlier cell of this notebook. Despite the name, it scores a single result
    list and does no averaging.

    Args:
        relevancies: Iterable of truthy/falsy flags, one per retrieved result,
            in result order.

    Returns:
        ``1 / rank`` of the first truthy flag (ranks start at 1), or ``0`` when
        no flag is truthy.
    """
    rank = 0
    for is_relevant in relevancies:
        rank += 1
        if is_relevant:
            return 1 / rank
    return 0

def Precision(relevancies):
    """Return the fraction of retrieved results that are relevant.

    NOTE: this redefinition shadows the function of the same name defined in
    an earlier cell of this notebook.

    Args:
        relevancies: Sequence of truthy/falsy flags, one per retrieved result.

    Returns:
        Number of truthy flags divided by the number of flags, or ``0.0`` when
        nothing was retrieved (instead of raising ZeroDivisionError).
    """
    # An empty result list has no relevant hits; avoid dividing by zero.
    if len(relevancies) == 0:
        return 0.0
    return sum(1 for relevancy in relevancies if relevancy) / len(relevancies)

def Recall(relevancies, query: Query):
    """Return the share of reachable relevant documents that were retrieved.

    NOTE: this redefinition shadows the function of the same name defined in
    an earlier cell of this notebook.

    The denominator is capped at the number of retrieved results, so a result
    list shorter than the query's set of relevant documents can still score 1.0.

    Args:
        relevancies: Sequence of truthy/falsy flags, one per retrieved result.
        query: Object whose ``GetNumberOfRelevantDocuments()`` gives the number
            of documents relevant to the query.

    Returns:
        Relevant hits divided by ``min(len(relevancies), relevant documents)``,
        or ``0.0`` when that denominator is zero (nothing retrieved, or the
        query has no relevant documents) instead of raising ZeroDivisionError.
    """
    hits = sum(1 for relevancy in relevancies if relevancy)
    denominator = min(len(relevancies), query.GetNumberOfRelevantDocuments())
    # Nothing retrievable means nothing recalled; avoid dividing by zero.
    if denominator == 0:
        return 0.0
    return hits / denominator

In [38]:
# Evaluate every loaded model on a single query: the first one in `queries`.
query = queries[0]
# Retrieval cutoff: as many results as the query has known relevant documents.
# (An earlier `k = 10` placed before the loop was overwritten with this value
# on every iteration and therefore never took effect.)
k = query.GetNumberOfRelevantDocuments()

print("Query: ", query.GetQuery())

# NOTE(review): every metric in the recorded output is 0. If the loaded models
# were built from the 10-document `documents` sample, the relevant document may
# simply not be indexed - confirm before interpreting these numbers.
for model_type, model in models.items():
    print(model_type)
    # TimeFunction forwards the keyword arguments to RetrieveQueryAndGetScore
    # and returns (elapsed, relevancies).
    time_perf, relevancies = TimeFunction(RetrieveQueryAndGetScore, model=model, query=query, k=k)
    print("  ", time_perf, "s")
    print("  ", MeanReciprocalRank(relevancies), "(MRR)")
    print("  ", Precision(relevancies), "(Precision)")
    print("  ", Recall(relevancies, query), "(Recall)")

Query:  What is considered a business expense on a business trip?
TF-IDF
   0.00015629990957677364 s
   0 (MRR)
   0.0 (Precision)
   0.0 (Recall)
BM25
   0.00010780012235045433 s
   0 (MRR)
   0.0 (Precision)
   0.0 (Recall)
DPR
   0.09074080013670027 s
   0 (MRR)
   0.0 (Precision)
   0.0 (Recall)
