In [1]:
import loader
import evaluator
import numpy as np
import pandas as pd

# Locations of the test-split relevance judgments (TSV) and the queries (JSONL).
# NOTE(review): these are absolute, machine-specific paths — adjust them
# before running on another machine.
qrels_path = "C:/Users/USER/DataSets/arguana/qrels/test.tsv"
queries_path = "C:/Users/USER/DataSets/arguana/queries.jsonl"
# Dataset identifier handed to both pipeline factories below.
dataset_name = "arguana"

# Load the evaluation data. evaluate_pipeline() later reads these as mappings:
# qrels via .get(qid, []) and queries via .items() (query id -> query text).
qrels = loader.load_qrels(qrels_path)
queries = loader.load_queries(queries_path)
# Build one retrieval pipeline per model for the same dataset.
tf_idf_pipeline = loader.tf_idf_pipeline(dataset_name)
bert_sentence_pipeline = loader.bert_sentence_pipeline(dataset_name)

def evaluate_pipeline(pipeline, queries, qrels, run_fn, top_k=100, k=10):
    """Score a retrieval pipeline over a query set and return mean metrics.

    For every query, ``run_fn(pipeline, query_text, top_k)`` is called and the
    first element of the returned pair is treated as the retrieved documents.
    Those are compared with the query's relevant documents using the
    ``evaluator`` helpers, and each metric is averaged over all queries.

    Args:
        pipeline: Object passed through unchanged to ``run_fn``.
        queries: Mapping of query id to query text.
        qrels: Mapping of query id to its relevant documents. A query id
            missing from ``qrels`` is scored against an empty list and
            still counts towards the averages.
        run_fn: Callable ``(pipeline, query_text, top_k)`` returning a
            two-item result whose first item is the retrieved documents.
        top_k: Number of results requested from ``run_fn`` per query.
        k: Cutoff for precision and recall. Defaults to 10, which
            reproduces the previously hard-coded ``@10`` metrics. A cutoff
            larger than ``top_k`` is not rejected, but fewer than ``k``
            documents will then be available to the ``@k`` metrics.

    Returns:
        Dict with the keys ``"Precision@<k>"``, ``"Recall@<k>"``, ``"MAP"``
        and ``"MRR"`` (in that order), each holding the mean over all
        queries. With an empty ``queries`` mapping the means are ``nan``,
        as produced by ``np.mean`` on an empty list.
    """
    # Derive the metric names from the cutoff so the labels can never
    # disagree with the value actually used.
    precision_key = f"Precision@{k}"
    recall_key = f"Recall@{k}"
    scores = {precision_key: [], recall_key: [], "MAP": [], "MRR": []}

    for qid, query_text in queries.items():
        relevant_docs = qrels.get(qid, [])
        # The second element returned by run_fn is not needed for scoring.
        retrieved_docs, _ = run_fn(pipeline, query_text, top_k)

        scores[precision_key].append(
            evaluator.precision_at_k(relevant_docs, retrieved_docs, k=k)
        )
        scores[recall_key].append(
            evaluator.recall_at_k(relevant_docs, retrieved_docs, k=k)
        )
        scores["MAP"].append(
            evaluator.average_precision(relevant_docs, retrieved_docs)
        )
        scores["MRR"].append(
            evaluator.reciprocal_rank(relevant_docs, retrieved_docs)
        )

    return {name: np.mean(values) for name, values in scores.items()}

def run_tf_idf(pipeline, query_text, top_k):
    """Run one query against the TF-IDF pipeline's inverted-index search.

    Adapter with the ``(pipeline, query_text, top_k)`` signature that
    ``evaluate_pipeline`` expects for its ``run_fn`` argument. The result of
    ``pipeline.run_with_inverted_index`` is returned unchanged.
    """
    search = pipeline.run_with_inverted_index
    return search(query_text, top_k)

def run_bert(pipeline, query_text, top_k):
    """Run one query against the BERT sentence pipeline's flat-IP index.

    Adapter with the ``(pipeline, query_text, top_k)`` signature that
    ``evaluate_pipeline`` expects for its ``run_fn`` argument. The result of
    ``pipeline.run_with_flat_ip_index`` is returned unchanged.
    """
    search = pipeline.run_with_flat_ip_index
    return search(query_text, top_k)

# Evaluate both pipelines on the same queries and relevance judgments,
# using the default top_k of evaluate_pipeline.
tfidf_results = evaluate_pipeline(tf_idf_pipeline, queries, qrels, run_tf_idf)
bert_results = evaluate_pipeline(bert_sentence_pipeline, queries, qrels, run_bert)

# One row per model, one column per metric; printed rounded to 4 decimals.
df = pd.DataFrame([tfidf_results, bert_results], index=["TF-IDF", "BERT"])
print(df.round(4))


  from .autonotebook import tqdm as notebook_tqdm


        Precision@10  Recall@10     MAP     MRR
TF-IDF        0.0711     0.7105  0.2296  0.2296
BERT          0.0757     0.7575  0.2507  0.2507
