In [1]:
from pyserini.search.lucene import LuceneSearcher
from ir_measures import *
import ir_measures
from tqdm import tqdm
import pandas as pd

ModuleNotFoundError: No module named 'pyserini'

In [2]:
# Read the reconstruction CSV, using its first column as the row index.
reconsruction_dataset = pd.read_csv(
    "data/msmarco-passage-v2-reconstruction.csv",
    index_col=0,
)

# Keep one (QueryID, Query) row per distinct query text: take the index labels
# left after dropping repeated texts, look those rows up, then renumber from 0.
queries = reconsruction_dataset.loc[
    reconsruction_dataset["Query"].drop_duplicates().index,
    ["QueryID", "Query"],
].reset_index(drop=True)

In [4]:
def get_lucene_search_results(index: str, k: int = 1000) -> pd.DataFrame:
    """Retrieve the scores of the top ``k`` hits for each query from a particular Lucene index.

    The queries are read from the notebook-level ``queries`` frame, whose rows
    are unpacked as ``(QueryID, Query)`` pairs.

    Args:
        index: Name handed to ``LuceneSearcher.from_prebuilt_index``.
        k: Maximum number of hits requested per query. Defaults to 1000.

    Returns:
        A DataFrame with the columns ``QueryID``, ``DocumentID`` and ``Score``,
        holding one row per returned hit.
    """
    searcher = LuceneSearcher.from_prebuilt_index(index)

    results = []
    # One search per query; every hit becomes a [query id, doc id, score] row.
    for query_id, query in tqdm(queries.values):
        results.extend(
            [query_id, hit.docid, hit.score] for hit in searcher.search(query, k=k)
        )

    return pd.DataFrame(results, columns=["QueryID", "DocumentID", "Score"])

In [None]:
# Search the "msmarco-v2-passage" prebuilt index and store the hits as CSV
# (row index omitted from the file).
BM25_results = get_lucene_search_results(index="msmarco-v2-passage")
BM25_results.to_csv(path_or_buf="data/BM25_results.csv", index=False)

In [None]:
# Search the "msmarco-v2-passage-augmented" prebuilt index and store the hits
# as CSV (row index omitted from the file).
BM25_Augmented_results = get_lucene_search_results(index="msmarco-v2-passage-augmented")
BM25_Augmented_results.to_csv(path_or_buf="data/BM25_Augmented_results.csv", index=False)

In [None]:
# Search the "msmarco-v2-passage-d2q-t5" prebuilt index and store the hits as
# CSV (row index omitted from the file).
BM25_d2q_t5_results = get_lucene_search_results(index="msmarco-v2-passage-d2q-t5")
BM25_d2q_t5_results.to_csv(path_or_buf="data/BM25_d2q_t5_results.csv", index=False)

In [None]:
# Search the "msmarco-v2-passage-augmented-d2q-t5" prebuilt index and store the
# hits as CSV (row index omitted from the file).
BM25_Augmented_d2q_t5_results = get_lucene_search_results(
    index="msmarco-v2-passage-augmented-d2q-t5"
)
BM25_Augmented_d2q_t5_results.to_csv(
    path_or_buf="data/BM25_Augmented_d2q_t5_results.csv", index=False
)

In [None]:
# Search the "msmarco-v2-passage-unicoil-0shot" prebuilt index and store the
# hits as CSV (row index omitted from the file).
# NOTE(review): uniCOIL is a learned-sparse (impact) index; pyserini normally
# queries those through LuceneImpactSearcher with a query encoder — confirm that
# going through LuceneSearcher here yields the intended scores.
uniCOIL_results = get_lucene_search_results(index="msmarco-v2-passage-unicoil-0shot")
uniCOIL_results.to_csv(path_or_buf="data/uniCOIL_results.csv", index=False)

In [87]:
# Reformat all of the data into a format that ir_measures can use

def to_ir_measures_qrel_format(dataset: pd.DataFrame, qrel_col: str) -> list:
    """Convert the positive judgements in ``qrel_col`` into ir_measures qrels.

    Rows whose ``qrel_col`` value is greater than 0 are written as
    ``"<QueryID> 0 <DocumentID> <relevance>"`` lines and parsed with
    ``ir_measures.read_trec_qrels``.

    Args:
        dataset: Frame with ``QueryID``, ``DocumentID`` and ``qrel_col`` columns.
        qrel_col: Name of the column holding the relevance labels.

    Returns:
        A list of the records yielded by ``ir_measures.read_trec_qrels``;
        an empty list when no row has a positive label.
    """
    positive = dataset.loc[dataset[qrel_col] > 0, ["QueryID", "DocumentID", qrel_col]]

    # itertuples keeps each column's own dtype; ``.values`` would upcast integer
    # ids to floats ("12.0") whenever every selected column is numeric.
    lines = [
        f"{query_id} 0 {doc_id} {int(qrel)}\n"
        for query_id, doc_id, qrel in positive.itertuples(index=False, name=None)
    ]

    # Nothing to parse. NOTE(review): ir_measures appears to treat a string
    # without a newline as a file path, so an empty string must not reach it.
    if not lines:
        return []

    return list(ir_measures.read_trec_qrels("".join(lines)))


def to_ir_measures_run_format(results: pd.DataFrame) -> list:
    """Convert a three-column results frame into an ir_measures run.

    Each row is unpacked as ``(query id, document id, score)`` and written as
    ``"<query id> 0 <document id> 0 <score> 0"`` before being parsed with
    ``ir_measures.read_trec_run``.

    Args:
        results: Frame with exactly three columns, in the order query id,
            document id, score.

    Returns:
        A list of the records yielded by ``ir_measures.read_trec_run``;
        an empty list when ``results`` has no rows.
    """
    # itertuples keeps each column's own dtype; ``.values`` would upcast integer
    # ids to floats ("12.0") whenever every column is numeric.
    run_lines = [
        f"{query_id} 0 {doc_id} 0 {score} 0\n"
        for query_id, doc_id, score in results.itertuples(index=False, name=None)
    ]

    # Nothing to parse. NOTE(review): ir_measures appears to treat a string
    # without a newline as a file path, so an empty string must not reach it.
    if not run_lines:
        return []

    return list(ir_measures.read_trec_run("".join(run_lines)))

# One qrels list per relevance-label column of the reconstruction dataset.
original_qrels = to_ir_measures_qrel_format(reconsruction_dataset, "Actual")
zeroshot_qrels = to_ir_measures_qrel_format(reconsruction_dataset, "Zeroshot")
zeroshot_reversed_qrels = to_ir_measures_qrel_format(reconsruction_dataset, "Zeroshot Reversed")
fewshot_qrels = to_ir_measures_qrel_format(reconsruction_dataset, "Fewshot")
fewshot_reversed_qrels = to_ir_measures_qrel_format(reconsruction_dataset, "Fewshot Reversed")


# Runs are built from the retrieval result frames. to_ir_measures_run_format
# takes a single (QueryID, DocumentID, Score) frame, so the uniCOIL run is built
# from uniCOIL_results, mirroring the BM25 line.
# NOTE(review): the previous call passed (reconsruction_dataset, "uniCOIL zeroshot
# Search Score"), which raises TypeError — confirm that the retrieved uniCOIL
# results are the intended source for this run.
BM25_scores = to_ir_measures_run_format(BM25_results)
uniCOIL_scores = to_ir_measures_run_format(uniCOIL_results)

In [93]:
def evaluate_run(qrels, run, qrels_name, run_name):
    """Score ``run`` against ``qrels`` and label the result.

    Computes the aggregate AP, nDCG@10, RR@100, R@100 and R@1000 through an
    ``ir_measures`` evaluator built on ``qrels``.

    Args:
        qrels: Relevance judgements handed to ``ir_measures.evaluator``.
        run: Run handed to the evaluator's ``calc_aggregate``.
        qrels_name: Label stored under ``"qrel source"``.
        run_name: Label stored under ``"run"``.

    Returns:
        A ``pd.Series`` keyed by the string form of each measure, followed by
        the ``"qrel source"`` and ``"run"`` entries.
    """
    measures = [AP, nDCG@10, RR@100, R@100, R@1000]
    aggregate = ir_measures.evaluator(measures, qrels).calc_aggregate(run)

    # Measure objects are turned into their string names to serve as labels.
    by_name = {str(measure): value for measure, value in aggregate.items()}

    evaluation = pd.Series(by_name)
    evaluation["qrel source"] = qrels_name
    evaluation["run"] = run_name
    return evaluation

# Every qrels source, paired with the label recorded in the results table.
qrels_list = [
    (original_qrels, "Actual"),
    (zeroshot_qrels, "Zeroshot"),
    (zeroshot_reversed_qrels, "Zeroshot Reversed"),
    (fewshot_qrels, "Fewshot"),
    (fewshot_reversed_qrels, "Fewshot Reversed"),
]
# Every run, paired with the label recorded in the results table.
runs_list = [
    (BM25_scores, "BM25 Search Score"),
    (uniCOIL_scores, "uniCOIL zeroshot Search Score"),
]

# Evaluate each run against each qrels source; one Series per combination.
evaluations = []
for qrels, qrels_name in qrels_list:
    for run, run_name in runs_list:
        evaluations += [evaluate_run(qrels, run, qrels_name, run_name)]

# Stack the per-combination Series into one table, one row each.
evaluations = pd.DataFrame(evaluations)

skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported
skipped ['gm_map']: measures not yet supported


In [None]:
# Write the evaluation table to disk (row index included).
evaluations.to_csv(path_or_buf="data/evaluations_new.csv")