In [1]:
import os
import json
import time
import pandas as pd
from IPython.display import display

from lkae.retrieval.retrieve import get_retriever, retrieve_evidence, AuredDataset
from lkae.utils.data_loading import pkl_dir, load_pkl, root_dir

import pyterrier as pt
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import R, MAP    

# Initialise PyTerrier only if it has not been started yet, so that
# re-running this cell in a live kernel does not call pt.init() twice.
if not pt.started():
    pt.init()

PyTerrier 0.10.1 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8



In [2]:
datasets = {}

# Walk the pkl directory: every sub-directory becomes one entry in `datasets`,
# mapping the stem of each .pkl file found directly inside it to the loaded object.
# Non-directories at the top level and non-.pkl files are skipped.
# Listings are sorted so the resulting dict order does not depend on the
# order in which the filesystem happens to return entries.
for subdir in sorted(os.listdir(pkl_dir)):
    subdir_path = os.path.join(pkl_dir, subdir)
    if not os.path.isdir(subdir_path):
        continue
    datasets[subdir] = {}
    for filename in sorted(os.listdir(subdir_path)):
        if not filename.endswith('.pkl'):
            continue
        # splitext strips only the final extension, so a stem that itself
        # contains dots ("a.b.pkl" -> "a.b") is not truncated and cannot
        # silently collide with another file's key.
        variation_name = os.path.splitext(filename)[0]
        datasets[subdir][variation_name] = load_pkl(os.path.join(subdir_path, filename))

# Which split of the English data this notebook evaluates on.
split = 'dev'
dataset_split = 'English_' + split
qrel_filename = dataset_split + '_qrels.txt'

# All loaded variations for the chosen split, keyed by variation name.
dataset_variations_dict = datasets[dataset_split]
print(dataset_variations_dict.keys())

# Ground-truth relevance judgements (RQ1), read from <root_dir>/data/<qrel_filename>.
golden = ptio.read_qrels(os.path.join(root_dir, 'data', qrel_filename))

# Subset of the variations above that the experiment below iterates over.
selected_variations = [
    "pre-nonam-nobio",
    "pre-nam-bio",
    "nopre-nonam-nobio",
    "nopre-nam-bio",
]


dict_keys(['nopre-nam-bio', 'nopre-nam-nobio', 'nopre-nonam-bio', 'nopre-nonam-nobio', 'pre-nam-bio', 'pre-nam-nobio', 'pre-nonam-bio', 'pre-nonam-nobio'])


In [3]:
# Load every retriever configuration from config.json and construct one
# retriever per entry, keyed by the entry's 'retriever_method'.

# JSON is read as UTF-8 explicitly instead of relying on the platform default.
with open('config.json', 'r', encoding='utf-8') as file:
    configs = json.load(file)

# Build the retrievers only after the config file has been closed: the handle
# is not needed while get_retriever() runs.
# NOTE: if two config entries share the same 'retriever_method', the later
# one replaces the earlier one in this dict.
retrievers = {}
for config in configs['configs']:
    retrievers[config['retriever_method']] = get_retriever(**config)

retrievers

Initializing HFSentenceTransformersRetriever with model: sentence-transformers/multi-qa-distilbert-cos-v1
Initializing CrossEncoderRetriever with model: cross-encoder/stsb-roberta-large


{'bm25': <lkae.retrieval.methods.bm25.BM25Retriever at 0x23348d50430>,
 'ollama': <lkae.retrieval.methods.ollama_embeddings.OllamaEmbeddingRetriever at 0x23348d50850>,
 'openai': <lkae.retrieval.methods.openai_embeddings.OpenAIRetriever at 0x2339d4933a0>,
 'sent-transformers-hf': <lkae.retrieval.methods.sent_transformers_hf.HFSentenceTransformersRetriever at 0x2339d4ac220>,
 'sent-transformers-local': <lkae.retrieval.methods.sent_transformers.SBERTRetriever at 0x2339d4ad120>,
 'crossencoder': <lkae.retrieval.methods.sent_transformers.CrossEncoderRetriever at 0x233aa1e6b90>,
 'tfidf': <lkae.retrieval.methods.tfidf.TFIDFRetriever at 0x233aacebeb0>}

In [4]:
# For every selected variation of the dataset, run the experiment with each
# retriever, evaluate the run against the qrels (R@5 and MAP) and collect one
# result row per (variation, retriever) pair. The rows are saved as a CSV and
# displayed sorted by R@5.

out_dir = 'results'
# Create the output directory before the long-running loop so that a missing
# folder cannot make the final to_csv() fail after all runs have finished.
os.makedirs(out_dir, exist_ok=True)

data = []

for selected_variation in selected_variations:
    dataset: AuredDataset = dataset_variations_dict[selected_variation]
    for retriever_label, retriever in retrievers.items():
        # The measured wall time spans retrieval, building the prediction
        # frame and the evaluation call.
        start = time.time()

        retrieved_data = retrieve_evidence(dataset[:], retriever)

        # Each retrieved record is expanded into the first four columns and
        # tagged with the retriever's label in the 'name' column.
        pred = pd.DataFrame(
            [[*d, retriever_label] for d in retrieved_data],
            columns=['qid', 'docno', 'rank', 'score', 'name'],
        )

        # Named eval_scores rather than `eval` so the builtin is not shadowed
        # for the rest of the kernel session.
        eval_scores = ptpipelines.Evaluate(pred, golden, metrics=[R@5, MAP], perquery=False)
        # NOTE(review): unpacking relies on Evaluate returning the values in
        # the same order as `metrics` (R@5 first, then MAP) - confirm.
        r5, meanap = eval_scores.values()

        wall_time = time.time() - start

        print(f'result for retrieval run - R@5: {r5:.4f} MAP: {meanap:.4f} with config\tretriever: {retriever_label};\tds: {selected_variation}, took {wall_time:.2f} seconds')

        data.append({
            'R5': r5,
            'MAP': meanap,
            'Retrieval_Method': retriever_label,
            'DS_Settings': selected_variation,
            'Time (s)': wall_time,
        })

# Convert the list of dictionaries to a DataFrame
df_retrieval = pd.DataFrame(data)

df_retrieval.to_csv(f'{out_dir}/df_retrieval.csv')
print(f'saved df to {out_dir}/df_retrieval.csv')

# Display the DataFrame
display(df_retrieval.sort_values(by='R5', ascending=False))

result for retrieval run - R@5: 0.7214 MAP: 0.6454 with config	retriever: bm25;	ds: pre-nonam-nobio, took 0.47 seconds
result for retrieval run - R@5: 0.4491 MAP: 0.3228 with config	retriever: ollama;	ds: pre-nonam-nobio, took 341.44 seconds
result for retrieval run - R@5: 0.7102 MAP: 0.6415 with config	retriever: openai;	ds: pre-nonam-nobio, took 43.71 seconds
result for retrieval run - R@5: 0.6642 MAP: 0.5380 with config	retriever: sent-transformers-hf;	ds: pre-nonam-nobio, took 68.46 seconds


  attn_output = torch.nn.functional.scaled_dot_product_attention(


result for retrieval run - R@5: 0.6839 MAP: 0.5975 with config	retriever: sent-transformers-local;	ds: pre-nonam-nobio, took 5.22 seconds
result for retrieval run - R@5: 0.7018 MAP: 0.5909 with config	retriever: crossencoder;	ds: pre-nonam-nobio, took 172.35 seconds
result for retrieval run - R@5: 0.7235 MAP: 0.6261 with config	retriever: tfidf;	ds: pre-nonam-nobio, took 0.23 seconds
result for retrieval run - R@5: 0.7674 MAP: 0.6698 with config	retriever: bm25;	ds: pre-nam-bio, took 0.61 seconds
result for retrieval run - R@5: 0.3221 MAP: 0.2339 with config	retriever: ollama;	ds: pre-nam-bio, took 401.76 seconds
result for retrieval run - R@5: 0.7081 MAP: 0.6282 with config	retriever: openai;	ds: pre-nam-bio, took 50.83 seconds
Waiting for model to warm up (for 20.0 seconds)
result for retrieval run - R@5: 0.5347 MAP: 0.4939 with config	retriever: sent-transformers-hf;	ds: pre-nam-bio, took 108.11 seconds
result for retrieval run - R@5: 0.6860 MAP: 0.6004 with config	retriever: sent-t

Unnamed: 0,R5,MAP,Retrieval_Method,DS_Settings,Time (s)
7,0.767368,0.669789,bm25,pre-nam-bio,0.614144
21,0.767368,0.630316,bm25,nopre-nam-bio,0.679263
16,0.747368,0.674269,openai,nopre-nonam-nobio,47.405012
6,0.723509,0.62614,tfidf,pre-nonam-nobio,0.228726
0,0.721404,0.645351,bm25,pre-nonam-nobio,0.468739
2,0.710175,0.641462,openai,pre-nonam-nobio,43.710936
23,0.70807,0.618158,openai,nopre-nam-bio,59.823467
9,0.70807,0.628246,openai,pre-nam-bio,50.831173
5,0.701754,0.590936,crossencoder,pre-nonam-nobio,172.348049
25,0.685965,0.620906,sent-transformers-local,nopre-nam-bio,7.574176
