In [1]:
import os
import json

import pandas as pd
from IPython.display import display, HTML

from lkae.retrieval.retrieve import get_retriever, retrieve_evidence
from lkae.utils.data_loading import pkl_dir, load_pkl, root_dir

import pyterrier as pt
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import R, MAP    

if not pt.started():
    pt.init()

datasets = {}

# walk through the pkl directory and load all the datasets in one of its subdirectories
# load each dataset with its subdirectory name and filename as the key
# skip non-pkl files
for subdir in os.listdir(pkl_dir):
    if not os.path.isdir(os.path.join(pkl_dir, subdir)):
        continue            
    datasets[subdir] = {}
    for filename in os.listdir(os.path.join(pkl_dir, subdir)):
        if not filename.endswith('.pkl'):
            continue
        key = os.path.join(subdir, filename)
        datasets[subdir][filename.split('.')[0]] = load_pkl(os.path.join(pkl_dir, key))

dataset_name = 'English_train_dev_combined'
ds = datasets[dataset_name]



golden = ptio.read_qrels(os.path.join(root_dir, 'data', 'train_dev_combined_qrels.txt'))

PyTerrier 0.10.1 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8



In [2]:
# load each config and construct its retriever


retrievers = {}

with open('config.json', 'r') as file:
    configs = json.load(file)

    for config in configs['configs']:
        retriever_label = get_retriever(config['retriever_method'], config['retriever_k'])
        retrievers[config['retriever_method']] = retriever_label

retrievers

{'tfidf': <lkae.retrieval.methods.tfidf.TFIDFRetriever at 0x26e2d593460>,
 'openai': <lkae.retrieval.methods.open_ai.OpenAIRetriever at 0x26e2d592110>}

In [4]:
# then for every variation of the dataset in ds, run the experiment with each retriever and save the results

out_dir = 'results'

data = []


for dataset_label in ds:
    for retriever_label in retrievers:
        retrieved_data = retrieve_evidence(ds[dataset_label][0:1], retrievers[retriever_label])

        pred = pd.DataFrame([[*d, retriever_label] for d in retrieved_data], columns=['qid', 'docno', 'rank', 'score', 'name']) 

        eval = ptpipelines.Evaluate(pred, golden, metrics = [R@5,MAP], perquery=False)
        r5, meanap = [v for v in eval.values()]

        score = r5

        print(f'result for retrieval run - R@5: {r5:.4f} MAP: {meanap:.4f} with config\tretriever: {retriever_label};\tds: {dataset_label}')
        
        data.append({
            'R5': r5,
            'MAP': meanap,
            'Retrieval':retriever_label, 
            'DS_Settings': dataset_label,
        })

# Convert the list of dictionaries to a DataFrame
df_retrieval = pd.DataFrame(data)

df_retrieval.to_pickle(f'{out_dir}/df_retrieval.pkl')
print('saved df!')

# Display the DataFrame
display(df_retrieval.sort_values(by='R5', ascending=False))

result for retrieval run - R@5: 0.0000 MAP: 0.0000 with config	retriever: tfidf;	ds: nopre-nam-bio
result for retrieval run - R@5: 0.0033 MAP: 0.0033 with config	retriever: openai;	ds: nopre-nam-bio
result for retrieval run - R@5: 0.0033 MAP: 0.0007 with config	retriever: tfidf;	ds: nopre-nam-nobio
result for retrieval run - R@5: 0.0033 MAP: 0.0033 with config	retriever: openai;	ds: nopre-nam-nobio
result for retrieval run - R@5: 0.0033 MAP: 0.0007 with config	retriever: tfidf;	ds: nopre-nonam-bio
result for retrieval run - R@5: 0.0033 MAP: 0.0033 with config	retriever: openai;	ds: nopre-nonam-bio
result for retrieval run - R@5: 0.0033 MAP: 0.0007 with config	retriever: tfidf;	ds: nopre-nonam-nobio
result for retrieval run - R@5: 0.0033 MAP: 0.0033 with config	retriever: openai;	ds: nopre-nonam-nobio
result for retrieval run - R@5: 0.0000 MAP: 0.0000 with config	retriever: tfidf;	ds: pre-nam-bio
result for retrieval run - R@5: 0.0033 MAP: 0.0033 with config	retriever: openai;	ds: pre-n

Unnamed: 0,R5,MAP,Retrieval,DS_Settings
1,0.003333,0.003333,openai,nopre-nam-bio
2,0.003333,0.000667,tfidf,nopre-nam-nobio
3,0.003333,0.003333,openai,nopre-nam-nobio
4,0.003333,0.000667,tfidf,nopre-nonam-bio
5,0.003333,0.003333,openai,nopre-nonam-bio
6,0.003333,0.000667,tfidf,nopre-nonam-nobio
7,0.003333,0.003333,openai,nopre-nonam-nobio
9,0.003333,0.003333,openai,pre-nam-bio
10,0.003333,0.000667,tfidf,pre-nam-nobio
11,0.003333,0.003333,openai,pre-nam-nobio
