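This notebook computes Precision@N for every experiment setting, i.e. every combination of ranking model, relevance-label dataset, and cutoff N, and writes the per-query and averaged scores to the `output/` directory.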

## Settings

In [1]:
# Metric label; it is interpolated into the output file names and the summary lines.
metric = "precision"

# Ranking models to evaluate; each name must be a key of `models_dict`
# and have a matching "<model>_rank" column in its ranking frame.
models = [
    "bm25",
    "bert",
]

# Relevance-label sets to evaluate against; each name must be a key of `dataset_dict`.
datasets = [
    "ms_marco",
    "threshold=2",
    "threshold=3",
]

# Rank cutoffs N at which precision is computed.
cutoffs = [5, 10]

## Import Libraries

In [2]:
import os
from os import listdir
from os.path import isfile, join

import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
from tqdm.auto import tqdm

## Load Data

In [3]:
# Directory prefix that is prepended to every file name below.
data_dir = 'data/'

# --- Rankings ---------------------------------------------------------------
# BM25 top-100 ranking
bm25_top100_filename = 'run_development_top100.tsv'
# BERT top-100 ranking
bert_top100_filename = 'bert_thesis_dataset_top100.tsv'

# --- Relevance judgements (qrels) -------------------------------------------
# Original MS MARCO relevance file
og_qrels_filename = 'qrels.dev.small.tsv'
# Thesis labels binarised at threshold 2 (irrelevant < 2; relevant >= 2)
thesis_qrels_threshold2_filename = 'thesis_dataset_binary_threshold2.tsv'
# Thesis labels binarised at threshold 3 (irrelevant < 3; relevant >= 3)
thesis_qrels_threshold3_filename = 'thesis_dataset_binary_threshold3.tsv'

In [4]:
def _read_tsv(filename, columns):
    """Read a header-less, tab-separated file from `data_dir` and name its columns.

    The column names are assigned after reading, so a file whose column count
    differs from `columns` raises instead of being silently re-shaped.
    """
    frame = pd.read_csv(data_dir + filename,delimiter='\t',encoding='utf-8',header=None)
    frame.columns = columns
    return frame


# All three qrels files are given the same four column names.
_qrels_columns = ['query_id','label1','passage_id','label2']

# Rankings produced by the two models.
bm25_df = _read_tsv(bm25_top100_filename, ['query_id', 'passage_id', 'bm25_rank'])
bert_df = _read_tsv(
    bert_top100_filename,
    ['query_id', 'passage_id', 'bm25_rank', 'query', 'passage', 'bert_score', 'bert_rank'],
)

# Relevance judgements: original MS MARCO labels and the two thresholded thesis label sets.
og_qrels_df = _read_tsv(og_qrels_filename, _qrels_columns)
new_qrels2_df = _read_tsv(thesis_qrels_threshold2_filename, _qrels_columns)
new_qrels3_df = _read_tsv(thesis_qrels_threshold3_filename, _qrels_columns)

# Lookup tables keyed by the names used in the settings cell.
models_dict = {"bm25": bm25_df, "bert": bert_df}
dataset_dict = {"ms_marco": og_qrels_df, "threshold=2": new_qrels2_df, "threshold=3": new_qrels3_df}

## Helper Functions

In [5]:
def compute_precision(gt,ranking,n):
    """Compute precision at cutoff `n` for a single query.

    Parameters
    ----------
    gt : list-like
        Passage ids that count as relevant for the query.
    ranking : pandas.DataFrame
        The rows to score, with a 'passage_id' column. Every row is counted,
        so the caller is expected to pass only the rows within the cutoff.
    n : int
        The cutoff; used as the denominator.

    Returns
    -------
    float
        Number of rows in `ranking` whose passage id is in `gt`, divided by `n`.

    Raises
    ------
    ZeroDivisionError
        If `n` is 0.
    """
    if ranking.empty:
        # Nothing was retrieved (the frame may not even have columns): zero hits.
        nr_relevant_items = 0
    else:
        # Vectorised membership test instead of a per-row Python loop over a list.
        # int(...) keeps the result a plain Python float after the division.
        nr_relevant_items = int(ranking['passage_id'].isin(gt).sum())
    return nr_relevant_items/n

def get_query_ids(dataframe):
    """Return the distinct values of the 'query_id' column as a sorted list."""
    all_ids = dataframe['query_id'].tolist()
    # np.unique both de-duplicates and sorts the ids.
    distinct_ids = np.unique(all_ids)
    return [query_id for query_id in distinct_ids]

def get_top_n_ranking(dataframe,rank_column,n):
    """Return the rows whose `rank_column` value is at most `n`, ordered by that rank.

    The result is a new frame; the original row index labels are kept.
    """
    within_cutoff = dataframe[rank_column] <= n
    return dataframe[within_cutoff].sort_values(by=[rank_column])

def get_passage_ids(dataframe):
    """Return the 'passage_id' column as a plain Python list, in row order (duplicates kept)."""
    passage_column = dataframe['passage_id']
    return passage_column.values.tolist()

## Compute Precision

In [7]:
scores = []

# Create the output directory up front so the cell also works on a fresh checkout.
os.makedirs("output", exist_ok=True)

# Precision is always averaged over the queries present in the threshold=2 qrels,
# whichever qrels file supplies the relevance labels. The set does not depend on
# the model, dataset or cutoff, so it is computed once.
query_ids = get_query_ids(new_qrels2_df)

for model in models:
    # The helpers below never modify their inputs, so no defensive copies are needed.
    model_df = models_dict[model]
    rank_column = "%s_rank"%(model)

    for dataset in datasets:
        dataset_df = dataset_dict[dataset]

        for cutoff in cutoffs:
            output_file = "output/%s_scores_%s_%s_N%s.txt"%(metric,model,dataset,cutoff)

            p_scores = []
            summed_p = 0.0

            # tqdm.auto.tqdm replaces the deprecated tqdm.tqdm_notebook.
            for query_id in tqdm(query_ids, desc="%s | %s | N=%s"%(model,dataset,cutoff)):
                model_query_subset = model_df[model_df['query_id'] == query_id]
                qrels_query_subset = dataset_df[dataset_df['query_id'] == query_id]

                top_n_ranking = get_top_n_ranking(model_query_subset,rank_column,cutoff)
                relevant_items = get_passage_ids(qrels_query_subset)

                p = compute_precision(relevant_items,top_n_ranking,cutoff)

                summed_p += p
                p_scores.append((query_id,p))

            # Mean precision over all queries, as a percentage rounded to one decimal.
            mean_precision = round((summed_p/len(query_ids))*100,1)
            score_data = "model: %s; dataset: %s; cutoff: %s; %s_score: %s\n"%(model,dataset,cutoff,metric,mean_precision)
            scores.append(score_data)

            # Per-query scores for this (model, dataset, cutoff) combination.
            with open(output_file,'w') as outfile:
                outfile.writelines("%s\t%s\n"%(query_id,p) for (query_id, p) in p_scores)

# Summary file with one line per (model, dataset, cutoff) combination.
with open("output/%s_scores.txt"%(metric),'w') as outfile:
    outfile.writelines(scores)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))




In [8]:
# Echo the summary lines; each already ends with a newline, so print() leaves
# a blank line between entries.
for summary_line in scores:
    print(summary_line)

model: bm25; dataset: ms_marco; cutoff: 5; precision_score: 14.3

model: bm25; dataset: ms_marco; cutoff: 10; precision_score: 8.6

model: bm25; dataset: threshold=2; cutoff: 5; precision_score: 80.5

model: bm25; dataset: threshold=2; cutoff: 10; precision_score: 67.1

model: bm25; dataset: threshold=3; cutoff: 5; precision_score: 63.3

model: bm25; dataset: threshold=3; cutoff: 10; precision_score: 50.5

model: bert; dataset: ms_marco; cutoff: 5; precision_score: 17.1

model: bert; dataset: ms_marco; cutoff: 10; precision_score: 9.3

model: bert; dataset: threshold=2; cutoff: 5; precision_score: 67.1

model: bert; dataset: threshold=2; cutoff: 10; precision_score: 50.2

model: bert; dataset: threshold=3; cutoff: 5; precision_score: 60.0

model: bert; dataset: threshold=3; cutoff: 10; precision_score: 42.6

