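This notebook computes the mean first rank (MFR), i.e. the rank of the first relevant passage within the top N averaged over all queries, for the BM25 ranking and the BERT re-ranking on the MS MARCO dataset, using both the original MS MARCO relevance labels and the newly acquired relevance labels.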

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
from tqdm.auto import tqdm 
from tqdm import tqdm_notebook

## Load Data

In [2]:
# Directory that holds every input file listed below
data_dir = 'data/'

# Rankings (top 100): BM25 and BERT
bm25_top100_filename = 'run_development_top100.tsv'
bert_top100_filename = 'bert_thesis_dataset_top100.tsv'

# Original MS MARCO relevance file
og_qrels_filename = 'qrels.dev.small.tsv'

# New relevance labels, binarised at two thresholds:
#   threshold 2 -> irrelevant < 2; relevant >= 2
#   threshold 3 -> irrelevant < 3; relevant >= 3
thesis_qrels_threshold2_filename = 'thesis_dataset_binary_threshold2.tsv'
thesis_qrels_threshold3_filename = 'thesis_dataset_binary_threshold3.tsv'

In [3]:
def _load_tsv(filename, column_names):
    """Read a header-less, tab-separated file from ``data_dir`` and name its columns."""
    frame = pd.read_csv(data_dir + filename,delimiter='\t',encoding='utf-8',header=None)
    frame.columns = column_names
    return frame

# Every relevance (qrels) file is given the same four column names.
qrels_columns = ['query_id','label1','passage_id','label2']

# Rankings produced by the two models
bm25_df = _load_tsv(bm25_top100_filename, ['query_id', 'passage_id', 'bm25_rank'])
bert_df = _load_tsv(
    bert_top100_filename,
    ['query_id', 'passage_id', 'bm25_rank', 'query', 'passage', 'bert_score', 'bert_rank'],
)

# Relevance labels: original MS MARCO and the two thresholded new datasets
og_qrels_df = _load_tsv(og_qrels_filename, qrels_columns)
new_qrels2_df = _load_tsv(thesis_qrels_threshold2_filename, qrels_columns)
new_qrels3_df = _load_tsv(thesis_qrels_threshold3_filename, qrels_columns)

# Lookup tables used by the evaluation cells
models_dict = {"bm25": bm25_df, "bert": bert_df}
new_qrels_dict = {"threshold=2": new_qrels2_df, "threshold=3": new_qrels3_df}

## Helper Functions

In [4]:
def get_query_ids(dataframe):
    """Return the distinct values of the ``query_id`` column as a sorted list."""
    all_ids = dataframe['query_id'].tolist()
    # np.unique both de-duplicates and sorts the ids
    distinct_ids = np.unique(all_ids)
    return list(distinct_ids)

In [5]:
def get_top_n_ranking(dataframe,rank_column,n):
    """Return the rows whose ``rank_column`` value is at most ``n``, ordered by that rank.

    The original row index labels are kept on the returned frame.
    """
    within_cutoff = dataframe[rank_column] <= n
    return dataframe.loc[within_cutoff].sort_values(by=[rank_column])

In [6]:
def get_passage_ids(dataframe):
    """Return every value of the ``passage_id`` column as a plain Python list.

    Rows are not filtered in any way: duplicates and the original order are kept.
    """
    return dataframe['passage_id'].values.tolist()

In [7]:
def compute_mfr(gt,ranking,model,n):
    """Return the first (smallest) rank at which a relevant passage appears.

    Parameters
    ----------
    gt : list-like
        Passage ids considered relevant for the query.
    ranking : pandas.DataFrame
        Ranked passages for the query; must provide a ``passage_id`` column
        and a ``<model>_rank`` column.
    model : str
        Prefix of the rank column, e.g. ``"bm25"`` selects ``bm25_rank``.
    n : int
        Rank cutoff.

    Returns
    -------
    The smallest rank not exceeding ``n`` that belongs to a passage in ``gt``,
    or ``n + 1`` when no such passage exists (including an empty ``ranking``).
    """
    rank_column = '%s_rank'%(model)
    # Score given to a query without any relevant passage inside the cutoff
    miss_score = n+1

    if ranking.empty:
        return miss_score

    # Vectorised membership test instead of a row-by-row `iterrows` scan
    is_relevant = ranking['passage_id'].isin(gt)
    relevant_ranks = ranking.loc[is_relevant, rank_column]
    # Ranks at or beyond n + 1 never improve on the miss score, so drop them
    relevant_ranks = relevant_ranks[relevant_ranks < miss_score]

    if relevant_ranks.empty:
        return miss_score
    return relevant_ranks.min()

## Settings

In [8]:
# Rank cutoff: only passages ranked within the top N are searched for a relevant hit;
# a query with no relevant passage in the top N is scored N + 1 by compute_mfr.
N = 5

# Evaluate BM25

## Compute MFR (threshold = 2)

In [9]:
# Irrelevant < 2; relevant >= 2
binary_threshold = 2

model = "bm25"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Running sums of the per-query first ranks; they are turned into means after the loop.
# og == original MS MARCO relevance dataset; new == new relevance dataset
og_mfr = 0.0
new_mfr = 0.0

# The new relevance dataset contains fewer query ids than the original MS MARCO dataset.
# So get these query ids and use them to create subsets of the original dataset and the model output.
query_ids = get_query_ids(new_qrels_df)
og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(query_ids)].copy()
model_subset_df = model_df[model_df['query_id'].isin(query_ids)].copy()

# `tqdm` is the `tqdm.auto` import from the imports cell; it replaces the
# deprecated `tqdm_notebook`, which emits a deprecation warning on every run.
for query_id in tqdm(query_ids):
    # Get query specific ranking and relevance datasets
    query_subset = model_subset_df[model_subset_df['query_id'] == query_id].copy()
    og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
    new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

    # Only the top N passages of the model's ranking are evaluated
    top_n_ranking = get_top_n_ranking(query_subset,'%s_rank'%(model),N)

    og_relevant_passages = get_passage_ids(og_qrels_query_subset)
    new_relevant_passages = get_passage_ids(new_qrels_query_subset)

    # Same ranking scored against both sets of relevance labels
    og_mfr += compute_mfr(og_relevant_passages,top_n_ranking,model,N)
    new_mfr += compute_mfr(new_relevant_passages,top_n_ranking,model,N)

# Mean first rank over all evaluated queries, rounded to two decimals
og_mfr = round((og_mfr/len(query_ids)),2)
new_mfr = round((new_mfr/len(query_ids)),2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




In [10]:
# Mean first rank measured against the original MS MARCO relevance labels
print(og_mfr)

3.44


In [11]:
# Mean first rank measured against the new relevance labels
print(new_mfr)

1.05


In [12]:
# Number of distinct query ids in the new relevance dataset, i.e. queries evaluated
print(len(query_ids))

43


## Compute MFR (threshold = 3)

In [13]:
# Irrelevant < 3; relevant >= 3
binary_threshold = 3

model = "bm25"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Running sums of the per-query first ranks; they are turned into means after the loop.
# og == original MS MARCO relevance dataset; new == new relevance dataset
og_mfr = 0.0
new_mfr = 0.0

# The new relevance dataset contains fewer query ids than the original MS MARCO dataset.
# So get these query ids and use them to create subsets of the original dataset and the model output.
query_ids = get_query_ids(new_qrels_df)
og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(query_ids)].copy()
model_subset_df = model_df[model_df['query_id'].isin(query_ids)].copy()

# `tqdm` is the `tqdm.auto` import from the imports cell; it replaces the
# deprecated `tqdm_notebook`, which emits a deprecation warning on every run.
for query_id in tqdm(query_ids):
    # Get query specific ranking and relevance datasets
    query_subset = model_subset_df[model_subset_df['query_id'] == query_id].copy()
    og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
    new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

    # Only the top N passages of the model's ranking are evaluated
    top_n_ranking = get_top_n_ranking(query_subset,'%s_rank'%(model),N)

    og_relevant_passages = get_passage_ids(og_qrels_query_subset)
    new_relevant_passages = get_passage_ids(new_qrels_query_subset)

    # Same ranking scored against both sets of relevance labels
    og_mfr += compute_mfr(og_relevant_passages,top_n_ranking,model,N)
    new_mfr += compute_mfr(new_relevant_passages,top_n_ranking,model,N)

# Mean first rank over all evaluated queries, rounded to two decimals
og_mfr = round((og_mfr/len(query_ids)),2)
new_mfr = round((new_mfr/len(query_ids)),2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




In [14]:
# Mean first rank measured against the original MS MARCO relevance labels
print(og_mfr)

3.44


In [15]:
# Mean first rank measured against the new relevance labels
print(new_mfr)

1.26


In [16]:
# Number of distinct query ids in the new relevance dataset, i.e. queries evaluated
print(len(query_ids))

43


# Evaluate BERT

## Compute MFR (threshold = 2)

In [17]:
# Irrelevant < 2; relevant >= 2
binary_threshold = 2

model = "bert"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Running sums of the per-query first ranks; they are turned into means after the loop.
# og == original MS MARCO relevance dataset; new == new relevance dataset
og_mfr = 0.0
new_mfr = 0.0

# The new relevance dataset contains fewer query ids than the original MS MARCO dataset.
# So get these query ids and use them to create subsets of the original dataset and the model output.
query_ids = get_query_ids(new_qrels_df)
og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(query_ids)].copy()
model_subset_df = model_df[model_df['query_id'].isin(query_ids)].copy()

# `tqdm` is the `tqdm.auto` import from the imports cell; it replaces the
# deprecated `tqdm_notebook`, which emits a deprecation warning on every run.
for query_id in tqdm(query_ids):
    # Get query specific ranking and relevance datasets
    query_subset = model_subset_df[model_subset_df['query_id'] == query_id].copy()
    og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
    new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

    # Only the top N passages of the model's ranking are evaluated
    top_n_ranking = get_top_n_ranking(query_subset,'%s_rank'%(model),N)

    og_relevant_passages = get_passage_ids(og_qrels_query_subset)
    new_relevant_passages = get_passage_ids(new_qrels_query_subset)

    # Same ranking scored against both sets of relevance labels
    og_mfr += compute_mfr(og_relevant_passages,top_n_ranking,model,N)
    new_mfr += compute_mfr(new_relevant_passages,top_n_ranking,model,N)

# Mean first rank over all evaluated queries, rounded to two decimals
og_mfr = round((og_mfr/len(query_ids)),2)
new_mfr = round((new_mfr/len(query_ids)),2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




In [18]:
# Mean first rank measured against the original MS MARCO relevance labels
print(og_mfr)

2.21


In [19]:
# Mean first rank measured against the new relevance labels
print(new_mfr)

1.28


In [20]:
# Number of distinct query ids in the new relevance dataset, i.e. queries evaluated
print(len(query_ids))

43


## Compute MFR (threshold = 3)

In [21]:
# Irrelevant < 3; relevant >= 3
binary_threshold = 3

model = "bert"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Running sums of the per-query first ranks; they are turned into means after the loop.
# og == original MS MARCO relevance dataset; new == new relevance dataset
og_mfr = 0.0
new_mfr = 0.0

# The new relevance dataset contains fewer query ids than the original MS MARCO dataset.
# So get these query ids and use them to create subsets of the original dataset and the model output.
query_ids = get_query_ids(new_qrels_df)
og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(query_ids)].copy()
model_subset_df = model_df[model_df['query_id'].isin(query_ids)].copy()

# `tqdm` is the `tqdm.auto` import from the imports cell; it replaces the
# deprecated `tqdm_notebook`, which emits a deprecation warning on every run.
for query_id in tqdm(query_ids):
    # Get query specific ranking and relevance datasets
    query_subset = model_subset_df[model_subset_df['query_id'] == query_id].copy()
    og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
    new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

    # Only the top N passages of the model's ranking are evaluated
    top_n_ranking = get_top_n_ranking(query_subset,'%s_rank'%(model),N)

    og_relevant_passages = get_passage_ids(og_qrels_query_subset)
    new_relevant_passages = get_passage_ids(new_qrels_query_subset)

    # Same ranking scored against both sets of relevance labels
    og_mfr += compute_mfr(og_relevant_passages,top_n_ranking,model,N)
    new_mfr += compute_mfr(new_relevant_passages,top_n_ranking,model,N)

# Mean first rank over all evaluated queries, rounded to two decimals
og_mfr = round((og_mfr/len(query_ids)),2)
new_mfr = round((new_mfr/len(query_ids)),2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=43.0), HTML(value='')))




In [22]:
# Mean first rank measured against the original MS MARCO relevance labels
print(og_mfr)

2.21


In [23]:
# Mean first rank measured against the new relevance labels
print(new_mfr)

1.37


In [24]:
# Number of distinct query ids in the new relevance dataset, i.e. queries evaluated
print(len(query_ids))

43
