## Import Libraries

In [1]:
import pandas as pd
import json
import numpy as np
from collections import Counter

## Load Data

In [28]:
# Directory prefix that is prepended to every input filename below
data_dir = 'data/'

# Query file (loaded with the columns 'query_id' and 'query')
query_subset_filename = 'queries.dev.small.tsv'

# binary threshold (irrelevant < 2; relevant >= 2)
thesis_qrels_threshold2_filename = 'thesis_dataset_binary_threshold2.tsv'

# binary threshold (irrelevant < 3; relevant >= 3)
thesis_qrels_threshold3_filename = 'thesis_dataset_binary_threshold3.tsv'

# msmarco relevance file
og_qrels_filename = 'qrels.dev.small.tsv'

# BM25 top 100 ranking
bm25_top100_filename = 'run_development_top100.tsv'

# BERT top 100 ranking
bert_top100_filename = 'bert_thesis_dataset_top100.tsv'

# Experiment query ids
thesis_query_subset = 'experiment_query_subset.tsv'

In [29]:
def _read_tsv(filename, column_names):
    """Load a header-less, tab-separated UTF-8 file from ``data_dir``.

    The columns of the returned DataFrame are renamed to ``column_names``.
    """
    frame = pd.read_csv(data_dir + filename, delimiter='\t', encoding='utf-8', header=None)
    frame.columns = column_names
    return frame


# Column layouts shared by several of the files below
_qrels_columns = ['query_id', 'label1', 'passage_id', 'label2']
_query_columns = ['query_id', 'query']

# Rankings of the two retrieval models
bm25_df = _read_tsv(bm25_top100_filename, ['query_id', 'passage_id', 'bm25_rank'])
bert_df = _read_tsv(
    bert_top100_filename,
    ['query_id', 'passage_id', 'bm25_rank', 'query', 'passage', 'bert_score', 'bert_rank'],
)

# Original relevance file and the two thresholded relevance files
og_qrels_df = _read_tsv(og_qrels_filename, _qrels_columns)
new_qrels2_df = _read_tsv(thesis_qrels_threshold2_filename, _qrels_columns)
new_qrels3_df = _read_tsv(thesis_qrels_threshold3_filename, _qrels_columns)

# NOTE(review): `query_subset` is first read from the experiment subset file
# and then, a few lines further down, overwritten with the contents of
# `query_subset_filename`. Only the second load survives - confirm which of the
# two files the rest of the notebook is actually meant to use.
query_subset = _read_tsv(thesis_query_subset, _query_columns)

# Lookup tables: model name -> ranking, threshold label -> relevance judgements
models_dict = {"bm25": bm25_df, "bert": bert_df}
new_qrels_dict = {"threshold=2": new_qrels2_df, "threshold=3": new_qrels3_df}

query_subset = _read_tsv(query_subset_filename, _query_columns)

In [4]:
# Maps query text -> answer type ("" when the line carries no answer type)
firebase_answer_types = {}
# Number of lines that carried an answer type / that carried none
counter_answer_types = 0
counter_no_answer_types = 0
# Decode explicitly as UTF-8, like data/manual_answer_types.txt, instead of
# relying on the platform-dependent default encoding of open().
with open("data/firebase_answer_types.txt", "r", encoding='utf-8') as infile:
    for line in infile:
        # Strip once; trailing whitespace (including a trailing tab) is dropped
        stripped = line.rstrip()
        if "\t" in stripped:
            counter_answer_types += 1
            # Exactly two tab-separated fields are expected on such a line
            query_text, answer_type = stripped.split("\t")
            # Keep only the part of the label before the first ':'
            firebase_answer_types[query_text] = answer_type.split(":")[0]
        elif stripped != "":
            # Non-empty line without a tab: a query that has no answer type
            counter_no_answer_types += 1
            firebase_answer_types[stripped] = ""

In [5]:
# Add / override answer types with the manually assigned ones.
# Each non-blank line is expected to be "<query text>\t<answer type>"; unlike
# the firebase file, the answer type is stored as-is (no split at ':').
with open("data/manual_answer_types.txt", "r", encoding='utf-8') as infile:
    for line in infile:
        stripped = line.rstrip()
        if not stripped:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file), which would otherwise break the two-field unpacking below
            continue
        query_text, answer_type = stripped.split("\t")
        firebase_answer_types[query_text] = answer_type

In [11]:
# Sorted list of the distinct answer-type labels found in `firebase_answer_types`
_all_answer_labels = list(firebase_answer_types.values())
answer_types = list(np.unique(_all_answer_labels))

## Helper Functions

In [20]:
def get_query_ids(dataframe):
    """Return the distinct values of the 'query_id' column as a sorted list."""
    raw_ids = dataframe['query_id'].tolist()
    distinct_ids = np.unique(raw_ids)
    return list(distinct_ids)

In [35]:
def get_top_n_ranking(dataframe,rank_column,n):
    """Return the rows whose `rank_column` value is at most `n`, ordered by that column.

    The input DataFrame is left untouched; the original row index is kept.
    """
    within_cutoff = dataframe[rank_column] <= n
    return dataframe[within_cutoff].sort_values(by=[rank_column])

In [34]:
def get_passage_ids(dataframe):
    """Return every value of the 'passage_id' column as a plain list, in row order.

    No filtering is applied: all rows of `dataframe` contribute an id.
    """
    passage_column = dataframe['passage_id']
    return passage_column.values.tolist()

In [33]:
def compute_mrr(gt,ranking,model,n):
    """Return the reciprocal rank of the best-ranked relevant passage.

    gt      -- collection of passage ids that count as relevant
    ranking -- DataFrame with a 'passage_id' column and a '<model>_rank' column
    model   -- model name used to build the rank column name
    n       -- rank cutoff; only ranks smaller than n + 1 are considered

    Returns 0.0 when no passage from `gt` appears within the cutoff.
    """
    rank_key = '%s_rank'%(model)
    reciprocal = 0.0
    lowest_rank_seen = n + 1
    for _, entry in ranking.iterrows():
        rank_here = entry[rank_key]
        if entry['passage_id'] not in gt:
            continue
        if not rank_here < lowest_rank_seen:
            continue
        # A relevant passage at a better rank than any seen so far
        reciprocal = 1.0 / rank_here
        lowest_rank_seen = rank_here
    return reciprocal

## Build a Dict Mapping Each Answer Type to Its Query Ids

In [21]:
# Sorted, de-duplicated ids of all queries contained in `query_subset`
query_ids = get_query_ids(query_subset)

In [31]:
# Build the query id -> query text lookup once, instead of re-scanning the
# whole DataFrame for every query id. setdefault keeps the first text seen for
# an id, matching the previous "take the first matching row" behaviour.
query_text_by_id = {}
for known_id, known_text in zip(query_subset['query_id'].tolist(),
                                query_subset['query'].tolist()):
    query_text_by_id.setdefault(known_id, known_text)

# Maps answer type -> list of query ids whose query text has that answer type.
# A query whose text is missing from `firebase_answer_types` raises KeyError.
answer_type_query_id_dict = {}
for query_id in query_ids:
    query_text = query_text_by_id[query_id]
    answer_type = firebase_answer_types[query_text]
    answer_type_query_id_dict.setdefault(answer_type, []).append(query_id)

In [52]:
# Print each answer type followed by the number of queries assigned to it
for answer_type in answer_types:
    print(answer_type)
    queries_of_type = answer_type_query_id_dict[answer_type]
    print(len(queries_of_type))

DESC
26
ENTY
1
HUM
3
LOC
1
NUM
12


## Compute Average MRR per answer type

In [36]:
# Rank cutoff for the MRR cells below (passed as `n` to get_top_n_ranking and compute_mrr)
N = 10

In [39]:
# Average MRR@N per answer type for BM25, original vs. thresholded relevance.
# Irrelevant < 2; relevant >= 2
binary_threshold = 2

model = "bm25"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Name of the column in `model_df` that holds this model's rank
rank_column = '%s_rank'%(model)

# answer type -> mean reciprocal rank (rounded to 2 decimals)
og_mrr_answer_type = {}
new_mrr_answer_type = {}

for answer_type in answer_types:
    # Loop-local names on purpose: rebinding the notebook-level `query_ids` /
    # `query_subset` here would replace the query id list and the query
    # DataFrame that earlier cells depend on when they are re-run.
    type_query_ids = answer_type_query_id_dict[answer_type]
    og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(type_query_ids)]
    model_subset_df = model_df[model_df['query_id'].isin(type_query_ids)]

    og_mrr = 0.0
    new_mrr = 0.0

    for query_id in type_query_ids:
        # Get query specific ranking and relevance datasets
        query_ranking = model_subset_df[model_subset_df['query_id'] == query_id]
        og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
        new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

        top_n_ranking = get_top_n_ranking(query_ranking, rank_column, N)

        # NOTE(review): every row of a qrels frame is treated as relevant (the
        # label columns are not inspected) - confirm the files only list
        # relevant passages.
        og_relevant_passages = get_passage_ids(og_qrels_query_subset)
        new_relevant_passages = get_passage_ids(new_qrels_query_subset)

        og_mrr += compute_mrr(og_relevant_passages, top_n_ranking, model, N)
        new_mrr += compute_mrr(new_relevant_passages, top_n_ranking, model, N)

    og_mrr_answer_type[answer_type] = round((og_mrr/len(type_query_ids)),2)
    new_mrr_answer_type[answer_type] = round((new_mrr/len(type_query_ids)),2)

In [40]:
og_mrr_answer_type

{'DESC': 0.45, 'ENTY': 1.0, 'HUM': 0.67, 'LOC': 0.5, 'NUM': 0.36}

In [41]:
new_mrr_answer_type

{'DESC': 0.96, 'ENTY': 1.0, 'HUM': 1.0, 'LOC': 1.0, 'NUM': 1.0}

In [42]:
# Average MRR@N per answer type for BERT, original vs. thresholded relevance.
# Irrelevant < 2; relevant >= 2
binary_threshold = 2

model = "bert"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Name of the column in `model_df` that holds this model's rank
rank_column = '%s_rank'%(model)

# answer type -> mean reciprocal rank (rounded to 2 decimals)
og_mrr_answer_type = {}
new_mrr_answer_type = {}

for answer_type in answer_types:
    # Loop-local names on purpose: rebinding the notebook-level `query_ids` /
    # `query_subset` here would replace the query id list and the query
    # DataFrame that earlier cells depend on when they are re-run.
    type_query_ids = answer_type_query_id_dict[answer_type]
    og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(type_query_ids)]
    model_subset_df = model_df[model_df['query_id'].isin(type_query_ids)]

    og_mrr = 0.0
    new_mrr = 0.0

    for query_id in type_query_ids:
        # Get query specific ranking and relevance datasets
        query_ranking = model_subset_df[model_subset_df['query_id'] == query_id]
        og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
        new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

        top_n_ranking = get_top_n_ranking(query_ranking, rank_column, N)

        # NOTE(review): every row of a qrels frame is treated as relevant (the
        # label columns are not inspected) - confirm the files only list
        # relevant passages.
        og_relevant_passages = get_passage_ids(og_qrels_query_subset)
        new_relevant_passages = get_passage_ids(new_qrels_query_subset)

        og_mrr += compute_mrr(og_relevant_passages, top_n_ranking, model, N)
        new_mrr += compute_mrr(new_relevant_passages, top_n_ranking, model, N)

    og_mrr_answer_type[answer_type] = round((og_mrr/len(type_query_ids)),2)
    new_mrr_answer_type[answer_type] = round((new_mrr/len(type_query_ids)),2)

In [43]:
og_mrr_answer_type

{'DESC': 0.71, 'ENTY': 0.5, 'HUM': 0.78, 'LOC': 0.5, 'NUM': 0.64}

In [44]:
new_mrr_answer_type

{'DESC': 0.98, 'ENTY': 1.0, 'HUM': 0.78, 'LOC': 1.0, 'NUM': 0.79}

In [45]:
# Average MRR@N per answer type for BM25, original vs. thresholded relevance.
# Irrelevant < 3; relevant >= 3
binary_threshold = 3

model = "bm25"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Name of the column in `model_df` that holds this model's rank
rank_column = '%s_rank'%(model)

# answer type -> mean reciprocal rank (rounded to 2 decimals)
og_mrr_answer_type = {}
new_mrr_answer_type = {}

for answer_type in answer_types:
    # Loop-local names on purpose: rebinding the notebook-level `query_ids` /
    # `query_subset` here would replace the query id list and the query
    # DataFrame that earlier cells depend on when they are re-run.
    type_query_ids = answer_type_query_id_dict[answer_type]
    og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(type_query_ids)]
    model_subset_df = model_df[model_df['query_id'].isin(type_query_ids)]

    og_mrr = 0.0
    new_mrr = 0.0

    for query_id in type_query_ids:
        # Get query specific ranking and relevance datasets
        query_ranking = model_subset_df[model_subset_df['query_id'] == query_id]
        og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
        new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

        top_n_ranking = get_top_n_ranking(query_ranking, rank_column, N)

        # NOTE(review): every row of a qrels frame is treated as relevant (the
        # label columns are not inspected) - confirm the files only list
        # relevant passages.
        og_relevant_passages = get_passage_ids(og_qrels_query_subset)
        new_relevant_passages = get_passage_ids(new_qrels_query_subset)

        og_mrr += compute_mrr(og_relevant_passages, top_n_ranking, model, N)
        new_mrr += compute_mrr(new_relevant_passages, top_n_ranking, model, N)

    og_mrr_answer_type[answer_type] = round((og_mrr/len(type_query_ids)),2)
    new_mrr_answer_type[answer_type] = round((new_mrr/len(type_query_ids)),2)

In [46]:
og_mrr_answer_type

{'DESC': 0.45, 'ENTY': 1.0, 'HUM': 0.67, 'LOC': 0.5, 'NUM': 0.36}

In [47]:
new_mrr_answer_type

{'DESC': 0.92, 'ENTY': 1.0, 'HUM': 1.0, 'LOC': 1.0, 'NUM': 0.83}

In [48]:
# Average MRR@N per answer type for BERT, original vs. thresholded relevance.
# Irrelevant < 3; relevant >= 3
binary_threshold = 3

model = "bert"

model_df = models_dict[model].copy()

# Get new relevance dataset
new_qrels_df = new_qrels_dict["threshold=%s"%(binary_threshold)].copy()

# Name of the column in `model_df` that holds this model's rank
rank_column = '%s_rank'%(model)

# answer type -> mean reciprocal rank (rounded to 2 decimals)
og_mrr_answer_type = {}
new_mrr_answer_type = {}

for answer_type in answer_types:
    # Loop-local names on purpose: rebinding the notebook-level `query_ids` /
    # `query_subset` here would replace the query id list and the query
    # DataFrame that earlier cells depend on when they are re-run.
    type_query_ids = answer_type_query_id_dict[answer_type]
    og_qrels_subset_df = og_qrels_df[og_qrels_df['query_id'].isin(type_query_ids)]
    model_subset_df = model_df[model_df['query_id'].isin(type_query_ids)]

    og_mrr = 0.0
    new_mrr = 0.0

    for query_id in type_query_ids:
        # Get query specific ranking and relevance datasets
        query_ranking = model_subset_df[model_subset_df['query_id'] == query_id]
        og_qrels_query_subset = og_qrels_subset_df[og_qrels_subset_df['query_id'] == query_id]
        new_qrels_query_subset = new_qrels_df[new_qrels_df['query_id'] == query_id]

        top_n_ranking = get_top_n_ranking(query_ranking, rank_column, N)

        # NOTE(review): every row of a qrels frame is treated as relevant (the
        # label columns are not inspected) - confirm the files only list
        # relevant passages.
        og_relevant_passages = get_passage_ids(og_qrels_query_subset)
        new_relevant_passages = get_passage_ids(new_qrels_query_subset)

        og_mrr += compute_mrr(og_relevant_passages, top_n_ranking, model, N)
        new_mrr += compute_mrr(new_relevant_passages, top_n_ranking, model, N)

    og_mrr_answer_type[answer_type] = round((og_mrr/len(type_query_ids)),2)
    new_mrr_answer_type[answer_type] = round((new_mrr/len(type_query_ids)),2)

In [49]:
og_mrr_answer_type

{'DESC': 0.71, 'ENTY': 0.5, 'HUM': 0.78, 'LOC': 0.5, 'NUM': 0.64}

In [50]:
new_mrr_answer_type 

{'DESC': 0.94, 'ENTY': 1.0, 'HUM': 0.78, 'LOC': 1.0, 'NUM': 0.77}