## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
from tqdm.auto import tqdm 
from tqdm import tqdm_notebook
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 

## Load Data

In [2]:
# Directory (relative to the notebook) that holds every input file named below.
data_dir = 'data/'

# Thesis relevance judgements, binary threshold (irrelevant < 2; relevant >= 2)
thesis_qrels_threshold2_filename = 'thesis_dataset_binary_threshold2.tsv'

# Thesis relevance judgements, binary threshold (irrelevant < 3; relevant >= 3)
thesis_qrels_threshold3_filename = 'thesis_dataset_binary_threshold3.tsv'

# MS MARCO relevance file
og_qrels_filename = 'qrels.dev.small.tsv'

# BM25 top 100 ranking
bm25_top100_filename = 'run_development_top100.tsv'

# BERT top 100 ranking
bert_top100_filename = 'bert_thesis_dataset_top100.tsv'

# Experiment query ids
thesis_query_subset = 'experiment_query_subset.tsv'

In [3]:
def _read_tsv(filename, column_names):
    """Read a header-less, tab-separated UTF-8 file from `data_dir` and name its columns."""
    frame = pd.read_csv(data_dir + filename,delimiter='\t',encoding='utf-8',header=None)
    frame.columns = column_names
    return frame


# Rankings: one row per (query, passage) pair.
bm25_df = _read_tsv(bm25_top100_filename, ['query_id', 'passage_id', 'bm25_rank'])
bert_df = _read_tsv(
    bert_top100_filename,
    ['query_id', 'passage_id', 'bm25_rank', 'query', 'passage', 'bert_score', 'bert_rank'],
)

# Relevance judgements: all three qrels files share the same four-column layout.
og_qrels_df = _read_tsv(og_qrels_filename, ['query_id','label1','passage_id','label2'])
new_qrels2_df = _read_tsv(thesis_qrels_threshold2_filename, ['query_id','label1','passage_id','label2'])
new_qrels3_df = _read_tsv(thesis_qrels_threshold3_filename, ['query_id','label1','passage_id','label2'])

# Queries used in the experiment.
query_subset = _read_tsv(thesis_query_subset, ['query_id', 'query'])

# Lookup tables keyed by model name / judgement threshold.
models_dict = {"bm25": bm25_df, "bert": bert_df}
new_qrels_dict = {"threshold=2": new_qrels2_df, "threshold=3": new_qrels3_df}

In [4]:
# Sorted, de-duplicated query ids that appear in the threshold-2 judgements.
# NOTE(review): this name is not referenced again in the cells visible here — confirm it is still needed.
experiment_query_ids = list(np.unique(new_qrels2_df['query_id'].values.tolist()))

## Helper Functions

In [5]:
# English stop-word set from NLTK (the NLTK `stopwords` corpus must be available locally).
# Read by compute_query_token_occurrences to drop stop words from passage tokens.
stop_words = set(stopwords.words('english'))

In [6]:
def get_query_ids(dataframe):
    """Return the distinct values of the `query_id` column as a sorted list.

    Parameters
    ----------
    dataframe : DataFrame with a `query_id` column.

    Returns
    -------
    list of the unique query ids in ascending order (as produced by `np.unique`).
    """
    all_ids = dataframe['query_id'].tolist()
    unique_ids = np.unique(all_ids)
    return list(unique_ids)

In [7]:
def compute_query_token_occurrences(passage,query):
    """Return the fraction of non-stop-word passage tokens that also occur in the query.

    Both texts are tokenized with NLTK's `word_tokenize`. Passage tokens found in the
    module-level `stop_words` set are discarded; each remaining passage token counts as
    a match when it is one of the query's tokens (repeated passage tokens count
    repeatedly).

    Parameters
    ----------
    passage : str, the passage text.
    query : str, the query text.

    Returns
    -------
    float in [0, 1]: matches / number of remaining passage tokens. Returns 0.0 when
    no passage token survives the stop-word filter (empty or stop-word-only passage)
    instead of raising ZeroDivisionError.
    """
    # A set gives O(1) membership tests; only membership is needed, not order or counts.
    query_tokens = set(word_tokenize(query))

    # NOTE(review): tokens are compared as-is (no lower-casing), so capitalised stop
    # words and case variants of query terms are presumably treated as different
    # tokens — confirm this is intended before changing it, since it affects the scores.
    filtered_passage = [w for w in word_tokenize(passage) if w not in stop_words]

    if not filtered_passage:
        # Nothing left to match against: define the fraction as 0 rather than dividing by zero.
        return 0.0

    match_count = sum(1 for w in filtered_passage if w in query_tokens)
    return match_count/len(filtered_passage)

In [8]:
def compute_fqt(query_id,model,threshold):
    """Average the query-token fraction over a model's top-ranked passages for one query.

    Rows are always taken from the module-level `bert_df`; `model` only selects which
    rank column ('<model>_rank') is compared against `threshold`.

    Parameters
    ----------
    query_id : id of the query to look up in `bert_df`.
    model : str, rank-column prefix (the callers in this notebook pass 'bm25' or 'bert').
    threshold : int, keep passages whose rank is <= threshold.

    Returns
    -------
    Sum of `compute_query_token_occurrences` over the selected passages, divided by
    `threshold` (not by the number of passages actually found).

    Raises
    ------
    IndexError if no row of `bert_df` matches the query and rank cutoff.
    """
    rank_column = '%s_rank'%(model)
    belongs_to_query = bert_df['query_id'] == query_id
    within_cutoff = bert_df[rank_column] <= threshold
    selected_rows = bert_df[belongs_to_query & within_cutoff]

    passage_texts = selected_rows['passage'].values.tolist()
    # Every selected row carries the same query text; take it from the first one.
    query_text = selected_rows['query'].values.tolist()[0]

    total = sum(compute_query_token_occurrences(text,query_text) for text in passage_texts)
    return total/threshold

In [9]:
def get_top_n_ranking(dataframe,rank_column,n):
    """Return the rows whose rank is at most `n`, ordered by ascending rank.

    Parameters
    ----------
    dataframe : DataFrame containing `rank_column`.
    rank_column : str, name of the column holding the rank.
    n : rank cutoff (inclusive).

    Returns
    -------
    A new DataFrame with the rows where `rank_column <= n`, sorted by `rank_column`.
    """
    within_cutoff = dataframe[rank_column] <= n
    return dataframe.loc[within_cutoff].sort_values(by=[rank_column])

In [10]:
def get_ranks_relevant_passages(qrels_query_subset,top_n_ranking,model):
    """Return the sorted ranks at which judged passages appear in a ranking.

    The judgements are left-joined with the ranking on (`query_id`, `passage_id`);
    judged passages that are absent from `top_n_ranking` get a NaN rank and are dropped.
    Every row of `qrels_query_subset` is used — no label column is inspected here.

    Parameters
    ----------
    qrels_query_subset : DataFrame of judgements with `query_id` and `passage_id`.
    top_n_ranking : DataFrame with `query_id`, `passage_id` and '<model>_rank'.
    model : str, rank-column prefix (e.g. 'bm25' or 'bert').

    Returns
    -------
    list of int ranks in ascending order (empty when no judged passage is ranked).
    """
    rank_column = '%s_rank'%(model)
    joined = qrels_query_subset.merge(top_n_ranking,how='left',on=['query_id','passage_id'])

    found_ranks = []
    for rank in joined[rank_column].values.tolist():
        if np.isnan(rank):
            # Judged passage is not part of the ranking.
            continue
        found_ranks.append(int(rank))

    found_ranks.sort()
    return found_ranks

In [11]:
def compute_precision(index,rank):
    """Return precision at a relevant hit: `index` relevant items found within the first `rank` positions.

    Parameters
    ----------
    index : 1-based count of relevant items found so far.
    rank : rank position of the current relevant item.

    Returns
    -------
    index / rank (true division).
    """
    return index/rank

In [12]:
def compute_average_precision(sorted_ranks_relevant_items):
    """Return the average precision for one query.

    Parameters
    ----------
    sorted_ranks_relevant_items : ascending ranks at which relevant passages were retrieved.

    Returns
    -------
    Mean of `compute_precision(k, rank_k)` over the k-th relevant item (k starting at 1),
    or 0.0 when the list is empty. The mean is taken over the retrieved relevant items
    passed in, not over all relevant items that exist for the query.
    """
    relevant_count = len(sorted_ranks_relevant_items)
    if relevant_count == 0:
        return 0.0

    precision_total = 0.0
    for position, rank in enumerate(sorted_ranks_relevant_items, start=1):
        precision_total += compute_precision(position,rank)
    return precision_total/relevant_count

## Compute FQT Top 10 ranking

In [13]:
# Number of top-ranked passages per query that contribute to the FQT score.
threshold = 10

# All queries of the experiment subset.
query_ids = get_query_ids(query_subset)

# FQT per query, once for the BM25 ranking and once for the BERT ranking.
bm25_fqt_dict = {qid: compute_fqt(qid,'bm25',threshold) for qid in query_ids}
bert_fqt_dict = {qid: compute_fqt(qid,'bert',threshold) for qid in query_ids}

## Compute MAP per ranges of FQT

In [14]:
# Rank cutoff passed to get_top_n_ranking when computing MAP per group.
N = 10

In [15]:
# (low, high) FQT bounds of each group; compute_group returns the index of the interval a score falls into.
intervals = [(0.0,0.1), (0.1,0.15), (0.15,0.2), (0.2, 0.25), (0.25,1.0)]

In [16]:
def compute_group(fqt, bins=None):
    """Return the index of the interval that contains `fqt`.

    Every interval is half-open, `low <= fqt < high`, except the last one, which also
    includes its upper bound so that the maximum score (e.g. 1.0) is still assigned to
    a group. (The original check compared the index with `len(intervals) + 1`, which
    can never match, so the inclusive branch was unreachable.)

    Parameters
    ----------
    fqt : float, the score to classify.
    bins : optional sequence of (low, high) pairs; defaults to the module-level `intervals`.

    Returns
    -------
    int index of the matching interval, or None when `fqt` lies outside every interval.
    """
    if bins is None:
        bins = intervals

    last_index = len(bins) - 1
    for index, (low, high) in enumerate(bins):
        if index == last_index:
            # Closed upper bound on the final interval only.
            if low <= fqt <= high:
                return index
        elif low <= fqt < high:
            return index
    return None

In [17]:
# One bucket of query ids per FQT interval and per model. The number of buckets is
# derived from `intervals` so the two cannot drift apart when the intervals change.
bm25_groups = [[] for _ in intervals]
bert_groups = [[] for _ in intervals]

for query_id in query_ids:
    # A query can land in different groups for the two models because its FQT
    # is computed separately on each model's ranking.
    bm25_groups[compute_group(bm25_fqt_dict[query_id])].append(query_id)
    bert_groups[compute_group(bert_fqt_dict[query_id])].append(query_id)

# Number of queries that fell into each group.
bm25_group_counts = [len(group) for group in bm25_groups]
bert_group_counts = [len(group) for group in bert_groups]

In [24]:
# Number of queries per FQT group for the BM25 ranking, in `intervals` order.
bm25_group_counts

[6, 12, 14, 6, 5]

In [25]:
# Number of queries per FQT group for the BERT ranking, in `intervals` order.
bert_group_counts

[9, 13, 15, 3, 3]

### Compute group MAPs for MS MARCO

In [18]:
# MAP per FQT group, judged against the MS MARCO qrels (`og_qrels_df`).
# Each list holds one value per group, rounded to 2 decimals.
bm25_ms_map_per_group = []
bert_ms_map_per_group = []

# The same procedure is applied to both models; only the ranking frame, the groups and
# the output list differ. Loop-local names are used on purpose so the notebook-level
# `query_ids` and `query_subset` (needed by earlier cells on re-run) are not overwritten.
for model, ranking_df, groups, map_per_group in (
    ("bm25", bm25_df, bm25_groups, bm25_ms_map_per_group),
    ("bert", bert_df, bert_groups, bert_ms_map_per_group),
):
    rank_column = '%s_rank' % model

    for group_query_ids in groups:
        summed_ap = 0.0

        for group_query_id in group_query_ids:
            # Query-specific ranking and relevance judgements.
            query_ranking = ranking_df[ranking_df['query_id'] == group_query_id]
            query_qrels = og_qrels_df[og_qrels_df['query_id'] == group_query_id]

            top_n_ranking = get_top_n_ranking(query_ranking,rank_column,N)
            rank_relevant_items = get_ranks_relevant_passages(query_qrels,top_n_ranking,model)

            summed_ap += compute_average_precision(rank_relevant_items)

        # MAP is undefined for a group without queries: record NaN instead of dividing by zero.
        group_map = summed_ap/len(group_query_ids) if group_query_ids else float('nan')
        map_per_group.append(round(group_map,2))

In [19]:
# BM25 MAP per FQT group against the MS MARCO qrels (rounded to 2 decimals).
bm25_ms_map_per_group

[0.16, 0.66, 0.48, 0.43, 0.3]

In [20]:
# BERT MAP per FQT group against the MS MARCO qrels (rounded to 2 decimals).
bert_ms_map_per_group

[0.65, 0.65, 0.78, 0.49, 0.67]

### Compute group MAPs for T=2

In [21]:
# MAP per FQT group, judged against the threshold-2 thesis qrels (`new_qrels2_df`).
# Each list holds one value per group, rounded to 2 decimals.
bm25_t2_map_per_group = []
bert_t2_map_per_group = []

# The same procedure is applied to both models; only the ranking frame, the groups and
# the output list differ. Loop-local names are used on purpose so the notebook-level
# `query_ids` and `query_subset` (needed by earlier cells on re-run) are not overwritten.
for model, ranking_df, groups, map_per_group in (
    ("bm25", bm25_df, bm25_groups, bm25_t2_map_per_group),
    ("bert", bert_df, bert_groups, bert_t2_map_per_group),
):
    rank_column = '%s_rank' % model

    for group_query_ids in groups:
        summed_ap = 0.0

        for group_query_id in group_query_ids:
            # Query-specific ranking and relevance judgements.
            query_ranking = ranking_df[ranking_df['query_id'] == group_query_id]
            query_qrels = new_qrels2_df[new_qrels2_df['query_id'] == group_query_id]

            top_n_ranking = get_top_n_ranking(query_ranking,rank_column,N)
            rank_relevant_items = get_ranks_relevant_passages(query_qrels,top_n_ranking,model)

            summed_ap += compute_average_precision(rank_relevant_items)

        # MAP is undefined for a group without queries: record NaN instead of dividing by zero.
        group_map = summed_ap/len(group_query_ids) if group_query_ids else float('nan')
        map_per_group.append(round(group_map,2))

In [22]:
# BM25 MAP per FQT group against the threshold-2 qrels (rounded to 2 decimals).
bm25_t2_map_per_group

[0.93, 0.89, 0.91, 0.96, 0.95]

In [23]:
# BERT MAP per FQT group against the threshold-2 qrels (rounded to 2 decimals).
bert_t2_map_per_group

[0.86, 0.71, 0.86, 0.61, 0.86]

### Compute group MAPs for T=3

In [26]:
# MAP per FQT group, judged against the threshold-3 thesis qrels (`new_qrels3_df`).
# Each list holds one value per group, rounded to 2 decimals.
bm25_t3_map_per_group = []
bert_t3_map_per_group = []

# The same procedure is applied to both models; only the ranking frame, the groups and
# the output list differ. Loop-local names are used on purpose so the notebook-level
# `query_ids` and `query_subset` (needed by earlier cells on re-run) are not overwritten.
for model, ranking_df, groups, map_per_group in (
    ("bm25", bm25_df, bm25_groups, bm25_t3_map_per_group),
    ("bert", bert_df, bert_groups, bert_t3_map_per_group),
):
    rank_column = '%s_rank' % model

    for group_query_ids in groups:
        summed_ap = 0.0

        for group_query_id in group_query_ids:
            # Query-specific ranking and relevance judgements.
            query_ranking = ranking_df[ranking_df['query_id'] == group_query_id]
            query_qrels = new_qrels3_df[new_qrels3_df['query_id'] == group_query_id]

            top_n_ranking = get_top_n_ranking(query_ranking,rank_column,N)
            rank_relevant_items = get_ranks_relevant_passages(query_qrels,top_n_ranking,model)

            summed_ap += compute_average_precision(rank_relevant_items)

        # MAP is undefined for a group without queries: record NaN instead of dividing by zero.
        group_map = summed_ap/len(group_query_ids) if group_query_ids else float('nan')
        map_per_group.append(round(group_map,2))

In [27]:
# BM25 MAP per FQT group against the threshold-3 qrels (rounded to 2 decimals).
bm25_t3_map_per_group

[0.78, 0.82, 0.77, 0.85, 0.86]

In [28]:
# BERT MAP per FQT group against the threshold-3 qrels (rounded to 2 decimals).
bert_t3_map_per_group

[0.84, 0.7, 0.84, 0.61, 0.74]