This notebook will help to compute the MAP for the BM25 ranking with hits=1000 to check if I achieve the same MAP score as with the anserini msmarco_eval script.

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
from tqdm.auto import tqdm 
from tqdm import tqdm_notebook

## Load Data

In [2]:
data_dir = 'data/'

# msmarco relevance file
og_qrels_filename = 'qrels.dev.small.tsv'

# BM25 top 1000 ranking
bm25_top1000_filename = 'run_development_small_top1000.15.5.2020.tsv'

In [4]:
bm25_df = pd.read_csv(data_dir + bm25_top1000_filename,delimiter='\t',encoding='utf-8',header=None)
bm25_df.columns = ['query_id', 'passage_id', 'bm25_rank']

og_qrels_df = pd.read_csv(data_dir + og_qrels_filename,delimiter='\t',encoding='utf-8',header=None)
og_qrels_df.columns = ['query_id','label1','passage_id','label2']

## Helper Functions

In [5]:
def get_query_ids(dataframe):
    return list(np.unique(dataframe['query_id'].tolist()))

In [6]:
def get_top_n_ranking(dataframe,rank_column,n):
    top_n_ranking = dataframe[dataframe[rank_column] <= n].sort_values(by=[rank_column])
    return top_n_ranking

In [7]:
def get_ranks_relevant_passages(qrels_query_subset,top_n_ranking,model):
    ranks_relevant_items = sorted(qrels_query_subset.merge(top_n_ranking,how='left',on=['query_id','passage_id'])['%s_rank'%(model)].values.tolist())
    cleaned_rank_relevant_items = sorted([int(item) for item in ranks_relevant_items if ~np.isnan(item)])
    return cleaned_rank_relevant_items

In [8]:
def compute_precision(index,rank):
    precision = index/rank
    return precision

In [9]:
def compute_average_precision(sorted_ranks_relevant_items):
    average_precision = 0.0
    if not (len(sorted_ranks_relevant_items) == 0):
        summed_precision = 0.0
        for index,rank in enumerate(sorted_ranks_relevant_items):
            summed_precision += compute_precision(index+1,rank)
        average_precision = summed_precision/len(sorted_ranks_relevant_items)
    return average_precision

## Settings

In [10]:
N = 1000

In [24]:
model = "bm25"

og_map = 0.0

# The new relevance dataset contains less query ids than the original msmarco dataset.
# So get these query ids and use them to create subsets of the original dataset and the bm25 output.
query_ids = get_query_ids(og_qrels_df)
model_subset_df = bm25_df[bm25_df['query_id'].isin(query_ids)].copy()

for query_id in tqdm_notebook(query_ids):
    # Get query specific ranking and relevance datasets
    query_subset = model_subset_df[model_subset_df['query_id'] == query_id].copy()
    og_qrels_query_subset = og_qrels_df[og_qrels_df['query_id'] == query_id]
    
    top_n_ranking = get_top_n_ranking(query_subset,'%s_rank'%(model),N)
    
    og_ranks_relevant_items = get_ranks_relevant_passages(og_qrels_query_subset,top_n_ranking,model)
    
    og_map += compute_average_precision(og_ranks_relevant_items)
    
og_map = round((og_map/len(query_ids))*100,2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # Remove the CWD from sys.path while we load stuff.


HBox(children=(FloatProgress(value=0.0, max=6980.0), HTML(value='')))




In [25]:
og_map

19.6

I achieve the same result as with trec_eval, which validates my implementation of MAP.