In [1]:
import numpy as np
import pandas as pd

In [2]:
# Graded relevance judgments: one inner list per query, one grade per ranked
# document D1..D10 (a grade of 0 is treated as "not relevant" downstream).
relevant_items = [
    [3, 0, 1, 0, 2, 0, 0, 5, 0, 4],
    [4, 0, 3, 0, 2, 0, 4, 0, 1, 0],
]
cols = [f"D{rank}" for rank in range(1, 11)]

# One row per query, one column per document position.
items_df = pd.DataFrame(data=relevant_items, columns=cols)

In [24]:
def calc_metrics(query: np.ndarray, cols: list) -> pd.DataFrame:
    """ Calculate precision, recall and interpolated precision at every rank.

    A document counts as relevant when its grade in ``query`` is > 0.

    Parameters
    ----------
    query : array-like of numbers
        Relevance grade of each retrieved document, in ranked order.
    cols : list
        Column labels for the result, one per document position.

    Returns
    -------
    pd.DataFrame
        Four rows -- "Query", "Precision", "Recall", "Interpolated Precision" --
        and one column per document position.
    """
    query = np.asarray(query)
    is_relevant = query > 0

    # hits[k] = number of relevant documents among the first k+1 results
    hits = np.cumsum(is_relevant)
    ranks = np.arange(1, len(query) + 1)

    # precision@k = relevant seen so far / documents seen so far
    precision = hits / ranks

    # recall@k = relevant seen so far / total relevant documents.
    # The denominator must be the number of relevant documents, not a fixed
    # fraction of the list length; with no relevant documents recall is 0.
    total_relevant = int(hits[-1]) if len(hits) else 0
    if total_relevant > 0:
        recall = hits / total_relevant
    else:
        recall = np.zeros(len(query))

    # interpolated precision at rank k = best precision at rank k or later,
    # i.e. a running maximum taken from the right-hand end
    interpolated_precision = np.maximum.accumulate(precision[::-1])[::-1]

    # wrap the results in a dataframe
    indices = ["Query", "Precision", "Recall", "Interpolated Precision"]
    df = pd.DataFrame(np.vstack((query, precision, recall, interpolated_precision)),
                      columns=cols, index=indices)

    return df
    
    

In [27]:
# Per-rank metrics table for the first query (row 0 of items_df).
q1 = calc_metrics(items_df.iloc[0].to_numpy(), cols)  # type: ignore
q1

Unnamed: 0,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10
Query,3.0,0.0,1.0,0.0,2.0,0.0,0.0,5.0,0.0,4.0
Precision,1.0,0.5,0.666667,0.5,0.6,0.5,0.428571,0.5,0.444444,0.5
Recall,0.2,0.2,0.4,0.4,0.6,0.6,0.6,0.8,0.8,1.0
Interpolated Precision,1.0,0.666667,0.666667,0.6,0.6,0.5,0.5,0.5,0.5,0.5


In [30]:
# Per-rank metrics table for the second query (row 1 of items_df).
q2 = calc_metrics(items_df.iloc[1].to_numpy(), cols)  # type: ignore
q2

Unnamed: 0,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10
Query,4.0,0.0,3.0,0.0,2.0,0.0,4.0,0.0,1.0,0.0
Precision,1.0,0.5,0.666667,0.5,0.6,0.5,0.571429,0.5,0.555556,0.5
Recall,0.2,0.2,0.4,0.4,0.6,0.6,0.8,0.8,1.0,1.0
Interpolated Precision,1.0,0.666667,0.666667,0.6,0.6,0.571429,0.571429,0.555556,0.555556,0.5


In [35]:
# Calculate the average precision for each query
q1_rel_idx = q1.loc["Query"]>0
q1_rel_precision = q1.loc["Precision"][q1_rel_idx]
q1_avg_precision = q1_rel_precision.sum()/len(q1_rel_precision)
q1_avg_precision

0.6533333333333333

In [36]:
# Calculate the average precision for each query
q2_rel_idx = q2.loc["Query"]>0
q2_rel_precision = q2.loc["Precision"][q2_rel_idx]
q2_avg_precision = q2_rel_precision.sum()/len(q2_rel_precision)
q2_avg_precision

0.6787301587301586

In [37]:
# Mean average precision (MAP) over the two queries.
# NOTE(review): the name `map` shadows Python's built-in map() for the rest of
# the kernel session; kept because other cells may read it.
map = 0.5 * (q1_avg_precision + q2_avg_precision)
map

0.666031746031746

In [43]:
# Discounted cumulative gain (DCG) for query 1, written out by hand:
# the rank-1 grade (3) is taken undiscounted, and each later non-zero grade is
# divided by log2(rank) -- grades 1, 2, 5, 4 sit at ranks 3, 5, 8, 10.
d = 3 + 1/np.log2(3) + 2/np.log2(5) + 5/np.log2(8) + 4/np.log2(10)

In [44]:
# Ideal DCG for query 1: the same five non-zero grades placed in descending
# order (5, 4, 3, 2, 1 at ranks 1-5) with the same log2(rank) discount.
# The terms below are written from rank 5 back to rank 1.
idcg = 1 / np.log2(5) + 2 / np.log2(4) + 3 / np.log2(3) + 4/np.log2(2) + 5

In [45]:
# Normalised DCG for query 1: achieved DCG divided by the ideal DCG.
d/idcg

0.5974836646859079