In [1]:
import pandas as pd
import math
import warnings
# Silences every warning category for the rest of the session, so any
# deprecation or chained-assignment warnings raised by later cells are hidden.
warnings.filterwarnings('ignore')

In [2]:
# Load the recommendation log and number each user's rows 0, 1, 2, ... in the
# order they appear in the file; that running index is used as the rank.
# NOTE(review): `rnk` is only a true rank if the CSV is already ordered
# best-first within each user (the preview suggests descending `score`) - confirm.
data = pd.read_csv('recs_synthetic.csv').assign(
    rnk=lambda recs: recs.groupby('user_id').cumcount()
)
data.head()

Unnamed: 0,user_id,score,target,total_actions,rnk
0,0,0.993668,1,17,0
1,0,0.896899,0,17,1
2,0,0.875116,1,17,2
3,0,0.844549,0,17,3
4,0,0.837955,0,17,4


In [3]:
def precision_at_k(df, k):
    """Compute precision@k pooled over all rows of ``df``.

    A row counts as "recommended" when its ``rnk`` is strictly below ``k``.
    Recommended rows with ``target == 1`` are true positives, recommended rows
    with ``target == 0`` are false positives, and the result is
    ``TP / (TP + FP)`` over the whole frame (not a per-user average).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``rnk`` and ``target``. It is only read;
        no helper columns are added to it.
    k : int
        Cut-off; rows with ``rnk < k`` are treated as recommended.

    Returns
    -------
    float
        Precision@k, or ``nan`` when no row with target 0/1 lies inside the
        cut-off (the ratio is undefined there).
    """
    in_top_k = df['rnk'] < k
    # Vectorised masks replace the row-wise apply with positional x[0]/x[1].
    true_pos = int((in_top_k & (df['target'] == 1)).sum())
    false_pos = int((in_top_k & (df['target'] == 0)).sum())
    recommended = true_pos + false_pos
    if recommended == 0:
        return float('nan')
    return true_pos / recommended

def recall_at_k(df, k):
    """Compute recall@k averaged over users.

    For each ``user_id`` the number of rows with ``rnk < k`` and
    ``target == 1`` is divided by that user's ``total_actions`` (the maximum
    value of the column within the user's rows). The per-user ratios are then
    summed and divided by the number of users.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``rnk``, ``target`` and
        ``total_actions``. It is only read; no helper columns are added to it.
    k : int
        Cut-off; rows with ``rnk < k`` are treated as recommended.

    Returns
    -------
    float
        Mean per-user recall@k, or ``nan`` for an empty frame.
    """
    if df.empty:
        return float('nan')
    # 1 where the row is a relevant item inside the cut-off, else 0.
    hit = ((df['rnk'] < k) & (df['target'] == 1)).astype(int)
    hits_per_user = hit.groupby(df['user_id']).sum()
    actions_per_user = df.groupby('user_id')['total_actions'].max()
    recall_per_user = hits_per_user / actions_per_user
    # sum()/n rather than mean(): a user whose ratio is NaN (0/0) still counts
    # in the denominator.
    return recall_per_user.sum() / recall_per_user.shape[0]

In [4]:
def map_at_k(df, k):
    """Compute mean average precision at ``k``.

    Only rows with ``rnk < k`` are considered. For every relevant row
    (``target == 1``) the precision at its position is the running count of
    relevant rows for that user divided by ``rnk + 1``. A user's average
    precision is the mean of those values over the user's relevant rows inside
    the cut-off, and the result is the mean of the per-user values.

    Users with no relevant row inside the cut-off do not appear in the
    average at all (they are not counted as zero).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``rnk`` and ``target``. Rows may
        be in any order; the frame is not modified.
    k : int
        Cut-off; rows with ``rnk < k`` are kept.

    Returns
    -------
    float
        MAP@k, or ``nan`` when no user has a relevant row inside the cut-off.
    """
    # Sort so the running hit count follows rank order even when the caller's
    # rows are not already ordered by rnk within each user.
    top_k = df[df['rnk'] < k].sort_values(['user_id', 'rnk'])
    hits_so_far = top_k.groupby('user_id')['target'].cumsum()
    precision_at_rank = hits_so_far / (top_k['rnk'] + 1)

    relevant = top_k['target'] == 1
    per_user = precision_at_rank[relevant].groupby(top_k.loc[relevant, 'user_id'])
    ap_per_user = per_user.sum() / per_user.count()
    if ap_per_user.empty:
        return float('nan')
    return ap_per_user.sum() / ap_per_user.shape[0]

In [40]:
def ndcg_at_k(df, k):
    """Compute a per-user normalised DCG at ``k`` and average it over users.

    Rows with ``rnk < k`` are kept. Each row contributes
    ``(2**target - 1) / log2(rnk + 2)`` to its user's DCG. The normaliser is
    the same sum with every kept row treated as relevant (gain 1), i.e. the
    DCG of a list whose kept positions are all hits - it is not derived from
    the user's actual number of relevant items.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``rnk`` and ``target``.
    k : int
        Cut-off; rows with ``rnk < k`` are kept.

    Returns
    -------
    float
        Sum of per-user nDCG values divided by the number of users that have
        at least one row inside the cut-off.
    """
    top_k = df[df['rnk'] < k]
    # Positional discount log2(rnk + 2): 1 for the first position.
    discount = top_k['rnk'].apply(lambda position: math.log(position + 2, 2))
    gains = top_k[['user_id']].assign(
        dcg_i=(2 ** top_k['target'] - 1) / discount,
        idcg_i=(2 ** 1 - 1) / discount,
    )
    totals = gains.groupby('user_id').sum()
    ndcg_per_user = totals['dcg_i'] / totals['idcg_i']
    return ndcg_per_user.sum() / totals.shape[0]

In [6]:
def mrr(df):
    """Compute the mean reciprocal rank of each user's first relevant row.

    For every user with at least one ``target == 1`` row, take the smallest
    ``rnk`` among those rows and score it as ``1 / (rnk + 1)``. The scores are
    summed and divided by the number of such users; users without any
    relevant row are not part of the average.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``rnk`` and ``target``.

    Returns
    -------
    float
        Mean reciprocal rank over users that have a relevant row.
    """
    relevant_rows = df.loc[df['target'] == 1, ['user_id', 'rnk']]
    first_hit = relevant_rows.groupby('user_id')['rnk'].min()
    reciprocal = 1 / (first_hit + 1)
    return reciprocal.sum() / reciprocal.shape[0]

In [7]:
# Report precision at both cut-offs; each call receives its own copy of `data`.
print(*(f'precision@{k}: {precision_at_k(df=data.copy(), k=k)}' for k in (8, 30)), sep='\n')

precision@8: 0.323125
precision@30: 0.33303333333333335


In [8]:
# Report recall at both cut-offs; each call receives its own copy of `data`.
print(*(f'recall@{k}: {recall_at_k(df=data.copy(), k=k)}' for k in (8, 30)), sep='\n')

recall@8: 0.15214328945706182
recall@30: 0.5879062358080964


In [9]:
# Report MAP at both cut-offs; each call receives its own copy of `data`.
print(*(f'MAP@{k}: {map_at_k(df=data.copy(), k=k)}' for k in (8, 30)), sep='\n')

MAP@8: 0.4974598303779759
MAP@30: 0.39767171242806565


In [10]:
# mrr() takes no cut-off argument, so it is evaluated over every row of `data`.
print(f'MRR: {mrr(df=data.copy())}')

MRR: 0.5359987465750623


In [41]:
# nDCG at both cut-offs, displayed as a (k=8, k=30) tuple.
tuple(ndcg_at_k(df=data.copy(), k=k) for k in (8, 30))

(0.322230578288455, 0.33069498154567506)