In [10]:
import pandas as pd
import numpy as np
import reco_utils
from reco_utils.evaluation.python_evaluation import (
    precision_at_k,
    recall_at_k,
    ndcg_at_k,
    map_at_k,
    auc 
)

### Version

In [8]:
# Display the installed reco_utils version; the metric functions used below are imported from this package.
reco_utils.__version__

'2021.2.9'

### Data preparation

In [9]:
# Load the scored (user_id, item_id) predictions and the held-out test interactions.
df_pred = pd.read_csv('./preds_full.csv')
df_test = pd.read_csv('./test.csv')

# Build the evaluation frame as one non-mutating chain:
#   1. keep only predictions for users that appear in the test set;
#   2. left-join the test relevance onto every remaining prediction, so pairs
#      that are absent from the test set come back with NaN relevance;
#   3. treat those unmatched pairs as non-relevant (relevance = 0).
# Only the `relevance` column is filled: a blanket fillna(0) would also
# overwrite missing values in unrelated columns (e.g. `score`) and hide
# problems in the prediction file.
df_pred_new = (
    df_pred[df_pred['user_id'].isin(df_test['user_id'])]
    .merge(
        df_test[['user_id', 'item_id', 'relevance']],
        on=['user_id', 'item_id'],
        how='left',
    )
    .fillna({'relevance': 0})
)

# Sanity check: distribution of relevance labels among the scored pairs.
df_pred_new['relevance'].value_counts()

0.0    21307932
4.5       25887
5.0       12924
Name: relevance, dtype: int64

### Evaluation metrics

In [11]:
# Empty frame that the following cells populate, one row per metric, under a single "value" column.
results = pd.DataFrame()

In [12]:
# Precision@20: df_test is the ground truth, df_pred_new holds the candidates
# that are ranked by their `score` column.
precision_args = dict(
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)
results.at["Precision@20", "value"] = precision_at_k(df_test, df_pred_new, **precision_args)

In [13]:
# Recall@20, computed from the same ground-truth / prediction pair and column
# mapping as the other ranking metrics in this notebook.
recall_args = dict(
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)
results.at["Recall@20", "value"] = recall_at_k(df_test, df_pred_new, **recall_args)

In [14]:
# MAP@20 (mean average precision) over each user's 20 highest-scored items.
map_args = dict(
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)
results.at["MAP@20", "value"] = map_at_k(df_test, df_pred_new, **map_args)

In [15]:
# NDCG@20 over each user's 20 highest-scored items, with df_test as ground truth.
ndcg_args = dict(
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)
results.at["NDCG@20", "value"] = ndcg_at_k(df_test, df_pred_new, **ndcg_args)

In [16]:
# Collapse the graded relevance into a binary label for the AUC computation:
# 1 for any strictly positive relevance, 0 otherwise.
df_pred_new['relevance'] = df_pred_new['relevance'].gt(0).astype(int)

In [19]:
# df_pred_new carries both the binary `relevance` label and the model `score`
# for every scored pair, so the same frame is supplied as the ground-truth
# argument and as the prediction argument.
auc_columns = {
    'col_user': 'user_id',
    'col_item': 'item_id',
    'col_rating': 'relevance',
    'col_prediction': 'score',
}
results.at["AUC", "value"] = auc(df_pred_new, df_pred_new, **auc_columns)

### Results

In [20]:
# Show the table of metric values collected by the cells above.
results

Unnamed: 0,value
Precision@20,0.057519
Recall@20,0.096321
MAP@20,0.03169
NDCG@20,0.093264
AUC,0.686625
