In [1]:
import pandas as pd
import numpy as np
import beta_rec
from beta_rec.utils.evaluation import *

### Version

In [1]:
beta_rec.__version__

'0.2.4'

In [18]:
!pip freeze | grep beta

beta-rec==0.3.2


### Data preparation

In [23]:
df_pred = pd.read_csv('./preds_full.csv')
df_test = pd.read_csv('./test.csv')

df_pred_new = pd.merge(
    df_pred, 
    df_test.loc[:, ['user_id', 'item_id', 'relevance']], 
    on=['user_id', 'item_id'], 
    how='left'
)
df_pred_new = df_pred_new[df_pred_new.user_id.isin(df_test.user_id)]
df_pred_new.fillna(0, inplace=True)
df_pred_new.relevance.value_counts()

0.0    21307932
4.5       25887
5.0       12924
Name: relevance, dtype: int64

In [24]:
df_pred_new.head()

Unnamed: 0,user_id,item_id,score,relevance
0,28238,4993,0.507034,0.0
1,28238,593,0.481357,0.0
2,28238,3147,0.350242,0.0
3,28238,6874,0.346964,0.0
4,28238,32587,0.34174,0.0


In [25]:
df_test.head()

Unnamed: 0,user_id,item_id,rating,timestamp,relevance
0,1256,5378,5.0,2007-10-31 12:18:24,5.0
1,1256,778,4.5,2007-10-31 12:19:51,4.5
2,1256,8376,5.0,2007-10-31 12:30:59,5.0
3,1256,2594,5.0,2007-10-31 12:20:49,5.0
4,1256,30810,4.5,2007-10-31 12:32:24,4.5


### Evaluation metrics

In [36]:
results = pd.DataFrame()

In [38]:
results.at["Precision@20","value"] = precision_at_k(
    df_test,
    df_pred_new,
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)

In [39]:
results.at["Recall@20","value"] = recall_at_k(
    df_test,
    df_pred_new,
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)

In [40]:
results.at["MAP@20","value"] = map_at_k(
    df_pred_new,
    df_pred_new,
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)

In [41]:
results.at["NDCG@20","value"] = ndcg_at_k(
    df_test,
    df_pred_new,
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
    k=20,
)

In [44]:
df_pred_new.relevance = (df_pred_new.relevance > 0).astype(int)

results.at["AUC","value"] = auc(
    df_pred_new,
    df_pred_new,
    col_user='user_id',
    col_item='item_id',
    col_rating='relevance',
    col_prediction='score',
)

### Results

In [45]:
results

Unnamed: 0,value
Precision@20,0.057519
Recall@20,0.096321
MAP@20,0.03169
NDCG@20,0.093264
AUC,0.686625
