In [8]:
import pandas as pd
import numpy as np
import elliot
# The previous form of this line stopped right after `import`, which is a
# SyntaxError and left every metric class below undefined.  Import exactly
# the metric classes this notebook calls rather than relying on a wildcard.
from elliot.evaluation.metrics import (
    AUC,
    GAUC,
    HR,
    LAUC,
    MAP,
    MRR,
    Precision,
    Recall,
    nDCG,
)
from elliot.evaluation.relevance import Relevance
from types import SimpleNamespace

### Version

In [9]:
# Show the installed Elliot version so the metric values in this notebook
# can be tied to a specific release of the library.
elliot.__version__

'0.3.1'

### Data preparation

In [10]:
# Read the three input tables from the working directory, in this order:
# model predictions, held-out test interactions, training interactions.
df_pred, df_test, df_train = map(
    pd.read_csv,
    ('./preds_full.csv', './test.csv', './train.csv'),
)

In [11]:
# Number of distinct item ids in the training table; passed to the AUC-family
# metrics as `num_items`.  `dropna=False` keeps the count identical to
# `len(unique())`, which also counts a missing value as one distinct entry.
NUM_ITEMS = df_train['item_id'].nunique(dropna=False)

In [12]:
# user_id -> list of item ids that user has in the training table.
# The AUC, GAUC and LAUC cells pass this mapping as `data.train_dict`.
train_dict = (
    df_train
    .groupby('user_id')['item_id']
    .agg(list)
    .to_dict()
)

In [14]:
# Keep only test rows whose user also appears in the predictions table, so the
# ground truth built from `df_test` never contains a user without predictions.
df_test = df_test.loc[df_test['user_id'].isin(df_pred['user_id'])]

In [13]:
# user_id -> list of (item_id, score) tuples, in the row order of `df_pred`.
# NOTE(review): nothing here sorts by score, so each user's list is ranked only
# if preds_full.csv is already ordered that way. Confirm, since the "@20"
# metrics below are presumably computed on the leading entries of each list.
recommendations = {}

for pred_user, pred_item, pred_score in zip(
    df_pred['user_id'], df_pred['item_id'], df_pred['score']
):
    recommendations.setdefault(pred_user, []).append((pred_item, pred_score))

In [15]:
# user_id -> {item_id: relevance} built from the filtered test table.
# If a (user, item) pair occurs more than once, the last row wins.
gt_recommendations = {}

for test_user, test_item, test_relevance in zip(
    df_test['user_id'], df_test['item_id'], df_test['relevance']
):
    gt_recommendations.setdefault(test_user, {})[test_item] = test_relevance

### Evaluation metrics

In [16]:
# Empty frame that the metric cells below fill one row at a time through
# `results.at[<metric label>, "value"]`.
results = pd.DataFrame()

In [17]:
# Precision with a cutoff of 20, stored under the "Precision@20" row.
# The two positional `None`s fill the constructor slots between the
# recommendations and `eval_objects` (presumably config/params — confirm).
results.at["Precision@20", "value"] = Precision(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

In [18]:
# Recall with a cutoff of 20, stored under the "Recall@20" row.
# Relevance is built from the test ground truth with 0.5 as its second
# argument (presumably the relevance threshold — confirm against Elliot).
results.at["Recall@20", "value"] = Recall(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

In [19]:
# MAP with a cutoff of 20, stored under the "MAP@20" row.
# Same evaluation objects as the precision/recall cells: ground-truth
# relevance built with 0.5 and a cutoff of 20.
results.at["MAP@20", "value"] = MAP(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

In [20]:
# MRR with a cutoff of 20, stored under the "MRR@20" row.
# Uses ground-truth relevance built with 0.5 and a cutoff of 20.
results.at["MRR@20", "value"] = MRR(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

In [21]:
# Hit rate (Elliot's `HR` class) with a cutoff of 20, stored under the
# "HitRate@20" row.  Ground-truth relevance is built with 0.5.
results.at["HitRate@20", "value"] = HR(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

In [26]:
# nDCG with a cutoff of 20, stored under the "NDCG20" row.
# NOTE(review): this cell passes 1 to Relevance where the other cells pass 0.5,
# and its row label has no "@" unlike "Precision@20" etc. Both are kept as-is;
# confirm whether the different threshold is deliberate for nDCG.
results.at["NDCG20", "value"] = nDCG(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 1),
        cutoff=20,
    ),
).eval()

In [23]:
# AUC, stored under the "AUC" row.  No cutoff is applied (`cutoff=None`).
# In addition to relevance, this metric is given the training item count and
# the per-user training items through `num_items` and `data.train_dict`.
results.at["AUC", "value"] = AUC(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=None,
        num_items=NUM_ITEMS,
        data=SimpleNamespace(train_dict=train_dict),
    ),
).eval()

In [24]:
# GAUC, stored under the "GAUC" row.
# NOTE(review): a cutoff of 20 is passed here although the row label carries
# no "@20" suffix — confirm whether GAUC actually uses the cutoff.
results.at["GAUC", "value"] = GAUC(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
        num_items=NUM_ITEMS,
        data=SimpleNamespace(train_dict=train_dict),
    ),
).eval()

In [25]:
# LAUC with a cutoff of 20, stored under the "LAUC@20" row.
# Unlike the other metric cells, the second positional argument is a small
# namespace carrying `config_test=False` instead of `None` (presumably LAUC
# reads that flag from its config — confirm against Elliot).
results.at["LAUC@20", "value"] = LAUC(
    recommendations,
    SimpleNamespace(config_test=False),
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
        num_items=NUM_ITEMS,
        data=SimpleNamespace(train_dict=train_dict),
    ),
).eval()

### Results

In [27]:
# Display the collected metric table: one row per metric label, with the
# score in the single "value" column.
results

Unnamed: 0,value
Precision@20,0.057519
Recall@20,0.096321
MAP@20,0.072985
MRR@20,0.186324
HitRate@20,0.47457
AUC,0.688172
GAUC,0.704623
LAUC@20,0.1119
NDCG20,0.089849
