In [1]:
# Import the library and display its version so the metric values below can be
# tied to a specific release.
import elliot
elliot.__version__

'0.1'

In [2]:
from elliot.evaluation.metrics import *

In [3]:
from elliot.evaluation.relevance import Relevance

In [4]:
from types import SimpleNamespace

In [5]:
import pandas as pd
import numpy as np

In [6]:
# Read the predictions, test and train CSV files (in that order) from the
# current working directory.
df_pred, df_test, df_train = (
    pd.read_csv(csv_path)
    for csv_path in ('./preds_full.csv', './test.csv', './train.csv')
)

In [7]:
# Number of distinct item ids that occur in the training frame.
NUM_ITEMS = len(pd.unique(df_train['item_id']))

In [8]:
# Display the distinct-item count as a quick sanity check.
NUM_ITEMS

7895

In [9]:
# Map each user_id to the list of item_ids it has in the training frame.
train_dict = {
    user_key: list(user_items)
    for user_key, user_items in df_train.groupby('user_id')['item_id']
}

In [10]:
# Attach the test-set relevance to every predicted (user_id, item_id) pair.
# The whole pipeline is one non-mutating chain, so re-running the cell always
# starts from df_pred / df_test and never calls an in-place method on a frame
# that was produced by boolean indexing.
df_pred_new = (
    pd.merge(
        df_pred,
        df_test.loc[:, ['user_id', 'item_id', 'relevance']],
        on=['user_id', 'item_id'],
        how='left',
    )
    # Keep only predictions for users that appear in the test frame.
    .loc[lambda merged: merged.user_id.isin(df_test.user_id)]
    # Pairs without a match in the test frame get relevance 0. As in the
    # original cell, the fill applies to every column, not only `relevance`.
    .fillna(0)
)

# Distribution of relevance values after the merge.
df_pred_new.relevance.value_counts()

0.0    21307932
4.5       25887
5.0       12924
Name: relevance, dtype: int64

In [11]:
# Restrict the test frame to users that also have rows in the predictions frame.
df_test = df_test.loc[df_test['user_id'].isin(df_pred['user_id'])]

In [12]:
# Build {user: [(item_id, score), ...]} following the row order of df_pred_new.
# NOTE(review): the per-user lists are not sorted by score in this cell;
# presumably the predictions file is already ranked per user — confirm.
recommendations = {}

for user, item_id, score in zip(df_pred_new.user_id, df_pred_new.item_id, df_pred_new.score):
    # setdefault creates the user's list on first sight, then the pair is appended.
    recommendations.setdefault(user, []).append((item_id, score))

In [13]:
# Build {user: {item_id: relevance}} from the (filtered) test frame.
gt_recommendations = {}

for user, item_id, score in zip(df_test.user_id, df_test.item_id, df_test.relevance):
    # setdefault creates the user's inner dict on first sight; a repeated
    # (user, item_id) pair overwrites the earlier relevance value.
    gt_recommendations.setdefault(user, {})[item_id] = score

In [14]:
# Precision with cutoff 20 over the recommendation lists.
# The two None positionals are presumably the metric's config/params slots, and
# 0.5 is presumably the relevance threshold — confirm against the elliot API.
Precision(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.0575192096597146

In [15]:
# Recall with cutoff 20 over the recommendation lists.
# The two None positionals are presumably the metric's config/params slots, and
# 0.5 is presumably the relevance threshold — confirm against the elliot API.
Recall(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.09632131280310641

In [16]:
# MAP with cutoff 20 over the recommendation lists.
# The two None positionals are presumably the metric's config/params slots, and
# 0.5 is presumably the relevance threshold — confirm against the elliot API.
MAP(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.07298532565140084

In [17]:
# MRR with cutoff 20 over the recommendation lists.
# The two None positionals are presumably the metric's config/params slots, and
# 0.5 is presumably the relevance threshold — confirm against the elliot API.
MRR(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.18632413351027094

In [18]:
# HR with cutoff 20 over the recommendation lists.
# The two None positionals are presumably the metric's config/params slots, and
# 0.5 is presumably the relevance threshold — confirm against the elliot API.
HR(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.47457006952067327

In [19]:
# NDCG with cutoff 20 over the recommendation lists.
# NOTE(review): Relevance is built with 1 here, while the other metric cells in
# this notebook pass 0.5 — confirm the difference is intentional.
NDCG(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 1),
        cutoff=20,
    ),
).eval()

0.08984914516171721

In [20]:
# AUC evaluated with no cutoff (cutoff=None). In addition to the relevance
# object, the metric is handed the train item count and the per-user train
# items; presumably these are used to account for items seen in training —
# confirm against the elliot API.
AUC(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=None,
        num_items=NUM_ITEMS,
        data=SimpleNamespace(train_dict=train_dict),
    ),
).eval()

0.6881719788628613

In [21]:
# GAUC with cutoff 20. The metric is also handed the train item count and the
# per-user train items; how it uses them is defined by elliot — confirm there.
GAUC(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
        num_items=NUM_ITEMS,
        data=SimpleNamespace(train_dict=train_dict),
    ),
).eval()

0.7046229461413768

In [22]:
# LAUC with cutoff 20. Unlike the other metric cells, the second positional
# argument is a namespace carrying config_test=False (presumably the metric
# reads this flag from its config — confirm against the elliot API).
LAUC(
    recommendations, SimpleNamespace(config_test=False), None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
        num_items=NUM_ITEMS,
        data=SimpleNamespace(train_dict=train_dict),
    ),
).eval()

0.11189951786245314