In [1]:
# Show the installed elliot version so the metric values below can be tied to a
# specific release of the library.
import elliot
elliot.__version__

'0.1'

In [2]:
from elliot.evaluation.metrics import *

In [3]:
from elliot.evaluation.relevance import Relevance

In [4]:
from types import SimpleNamespace

In [5]:
import pandas as pd
import numpy as np

In [6]:
# Load the model predictions and the held-out test interactions.
# NOTE(review): both frames show an 'Unnamed: 0' column when displayed further
# down, which suggests the CSVs were written with their index — confirm.
df_pred, df_test = (
    pd.read_csv(csv_path) for csv_path in ('./pred.csv', './test.csv')
)

In [7]:
# Attach the held-out relevance to every predicted (user, item) pair and turn it
# into a binary hit flag.
#
# The derivation is a single non-mutating chain: the previous version filtered
# the frame with a boolean mask and then modified the result in place
# (fillna(inplace=True) followed by a .loc assignment), which is the
# mutate-a-slice pattern pandas' copy-vs-view guidance advises against.
# The resulting values and dtypes are unchanged.
df_pred_new = (
    pd.merge(
        df_pred,
        df_test.loc[:, ['user_id', 'item_id', 'relevance']],
        on=['user_id', 'item_id'],
        how='left',  # keep every prediction; pairs absent from the test set get NaN
    )
    # keep only users that have at least one row in the test set
    .loc[lambda merged: merged.user_id.isin(df_test.user_id)]
    # NaN -> 0 in every column; for relevance this marks "pair not in the test set"
    .fillna(0)
    # binarise: any non-zero relevance becomes 1, zero stays 0, column dtype is kept
    .assign(
        relevance=lambda filled: filled.relevance.ne(0).astype(filled.relevance.dtype)
    )
)

# Class balance of the binary flag.
df_pred_new.relevance.value_counts()

0.0    51516
1.0     3144
Name: relevance, dtype: int64

In [8]:
# Restrict the test set to users that also appear in the predictions.
df_test = df_test.loc[df_test['user_id'].isin(df_pred['user_id'])]

In [9]:
# Inspect the merged predictions: one row per predicted (user_id, item_id) with
# its score and the binary relevance flag.
df_pred_new

Unnamed: 0,user_id,item_id,score,relevance
40,3,590,0.280305,0.0
41,3,1136,0.263707,0.0
42,3,920,0.231234,0.0
43,3,1246,0.188023,0.0
44,3,318,0.187111,0.0
...,...,...,...,...
1179555,71534,1304,0.088851,0.0
1179556,71534,778,0.085875,0.0
1179557,71534,1307,0.082908,0.0
1179558,71534,1258,0.082717,0.0


In [10]:
# Inspect the test interactions after restricting them to users that have
# predictions.
df_test

Unnamed: 0,user_id,item_id,rating,timestamp,relevance
0,1256,5378,5.0,2007-10-31 12:18:24,5.0
1,1256,778,4.5,2007-10-31 12:19:51,4.5
2,1256,8376,5.0,2007-10-31 12:30:59,5.0
3,1256,2594,5.0,2007-10-31 12:20:49,5.0
4,1256,30810,4.5,2007-10-31 12:32:24,4.5
...,...,...,...,...,...
38806,68571,112,5.0,2007-01-28 01:37:59,5.0
38807,68571,7256,5.0,2007-01-28 01:36:16,5.0
38808,68571,34405,4.5,2007-01-28 01:13:53,4.5
38809,68571,2618,5.0,2007-01-28 01:43:19,5.0


In [11]:
# Group the predictions by user: {user: [(item_id, score), ...]}.
# Each user's list keeps the row order of df_pred_new.
recommendations = {}

for user, item_id, score in zip(
    df_pred_new['user_id'], df_pred_new['item_id'], df_pred_new['score']
):
    recommendations.setdefault(user, []).append((item_id, score))

In [12]:
# Ground truth per user taken from the test set: {user: {item_id: relevance}}.
# Every test row is kept (no filtering on the relevance value); if a
# (user, item) pair occurs more than once, the last row wins.
gt_recommendations = {}

for user, item_id, score in zip(
    df_test['user_id'], df_test['item_id'], df_test['relevance']
):
    gt_recommendations.setdefault(user, {})[item_id] = score

In [13]:
# Precision with cutoff=20 on the per-user recommendation lists.
# NOTE(review): the two `None` positionals are presumably elliot's config and
# params arguments, and 0.5 presumably the relevance threshold — confirm against
# the elliot API.
Precision(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.057519209659714604

In [14]:
# Recall with cutoff=20 on the per-user recommendation lists.
# NOTE(review): the two `None` positionals are presumably elliot's config and
# params arguments, and 0.5 presumably the relevance threshold — confirm.
Recall(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.09632131280310642

In [15]:
# MAP with cutoff=20 on the per-user recommendation lists.
# NOTE(review): the two `None` positionals are presumably elliot's config and
# params arguments, and 0.5 presumably the relevance threshold — confirm.
MAP(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.07298532565140084

In [16]:
# MRR with cutoff=20 on the per-user recommendation lists.
# NOTE(review): the two `None` positionals are presumably elliot's config and
# params arguments, and 0.5 presumably the relevance threshold — confirm.
MRR(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.18632413351027097

In [17]:
# HR with cutoff=20 on the per-user recommendation lists.
# NOTE(review): the two `None` positionals are presumably elliot's config and
# params arguments, and 0.5 presumably the relevance threshold — confirm.
HR(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.47457006952067327

In [18]:
# NDCG with cutoff=20 on the per-user recommendation lists.
# NOTE(review): the two `None` positionals are presumably elliot's config and
# params arguments, and 0.5 presumably the relevance threshold — confirm.
NDCG(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
    ),
).eval()

0.08988414852701611

In [19]:
# Rebuild the ground truth from the merged prediction frame:
# {user: {item_id: binary relevance}}, covering only items that were predicted.
# NOTE(review): this re-binds `gt_recommendations`, which earlier held the
# test-set ground truth; re-running the earlier top-k metric cells after this
# one would silently use this dict instead.
gt_recommendations = {}

for user, item_id, score in zip(
    df_pred_new['user_id'], df_pred_new['item_id'], df_pred_new['relevance']
):
    gt_recommendations.setdefault(user, {})[item_id] = score

In [20]:
# AUC with no cutoff, an empty training history for every recommended user and
# num_items=19.
# NOTE(review): num_items is 19 here and in the LAUC cell but 20 in the GAUC
# cell — confirm which value is intended. The two `None` positionals are
# presumably elliot's config and params arguments — confirm.
AUC(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=None,
        num_items=19,
        data=SimpleNamespace(train_dict={user: [] for user in recommendations}),
    ),
).eval()

0.6023404513183068

In [21]:
# GAUC with cutoff=20, an empty training history for every recommended user and
# num_items=20.
# NOTE(review): num_items is 20 here but 19 in the AUC and LAUC cells — confirm
# which value is intended. The two `None` positionals are presumably elliot's
# config and params arguments — confirm.
GAUC(
    recommendations,
    None,
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
        num_items=20,
        data=SimpleNamespace(train_dict={user: [] for user in recommendations}),
    ),
).eval()

0.6187365226601701

In [22]:
# LAUC with cutoff=20, an empty training history for every recommended user and
# num_items=19. Unlike the other metric cells, the second positional argument is
# a namespace carrying config_test=False instead of None.
# NOTE(review): num_items is 19 here and in the AUC cell but 20 in the GAUC
# cell — confirm which value is intended.
lauc_eval_objects = None  # placeholder removed below; kept out of the namespace
del lauc_eval_objects
LAUC(
    recommendations,
    SimpleNamespace(config_test=False),
    None,
    eval_objects=SimpleNamespace(
        relevance=Relevance(gt_recommendations, 0.5),
        cutoff=20,
        num_items=19,
        data=SimpleNamespace(train_dict={user: [] for user in recommendations}),
    ),
).eval()

0.5966968658330363