In [1]:
import pandas as pd
import numpy as np
import openrec
import tensorflow as tf
from openrec.tf1.utils.evaluators import Precision
from openrec.tf2.metrics.ranking_metrics import Recall, NDCG, AUC

### Version

In [2]:
# Shell escape: list the installed packages and keep only the lines mentioning
# openrec, so the library version used for this run is recorded in the output.
!pip freeze | grep openrec

openrec==0.3.0


In [3]:
# Display the TensorFlow version this notebook was executed with.
tf.__version__

'2.5.0'

### Data preparation

In [4]:
# Scored (user, item) predictions and the held-out test interactions.
df_pred = pd.read_csv('./preds_full.csv')
df_test = pd.read_csv('./test.csv')

# Build the evaluation frame in one non-mutating chain, so re-running the cell
# always starts from the freshly loaded frames and no filtered frame is
# modified in place:
#   1. left-join the test relevance onto every prediction row,
#   2. keep only rows whose user occurs in the test set,
#   3. replace missing values with 0, so (user, item) pairs that are absent
#      from the test set end up with relevance 0.
# NOTE: fillna(0) applies to every column of the merged frame, not only to
# `relevance`; a missing score would therefore also become 0.
df_pred_new = (
    pd.merge(
        df_pred,
        df_test.loc[:, ['user_id', 'item_id', 'relevance']],
        on=['user_id', 'item_id'],
        how='left',
    )
    .loc[lambda merged: merged.user_id.isin(df_test.user_id)]
    .fillna(0)
)

# Sanity check: distribution of relevance labels attached to the predictions.
df_pred_new.relevance.value_counts()

0.0    21307932
4.5       25887
5.0       12924
Name: relevance, dtype: int64

In [5]:
# Sorted, de-duplicated catalogue of every item id that appears in either the
# predictions or the test set.
pred_item_ids = df_pred_new.item_id.unique()
test_item_ids = df_test.item_id.unique()
all_items = np.union1d(pred_item_ids, test_item_ids)

In [6]:
# Distinct user ids of the test set, in order of first appearance.
all_users = pd.unique(df_test['user_id'])

In [7]:
# Lookup tables from raw ids to their position in all_users / all_items;
# these positions are used as row and column indices of the dense matrices.
user_dict = {user_id: position for position, user_id in enumerate(all_users)}
item_dict = {item_id: position for position, item_id in enumerate(all_items)}

In [8]:
n_users, n_items = len(all_users), len(all_items)

# Dense user x item matrices. Pairs that never occur in the data keep the
# initial values: score 0.0 and "not positive" (False).
user_pred_np = np.zeros((n_users, n_items))
user_pos_mask_np = np.zeros((n_users, n_items), dtype=bool)

# Translate raw ids to matrix coordinates once per column and write all cells
# with a single advanced-index assignment instead of a Python loop over every
# row. An id missing from the lookup dict maps to NaN, which turns the index
# array into floats and makes the assignment fail loudly.
# NOTE(review): if the same (user, item) pair occurred more than once with
# different scores, which score wins is not defined for advanced assignment;
# this assumes such conflicting duplicates do not exist — confirm.
pred_rows = df_pred_new.user_id.map(user_dict).to_numpy()
pred_cols = df_pred_new.item_id.map(item_dict).to_numpy()
user_pred_np[pred_rows, pred_cols] = df_pred_new.score.to_numpy()

# Every (user, item) row of the test set is flagged as positive; the
# relevance value itself is not consulted here.
# NOTE(review): presumably test.csv only contains relevant interactions — confirm.
test_rows = df_test.user_id.map(user_dict).to_numpy()
test_cols = df_test.item_id.map(item_dict).to_numpy()
user_pos_mask_np[test_rows, test_cols] = True

In [9]:
# Column order that lists each user's items from highest to lowest score:
# argsort gives ascending order, reversing the columns makes it descending.
ascending_order = user_pred_np.argsort(axis=1)
arr_inds = ascending_order[:, ::-1]

In [10]:
# Permute the columns of each row with arr_inds so that both matrices share
# the same per-user ordering. The broadcast row selector pairs every row with
# its own column permutation.
# NOTE: both arrays are overwritten, so re-running only this cell would apply
# the permutation a second time to the already reordered data.
row_selector = np.arange(arr_inds.shape[0])[:, np.newaxis]
user_pred_np = user_pred_np[row_selector, arr_inds]
user_pos_mask_np = user_pos_mask_np[row_selector, arr_inds]

In [11]:
# Convert the NumPy matrices into the tensors passed to the metric functions.
user_pos_mask = tf.constant(user_pos_mask_np, dtype=tf.bool)

# Scores are cast to float32 before being wrapped in a tensor.
scores_f32 = user_pred_np.astype(np.float32)
user_pred = tf.constant(scores_f32, dtype=tf.float32)

# All-False exclusion mask with the same shape as the positive mask:
# no (user, item) cell is marked for exclusion.
no_exclusions = np.zeros(user_pos_mask_np.shape, dtype=bool)
user_excl_mask = tf.constant(no_exclusions)

2021-07-13 18:18:30.786560: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Evaluation metrics

In [12]:
# Empty frame that collects the metrics: later cells add one row per metric
# and fill its "value" column through `results.at[...]`.
results = pd.DataFrame()

In [13]:
# OpenRec Recall at cutoff 20, averaged over everything the metric returns
# (presumably one value per user — confirm against OpenRec).
recall_values = Recall(
    pos_mask=user_pos_mask,
    pred=user_pred,
    excl_mask=user_excl_mask,
    at=[20],
)
results.at["Recall@20", "value"] = tf.math.reduce_mean(recall_values).numpy()



In [15]:
# OpenRec NDCG at cutoff 20, averaged over everything the metric returns
# (presumably one value per user — confirm against OpenRec).
ndcg_values = NDCG(
    pos_mask=user_pos_mask,
    pred=user_pred,
    excl_mask=user_excl_mask,
    at=[20],
)
results.at["NDCG@20", "value"] = tf.math.reduce_mean(ndcg_values).numpy()

In [16]:
# OpenRec AUC, averaged over everything the metric returns
# (presumably one value per user — confirm against OpenRec).
auc_values = AUC(
    pos_mask=user_pos_mask,
    pred=user_pred,
    excl_mask=user_excl_mask,
)
results.at["AUC", "value"] = tf.math.reduce_mean(auc_values).numpy()

In [17]:
# For every user, the 1-based positions of the rows with non-zero relevance
# inside that user's predictions ordered by descending score.
#
# The frame is split once with groupby instead of being re-filtered for each
# user, which avoids scanning the whole frame once per user. sort=False keeps
# the users in order of first appearance (the same order `unique()` yields),
# and each group holds the user's rows in their original order, so the
# per-user sort sees exactly the same input as a boolean filter would give.
true_pos = [
    np.flatnonzero(
        user_rows.sort_values('score', ascending=False).relevance.to_numpy()
    ) + 1
    for _, user_rows in df_pred_new.groupby('user_id', sort=False)
]

In [18]:
# Run OpenRec's Precision evaluator (cutoff 20) on each user's array of
# 1-based positive ranks, then average the per-user results.
per_user_precision = []
for positive_ranks in true_pos:
    evaluator = Precision(precision_at=[20])
    per_user_precision.append(evaluator.compute(positive_ranks, None))

results.at["Precision@20", "value"] = np.mean(per_user_precision)

### Results

In [19]:
# Display the table of collected metric values.
results

Unnamed: 0,value
Recall@20,0.096321
NDCG@20,0.462628
AUC,0.704878
Precision@20,0.057519
