# Neural Networks Recommenders as DirMF baselines

## Experimental Setup

First, we import the required libraries.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, Dropout, Dense, Concatenate, Dot, Lambda
from keras.optimizers import Adam
from keras import backend as K

from sklearn.metrics import precision_score, recall_score, ndcg_score, mean_absolute_error, mean_squared_error, r2_score

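Next, we configure the parameters of the experiments. Note that each of the following cells contains the configuration for one dataset and assigns the same variables, so run only the cell of the dataset that you want to evaluate (if all cells are executed in order, the last one, `netflix`, takes effect).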

In [8]:
# --- Configuration for the 'ml1m' dataset ---

# Name of the sub-directory of ../data/ that holds the rating files.
dataset = 'ml1m'

# Sizes of the user and item embedding tables.
num_users = 6040
num_items = 3706

# Width of every embedding vector.
latent_dim = 5

# Training length of each baseline.
gmf_epochs = 10
ncf_epochs = 10
steps_per_epoch = None  # forwarded unchanged to Model.fit

# Ratings (true or predicted) >= this value count as "liked" when the
# recommendations are evaluated.
like_threshold = 4

In [22]:
# --- Configuration for the 'filmtrust' dataset ---

# Name of the sub-directory of ../data/ that holds the rating files.
dataset = 'filmtrust'

# Sizes of the user and item embedding tables.
num_users = 1508
num_items = 2071

# Width of every embedding vector.
latent_dim = 5

# Training length of each baseline.
gmf_epochs = 15
ncf_epochs = 8
steps_per_epoch = None  # forwarded unchanged to Model.fit

# Ratings (true or predicted) >= this value count as "liked" when the
# recommendations are evaluated.
like_threshold = 3

In [38]:
# --- Configuration for the 'myanimelist' dataset ---

# Name of the sub-directory of ../data/ that holds the rating files.
dataset = 'myanimelist'

# Sizes of the user and item embedding tables.
num_users = 69600
num_items = 9927

# Width of every embedding vector.
latent_dim = 7

# Training length of each baseline.
gmf_epochs = 20
ncf_epochs = 15
steps_per_epoch = None  # forwarded unchanged to Model.fit

# Ratings (true or predicted) >= this value count as "liked" when the
# recommendations are evaluated.
like_threshold = 8

In [2]:
# --- Configuration for the 'netflix' dataset ---

# Name of the sub-directory of ../data/ that holds the rating files.
dataset = 'netflix'

# Sizes of the user and item embedding tables.
num_users = 480189
num_items = 17770

# Width of every embedding vector.
latent_dim = 6

# Training length of each baseline; unlike the other configurations, this
# one sets an explicit number of steps per epoch.
gmf_epochs = 5
ncf_epochs = 4
steps_per_epoch = 200000  # forwarded unchanged to Model.fit

# Ratings (true or predicted) >= this value count as "liked" when the
# recommendations are evaluated.
like_threshold = 4

Dataset loading.

In [3]:
# Read the already-partitioned rating files of the selected dataset.
# Later cells read the 'user', 'item' and 'rating' columns of both frames.
df_train = pd.read_csv(''.join(('../data/', dataset, '/training-ratings.csv')), sep=',')
df_test = pd.read_csv(''.join(('../data/', dataset, '/test-ratings.csv')), sep=',')

Build the model inputs (user and item ids) and the targets (ratings) for the training and test partitions.

In [4]:
# Model inputs are a two-element list [user ids, item ids], in that order,
# which matches the [user_input, item_input] order used to build the models.
X_train = [df_train[column].to_numpy() for column in ('user', 'item')]
X_test = [df_test[column].to_numpy() for column in ('user', 'item')]

# Regression targets: the observed ratings.
y_train = df_train['rating'].to_numpy()
y_test = df_test['rating'].to_numpy()

## Neural-based Collaborative Filtering model definitions

### GMF

Model architecture:

In [5]:
# Inputs: one id per sample for the user and for the item.
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

# Embedding tables mapping each id to a latent_dim-dimensional vector.
user_embedding = Embedding(input_dim=num_users, output_dim=latent_dim)(user_input)
item_embedding = Embedding(input_dim=num_items, output_dim=latent_dim)(item_input)

# Drop the length-1 sequence axis: (batch, 1, latent_dim) -> (batch, latent_dim).
user_vec = Flatten()(user_embedding)
item_vec = Flatten()(item_embedding)

# The predicted rating is the dot product of the item and user vectors.
dot = Dot(axes=1)([item_vec, user_vec])

GMF = Model(inputs=[user_input, item_input], outputs=dot)

Model fitting using GPU:

In [6]:
# Compile and train GMF on the first GPU. The test partition is passed as
# validation data, so the per-epoch validation metrics are test-set metrics.
with tf.device('/GPU:0'):
    GMF.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    GMF.summary()

    gmf_report = GMF.fit(
        X_train,
        y_train,
        epochs=gmf_epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_test, y_test),
        verbose=1,
    )

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 6)         106620      input_2[0][0]                    
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 6)         2881134     input_1[0][0]                    
_______________________________________________________________________________________

### NCF

Model architecture:

In [7]:
# Inputs: one id per sample for the item and for the user.
item_input = Input(shape=(1,), name='item-input')
user_input = Input(shape=(1,), name='user-input')

# Embedding tables mapping each id to a latent_dim-dimensional vector.
item_embedding = Embedding(input_dim=num_items, output_dim=latent_dim, name='item-embedding')(item_input)
user_embedding = Embedding(input_dim=num_users, output_dim=latent_dim, name='user-embedding')(user_input)

# Drop the length-1 sequence axis: (batch, 1, latent_dim) -> (batch, latent_dim).
item_vec = Flatten(name='item-flatten')(item_embedding)
user_vec = Flatten(name='user-flatten')(user_embedding)

# MLP tower over the concatenated [item, user] vector: 70 -> 30 -> 1 units,
# with dropout after each hidden layer.
concat = Concatenate(axis=1, name='item-user-concat')([item_vec, user_vec])
fc_1 = Dense(units=70, activation='relu', name='fc-1')(concat)
fc_1_dropout = Dropout(rate=0.5, name='fc-1-dropout')(fc_1)
fc_2 = Dense(units=30, activation='relu', name='fc-2')(fc_1_dropout)
fc_2_dropout = Dropout(rate=0.4, name='fc-2-dropout')(fc_2)
# NOTE(review): the output unit is ReLU-activated, so predictions can never be
# negative -- confirm this is intended for the rating scale in use.
fc_3 = Dense(units=1, activation='relu', name='fc-3')(fc_2_dropout)

# Inputs are ordered [user, item] to match X_train / X_test.
NCF = Model(inputs=[user_input, item_input], outputs=fc_3)

Model fitting using GPU:

In [8]:
# Compile and train NCF on the first GPU. The test partition is passed as
# validation data, so the per-epoch validation metrics are test-set metrics.
with tf.device('/GPU:0'):
    NCF.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
    NCF.summary()

    ncf_report = NCF.fit(
        X_train,
        y_train,
        epochs=ncf_epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=(X_test, y_test),
        verbose=1,
    )

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
item-input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user-input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item-embedding (Embedding)      (None, 1, 6)         106620      item-input[0][0]                 
__________________________________________________________________________________________________
user-embedding (Embedding)      (None, 1, 6)         2881134     user-input[0][0]                 
_______________________________________________________________________________________

## Experimental Results

In [9]:
# Names of the evaluated baselines. Each one is also the name of the `preds`
# column that stores that baseline's predictions, and the evaluation loops
# iterate over this list.
methods = ['gmf', 'ncf']

In [10]:
# One row per test rating: the (user, item) pair and its true rating.
# Prediction columns of the baselines are added to this frame afterwards.
preds = pd.DataFrame({
    'user': X_test[0],
    'item': X_test[1],
    'y_test': y_test,
})

Store predictions of the baselines:

In [11]:
# The model has a single output unit, so predict() yields a 2-D array with one
# column; flatten it explicitly so a plain 1-D vector is stored in the column
# instead of relying on pandas to squeeze it.
preds['gmf'] = GMF.predict(X_test).ravel()

In [12]:
# The model has a single output unit, so predict() yields a 2-D array with one
# column; flatten it explicitly so a plain 1-D vector is stored in the column
# instead of relying on pandas to squeeze it.
preds['ncf'] = NCF.predict(X_test).ravel()

### Quality of the predictions

In [13]:
# Mean absolute error of each baseline's predictions on the test ratings.
print('MAE:')
for m in methods:
    method_mae = mean_absolute_error(preds['y_test'], preds[m])
    print('-', m, ':', method_mae)

MAE:
- deepmf : 0.6861803149776209
- ncf : 0.6882784305068139


### Quality of the recommendations

In [14]:
def recommender_precision_recall(X, y_true, y_pred, N, threshold):
    """Average per-user precision and recall of top-N recommendations.

    For every user with at least one relevant test rating
    (``y_true >= threshold``), the ``N`` test items with the highest predicted
    rating are taken as candidates, and a candidate is actually recommended
    only if its predicted rating is also ``>= threshold``. Precision and recall
    of that recommendation set against the user's relevant items are computed
    and averaged over those users. Users without any relevant test rating are
    ignored.

    Parameters
    ----------
    X : array-like, 2-D
        One row per test rating; column 0 holds the user id. Other columns
        are not used.
    y_true : array-like
        True ratings, aligned with the rows of ``X``.
    y_pred : array-like
        Predicted ratings, aligned with the rows of ``X``.
    N : int
        Maximum number of items recommended to each user. Values ``<= 0``
        mean that nothing is recommended.
    threshold : number
        Minimum rating for an item to be considered relevant (true rating) or
        recommendable (predicted rating).

    Returns
    -------
    tuple of float
        ``(precision, recall)`` averaged over the evaluated users, or
        ``(0.0, 0.0)`` when there is no user to evaluate.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    # Nothing to evaluate, or nothing may be recommended. The explicit N check
    # matters because a slice of [-0:] would select *every* item.
    if y_true.size == 0 or N <= 0:
        return 0.0, 0.0

    # Group row indices by user with a single stable sort instead of scanning
    # the whole array once per user. Stability keeps each user's rows in their
    # original order; users are visited in increasing id order.
    user_ids = np.asarray(X)[:, 0]
    order = np.argsort(user_ids, kind='stable')
    sorted_ids = user_ids[order]
    group_starts = np.flatnonzero(sorted_ids[1:] != sorted_ids[:-1]) + 1

    precision = 0.0
    recall = 0.0
    count = 0

    for indices in np.split(order, group_starts):
        relevant = y_true[indices] >= threshold
        n_relevant = np.count_nonzero(relevant)
        if n_relevant == 0:  # ignore test users without relevant ratings
            continue

        user_pred = y_pred[indices]

        # Top-N predictions are candidates; keep those that reach the threshold.
        top_n = np.argsort(user_pred)[-N:]
        recommended = np.zeros(indices.size, dtype=bool)
        recommended[top_n] = user_pred[top_n] >= threshold

        hits = np.count_nonzero(relevant & recommended)
        n_recommended = np.count_nonzero(recommended)

        # Precision is defined as 0 when nothing was recommended.
        precision += hits / n_recommended if n_recommended else 0.0
        recall += hits / n_relevant
        count += 1

    if count == 0:  # no user has a relevant rating
        return 0.0, 0.0

    return precision / count, recall / count

In [15]:
n = 10  # maximum number of items recommended to each user

# The (user, item) ids and the true ratings are the same for every method, so
# they are extracted once instead of on every loop iteration.
ids = preds[['user', 'item']].to_numpy()
y_true = preds['y_test'].to_numpy()

for m in methods:
    y_pred = preds[m].to_numpy()
    precision, recall = recommender_precision_recall(ids, y_true, y_pred, n, like_threshold)

    print('-', m, ':', '\n\t', 'precision: ', precision, '\n\t', 'recall: ', recall)

- deepmf : 
	 precision:  0.600562101316028 
	 recall:  0.29922371652744606
- ncf : 
	 precision:  0.6401984761614494 
	 recall:  0.3079389919867263
