In [0]:
# Number of epochs passed to `fit` for every model trained below.
NB_EPOCHS = 300
# Mini-batch size used for both `fit` and `evaluate`.
BATCH_SIZE = 256
# Number of folds (`n_splits`) of the K-fold cross-validation.
KFOLD = 5

In [2]:
# Line magic selecting TensorFlow 2.x (presumably the Colab runtime switch --
# TODO confirm; it may be unavailable on a plain Jupyter kernel).
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [3]:
import numpy as np
import tensorflow as tf
import pandas as pd
import gc, sys
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

tf.__version__

'2.0.0'

## Rebuild train and test sets from input files
1. Upload `interactions_train_alt.csv` and `interactions_test_alt.csv` to the working directory.
2. Execute the next cell.

In [4]:
def _load_user_item_matrix(csv_path):
    """Read an interactions CSV and pivot it into a rating matrix.

    Rows are indexed by the `u` column, columns by the `i` column, and each
    cell holds the corresponding `rating`. `dropna=False` keeps columns whose
    entries are all NaN.
    """
    interactions = pd.read_csv(csv_path)
    return interactions.pivot_table(index='u', columns='i', values='rating', dropna=False)


train_matrix = _load_user_item_matrix('./interactions_train_alt.csv')
print(f'Shape of train User-Movie-Matrix:\t{train_matrix.shape}')
test_matrix = _load_user_item_matrix('./interactions_test_alt.csv')
print(f'Shape of test User-Movie-Matrix:\t{test_matrix.shape}')

Shape of train User-Movie-Matrix:	(6384, 3148)
Shape of test User-Movie-Matrix:	(6384, 3148)


In [0]:
# Replace each rating (0 to 5) by a boolean preference: > 3 means like (1),
# anything else means dislike (0). Missing ratings stay NaN so that they can
# be turned into a mask value afterwards.
# The comparison is vectorised rather than an element-wise `applymap` lambda:
# one Python call per cell is slow on a matrix this size, and `applymap` is
# deprecated in recent pandas releases. Values are stored as floats (0.0/1.0).
# NOTE: this cell is not idempotent -- re-running it on already binarised
# matrices turns every like into a dislike (1 > 3 is False).
train_matrix = (train_matrix > 3).astype(float).where(train_matrix.notna())
test_matrix = (test_matrix > 3).astype(float).where(test_matrix.notna())

In [0]:
# Unrated cells (NaN) become -1, a sentinel outside the range of real values.
# The loss functions compare targets against -1 to mask out missing ratings.
for ratings_matrix in (train_matrix, test_matrix):
    ratings_matrix.fillna(-1, inplace=True)

# Build the model

In [0]:
def BuildAEModel(n_recipes, emb_dim = 256, activation=None):
    """Build a dense autoencoder with a single bottleneck layer.

    Args:
        n_recipes: width of the input vector and of the reconstructed output.
        emb_dim: number of units of the bottleneck ('embedder') layer.
        activation: activation applied to the bottleneck layer; the output
            ('Reconstructor') layer is always linear.

    Returns:
        An uncompiled `tf.keras.Model` mapping an `(n_recipes,)` input to its
        `(n_recipes,)` reconstruction.
    """
    layers = tf.keras.layers

    ratings_in = layers.Input((n_recipes,))

    # Encoder: one Dense projection down to the embedding size.
    bottleneck = layers.Dense(emb_dim, activation=activation, name='embedder')(ratings_in)

    # Decoder: linear projection back to one value per recipe.
    reconstruction = layers.Dense(n_recipes, activation='linear', name='Reconstructor')(bottleneck)

    return tf.keras.Model(inputs=ratings_in, outputs=[reconstruction])

## Define a masked loss function
- The loss compares predictions with targets only for rated recipes, i.e. entries whose value is not -1.
- For those entries, the mean squared error (MSE) is computed.

In [0]:
def customMaskedMSE(ytrue, ypred):
  """Mean squared error restricted to the rated entries.

  Entries of `ytrue` equal to -1 mark missing ratings and are excluded.

  Args:
    ytrue: target tensor, holding -1 wherever no rating exists.
    ypred: predicted tensor with the same shape as `ytrue`.

  Returns:
    Scalar tensor: the mean of the squared errors over the entries where
    `ytrue != -1`, or 0 when no entry of the batch is rated.
  """
  mask = tf.not_equal(ytrue, -1)
  errors = tf.boolean_mask(ytrue - ypred, mask)
  n_rated = tf.cast(tf.size(errors), errors.dtype)
  # A plain mean over an empty selection is NaN and would poison the weights;
  # divide_no_nan yields 0 (and a zero gradient) for a fully masked batch.
  return tf.math.divide_no_nan(tf.reduce_sum(tf.square(errors)), n_rated)

def customMaskedMAE(ytrue, ypred):
  """Mean absolute error restricted to the rated entries.

  Entries of `ytrue` equal to -1 mark missing ratings and are excluded.

  Args:
    ytrue: target tensor, holding -1 wherever no rating exists.
    ypred: predicted tensor with the same shape as `ytrue`.

  Returns:
    Scalar tensor: the mean of the absolute errors over the entries where
    `ytrue != -1`, or 0 when no entry of the batch is rated.
  """
  mask = tf.not_equal(ytrue, -1)
  errors = tf.boolean_mask(ytrue - ypred, mask)
  n_rated = tf.cast(tf.size(errors), errors.dtype)
  # A plain mean over an empty selection is NaN and would poison the weights;
  # divide_no_nan yields 0 (and a zero gradient) for a fully masked batch.
  return tf.math.divide_no_nan(tf.reduce_sum(tf.abs(errors)), n_rated)

## Start the training
- K-fold cross-validation

In [0]:
# K-fold cross-validation of the autoencoder for several embedding sizes.
# shuffle=True is required for random_state to have any effect (recent
# scikit-learn versions raise a ValueError otherwise). With an integer seed,
# every call to kf.split() yields the same folds, so all embedding sizes are
# compared on identical splits.
kf = KFold(n_splits=KFOLD, shuffle=True, random_state=689)
results = []  # flat history of every fold score, in execution order

for em_dim in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]:
  fold_scores = []  # scores of the current embedding size only
  for train_index, test_index in kf.split(train_matrix):
    X_train, X_test = train_matrix.iloc[train_index,:], train_matrix.iloc[test_index,:]

    # Release the previous model's graph/state so memory does not keep
    # growing over the successive trainings.
    tf.keras.backend.clear_session()

    my_model = BuildAEModel(train_matrix.shape[1], em_dim, 'relu')
    # Pass the optimizer instance itself so the learning rate set here is the
    # one actually used (the string 'adam' would build a separate default one).
    adam = tf.keras.optimizers.Adam(0.001)
    my_model.compile(optimizer=adam, loss=customMaskedMSE)

    # Autoencoder: the input matrix is also the reconstruction target.
    my_model.fit(X_train.values, X_train.values,
                      epochs=NB_EPOCHS, batch_size=BATCH_SIZE, verbose = 0)
    fold_scores.append(my_model.evaluate(X_test.values, X_test.values, batch_size=BATCH_SIZE, verbose = 0))
    results.append(fold_scores[-1])
    print(fold_scores[-1])

  # Average only the folds of this embedding size; averaging `results` would
  # mix in the scores of every size evaluated so far.
  print('CV score for emdim', em_dim,'is :', np.mean(fold_scores))


0.06568300317952477
0.06950888140951178
0.07243408628549068
0.07258840512043811
0.07293607236169349
CV score for emdim 1 is : 0.07063008967133176
0.0762519050001538
