In [23]:
import csv

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Activation, Lambda
from tensorflow.keras.layers import Concatenate, Dense, Dropout
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.optimizers import SGD, Adam


Data loading and imports similar to other workbooks.

In [4]:
with open ("../data/ml-100k/u.data", "r") as f:
    data = list(csv.reader(f, delimiter="\t"))
data = np.array(data)
film_dim = np.amax(np.array(data[:,1]).astype(np.float))
user_dim = np.amax(np.array(data[:,0]).astype(np.float))
print("film size", film_dim)
print("user size", user_dim)
data = data.astype(np.int)
data = pd.DataFrame(data)
data.sort_values([0,3],inplace=True) ## Sort data
data.rename(columns= {0: 'user_id',
                      1: 'movie_id',
                      2: 'rating',
                      3: 'time'},
            inplace=True)

film size 1682.0
user size 943.0


We use the LabelEncoder to transform IDs using a relation (x,y) -> (0,n) where n = # of IDs, so that we do not have IDs just as a random number, but going from 0 to n. 

In [5]:
user_encoder = LabelEncoder()
data['user_id_encoded'] = user_encoder.fit_transform(data['user_id'].values)
user_count = data['user_id_encoded'].nunique()

movie_encoder = LabelEncoder()
data['movie_id_encoded'] = movie_encoder.fit_transform(data['movie_id'].values)
movie_count = data['movie_id_encoded'].nunique()

We load the data into test and train sets, but for better performance, we will rescale the ratings to a more Gaussian distribution as regression models tend to work better with normally distributed data.

In [7]:
X_data = data[['user_id_encoded', 'movie_id_encoded']].values
Y_data = data['rating'].values

y_scaler = StandardScaler().fit(Y_data.reshape(len(Y_data), 1))
y_scaled = y_scaler.transform(Y_data.reshape(len(Y_data), 1))[:, 0]

train_x, test_x, train_y, test_y = train_test_split(X_data, y_scaled, test_size=0.1, random_state=315)


We select a number of factors for user / movie which we input into the Embedding layer. We also store the minimum and maximum ratings we need to use in the final Lambda layer and we prepare train and test arrays (each containing two arrays for users and for movies IDs).

We define a Recommender model starting with an input layer followed by Embedding layers, one for both movies and users.  We then Concatenate the two Embeddings and follow them by a Dense layer with relu activation function. The final layers contains a Dense layer with a single numeric output.

In [8]:
def ModelDenseEmbed(x, y, f):
    u = Input(shape=(1,))
    m = Input(shape=(1,))
    ue = Embedding(x, f)(u)
    me = Embedding(y, f)(m)
    x = Concatenate()([ue, me])
    x = Dropout(0.1)(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    model = Model(inputs=[user, movie], outputs=x)
    return model


We load the model and fit it to the training data. We experimented with a SGD but it turned out to train slower and with the same or worse final performance. We then resorted to Adam with 0.001 learning rate as it proved to be the most efficien

In [17]:
learning_rate = 0.01
decay_rate = 0.01 / 50
momentum = 0.5

sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate)

adam = Adam(lr=0.001)

model = ModelDenseEmbed(user_count, movie_count, 64)
model.compile(loss='MSE', optimizer=adam)
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_10 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_8 (Embedding)         (None, 1, 64)        60352       input_9[0][0]                    
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 1, 64)        107648      input_10[0][0]                   
____________________________________________________________________________________________

In [54]:
training_res = model.fit(x=[train_x[:, 0], train_x[:, 1]], y=train_y, batch_size=64, epochs=50, verbose=1, validation_data=([test_x[:, 0], test_x[:, 1]], test_y))

Train on 90000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


The desired models converges to a loss and val_loss value of abt. 0.66. The loss function used is mean squared error. Since the difference between the max and min of the y_scaled is 3.5, 0.66 can be considered a bit underwhelming result. We can print out some sample data to see the actual numbers (we need to inversely transform the data using the y_scaler instantiated sooner in the code).

In [26]:
predictions = model.predict([test_x[:, 0], test_x[:, 1]])

In [27]:
model_json = model.to_json()
with open("NNE_model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("NNE_model.h5")

In [45]:
# load json and create model
json_file = open('NNE_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("NNE_model.h5")
loaded_model.compile(loss='MSE', optimizer=adam)
loaded_predictions = loaded_model.predict([test_x[:, 0], test_x[:, 1]])

In [50]:
y_scaler.inverse_transform(loaded_predictions[10:20])

array([[[3.7539701]],

       [[4.0057034]],

       [[3.084084 ]],

       [[3.8751   ]],

       [[4.089307 ]],

       [[3.6873972]],

       [[4.6803074]],

       [[5.0133653]],

       [[4.27945  ]],

       [[3.3583715]]], dtype=float32)

In [51]:
y_scaler.inverse_transform(predictions[10:20])

array([[[3.7539701]],

       [[4.0057034]],

       [[3.084084 ]],

       [[3.8751   ]],

       [[4.089307 ]],

       [[3.6873972]],

       [[4.6803074]],

       [[5.0133653]],

       [[4.27945  ]],

       [[3.3583715]]], dtype=float32)

In [52]:
y_scaler.inverse_transform(test_y[10:20])

array([4., 4., 3., 4., 4., 5., 5., 5., 5., 5.])