In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Embedding, Input, Dot, Dense, Flatten, Multiply, Concatenate
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adagrad, Adam, SGD, RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow.keras.metrics import MeanSquaredError, RootMeanSquaredError
import datetime

# Preprocess data

In [2]:
def preprocess_data(data_path):
    header_data = ['user_id', 'item_id', 'rating', 'timestamp']
    data = pd.read_csv(data_path, sep='\t', names=header_data)
    data = data.drop("timestamp", axis=1).astype(int)
    
    num_items = data["item_id"].max()
    num_users = data["user_id"].max()
    
    y = data["rating"]
    y = np.where(y==5, 1, 0)

    X = data.drop("rating", axis=1)
    # Minus 1 so the index starts from 0
    X = X-1
    X = X.astype('category')
    
    return X, y, num_items, num_users

In [3]:
X_train, y_train, num_items, num_users = preprocess_data("data/ub.base")
X_test , y_test, _, _ = preprocess_data("data/ub.test")

In [4]:
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(90570, 2) (90570,) (9430, 2) (9430,)


# Negative Sampling

In [5]:
def get_train_instances(users, items, num_negatives):
    user_input, item_input, labels = [],[],[]
    num_users = users.shape[0]
    for (u, i) in zip(users, items):
        # positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        # negative instances
        for t in range(num_negatives):
            j = np.random.randint(num_items)
#             while train.has_key((u, j)):
#                 j = np.random.randint(num_items)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

In [6]:
user_input_training, item_input_training, labels_training \
    = get_train_instances(X_train["user_id"].to_numpy(), X_train["item_id"].to_numpy(), 4)
user_input_test, item_input_test, labels_test \
    = get_train_instances(X_test["user_id"].to_numpy(), X_test["item_id"].to_numpy(), 4)

In [7]:
print(len(user_input_training), len(user_input_test))

452850 47150


# Get model

In [8]:
# Dot product model
def get_model_dot_product(num_users, num_items, latent_dim, regs=[0,0]):
    
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')
    
    # Embedding layers
    user_embedding_layer = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embedding',
                                  embeddings_initializer='uniform', input_length=1)
    user_embedding = user_embedding_layer(user_input)
    
    item_embedding_layer = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embedding',
                                  embeddings_initializer='uniform', input_length=1)
    item_embedding = item_embedding_layer(item_input)
    
    # Flatten embeddings
    user_latent = Flatten()(user_embedding)
    item_latent = Flatten()(item_embedding)
    
    # Dot product layer
    dotted = Dot(axes=1)([user_latent, item_latent])
    
    # add the sigmoid output layer
    output = Dense(1, activation='sigmoid')(dotted)
    
    model = Model([user_input, item_input], output)
    
    return model

In [9]:
# GMF model
def get_model_gmf(num_users, num_items, latent_dim, regs=[0,0]):
    
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')
    
    # Embedding layers
    user_embedding_layer = Embedding(input_dim = num_users, output_dim = latent_dim, name = 'user_embedding',
                                  embeddings_initializer='uniform', input_length=1)
    user_embedding = user_embedding_layer(user_input)
    
    item_embedding_layer = Embedding(input_dim = num_items, output_dim = latent_dim, name = 'item_embedding',
                                  embeddings_initializer='uniform', input_length=1)
    item_embedding = item_embedding_layer(item_input)
    
    # Flatten embeddings
    user_latent = Flatten()(user_embedding)
    item_latent = Flatten()(item_embedding)
    
    # Element wise multiply layer
    multiply = Multiply()([user_latent, item_latent])
    
    # add the sigmoid output layer
    output = Dense(1, activation='sigmoid')(multiply)
    
    model = Model([user_input, item_input], output)
    
    return model

In [10]:
# MLP model
def get_model_mlp(num_users, num_items,  layers = [20,10], reg_layers=[0,0]):
    assert len(layers) == len(reg_layers)
    num_layer = len(layers) #Number of layers in the MLP
    
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')
    
    # Embedding layers, embedding size is half of the first MLP layer
    user_embedding_layer = Embedding(input_dim = num_users, output_dim = int(layers[0]/2), name = 'user_embedding',
                                  embeddings_initializer='uniform', input_length=1)
    user_embedding = user_embedding_layer(user_input)
    
    item_embedding_layer = Embedding(input_dim = num_items, output_dim = int(layers[0]/2), name = 'item_embedding',
                                  embeddings_initializer='uniform', input_length=1)
    item_embedding = item_embedding_layer(item_input)
    
    # Flatten embeddings
    user_latent = Flatten()(user_embedding)
    item_latent = Flatten()(item_embedding)
    
    # Concatenate two embeddings
    vector = Concatenate()([user_latent, item_latent])
    
    # MLP layer
    for idx in range(1, num_layer):
        layer = Dense(layers[idx], kernel_regularizer= l2(reg_layers[idx]), activation='relu', name = 'layer%d' %idx)
        vector = layer(vector)
    
    # Final prediction layer
    output = Dense(1, activation='sigmoid', name = 'prediction')(vector)
    
    model = Model([user_input, item_input], output)
    
    return model

In [11]:
# NMF
def get_model_nmf(num_users, num_items,  mf_dim=10, mlp_layers=[20, 10], reg_mlp_layers=[1, 1], reg_mf=0):
    assert len(mlp_layers) == len(reg_mlp_layers)
    num_layer = len(mlp_layers) #Number of layers in the MLP
    
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name = 'user_input')
    item_input = Input(shape=(1,), dtype='int32', name = 'item_input')
    
    # Embedding layers, embedding size is half of the first MLP layer
    MF_Embedding_User = Embedding(input_dim = num_users, output_dim = mf_dim, name = 'MF_user_embedding',
                                  embeddings_initializer='uniform', input_length=1)(user_input)
    MF_Embedding_Item = Embedding(input_dim = num_items, output_dim = mf_dim, name = 'MF_item_embedding',
                                  embeddings_initializer='uniform', input_length=1)(item_input)
    MLP_Embedding_User = Embedding(input_dim = num_users, output_dim = int(mlp_layers[0]/2), name = 'MLP_user_embedding',
                                  embeddings_initializer='uniform', input_length=1)(user_input)
    MLP_Embedding_Item = Embedding(input_dim = num_items, output_dim = int(mlp_layers[0]/2), name = 'MLP_item_embedding',
                                  embeddings_initializer='uniform', input_length=1)(item_input)
    
    # MF part
    MF_user_latent = Flatten()(MF_Embedding_User)
    MF_item_latent = Flatten()(MF_Embedding_Item)
    mf_vector = Multiply(name="MF_output")([MF_user_latent, MF_item_latent])
    
    
    # MLP part
    MLP_user_latent = Flatten()(MLP_Embedding_User)
    MLP_item_latent = Flatten()(MLP_Embedding_Item)
    mlp_vector = Concatenate()([MLP_user_latent, MLP_item_latent])
    for idx in range(1, num_layer):
        layer = Dense(mlp_layers[idx], kernel_regularizer= l2(reg_mlp_layers[idx]), activation='relu', name = 'MLP_layer%d_output' %idx)
        mlp_vector = layer(mlp_vector)
    
    # Concatenate MF and MLP
    predict_vector = Concatenate(axis = -1, name="MLP_MF_concatenate_layer")([mf_vector, mlp_vector])
    
    # Final prediction layer
    output = Dense(1, activation='sigmoid', name = 'prediction')(predict_vector)
    
    model = Model([user_input, item_input], output)
    
    return model

# Compile and fit the model

In [12]:
# model = get_model_dot_product(num_users, num_items, 20, [1,1])
# model = get_model_gmf(num_users, num_items, 20, [1,1])
# model = get_model_mlp(num_users, num_items, [40, 100, 100], [1, 1, 1])
model = get_model_nmf(num_users, num_items, mf_dim=10, mlp_layers=[40, 10], reg_mlp_layers=[1, 1], reg_mf=0)
    
#Optimizer
model.compile(optimizer=Adam(lr=0.01), loss='binary_crossentropy', metrics=[MeanSquaredError(), RootMeanSquaredError()])

In [13]:
print(model.summary())

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
MLP_user_embedding (Embedding)  (None, 1, 20)        18860       user_input[0][0]                 
__________________________________________________________________________________________________
MLP_item_embedding (Embedding)  (None, 1, 20)        33640       item_input[0][0]                 
______________________________________________________________________________________________

In [14]:
# Clear any logs from previous runs
!rm -rf ./logs/ 

In [15]:
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(patience=3)

In [16]:
# Training
hist = model.fit(x = [np.array(user_input_training), np.array(item_input_training)], #input
                 y = np.array(labels_training), # labels 
                 validation_data = ([np.array(user_input_test), np.array(item_input_test)], np.array(labels_test)),
                 callbacks = [tensorboard_callback, early_stopping_callback],
                 batch_size=25, epochs=10, verbose=2, shuffle=True)

Epoch 1/10
18114/18114 - 29s - loss: 0.4288 - mean_squared_error: 0.1320 - root_mean_squared_error: 0.3633 - val_loss: 0.3890 - val_mean_squared_error: 0.1240 - val_root_mean_squared_error: 0.3521
Epoch 2/10
18114/18114 - 29s - loss: 0.3835 - mean_squared_error: 0.1241 - root_mean_squared_error: 0.3522 - val_loss: 0.3451 - val_mean_squared_error: 0.1103 - val_root_mean_squared_error: 0.3321
Epoch 3/10
18114/18114 - 29s - loss: 0.3763 - mean_squared_error: 0.1221 - root_mean_squared_error: 0.3495 - val_loss: 0.3375 - val_mean_squared_error: 0.1083 - val_root_mean_squared_error: 0.3291
Epoch 4/10
18114/18114 - 29s - loss: 0.3760 - mean_squared_error: 0.1220 - root_mean_squared_error: 0.3493 - val_loss: 0.3457 - val_mean_squared_error: 0.1103 - val_root_mean_squared_error: 0.3322
Epoch 5/10
18114/18114 - 29s - loss: 0.3763 - mean_squared_error: 0.1219 - root_mean_squared_error: 0.3492 - val_loss: 0.3491 - val_mean_squared_error: 0.1094 - val_root_mean_squared_error: 0.3308
Epoch 6/10
1811

In [17]:
# Run tensorboard --logdir logs/fit in terminal

In [20]:
model.evaluate(x = [np.array(user_input_training), np.array(item_input_training)], y = np.array(labels_training))



[0.3559238016605377, 0.11557349562644958, 0.3399610221385956]

In [18]:
model.evaluate(x = [np.array(user_input_test), np.array(item_input_test)], y = np.array(labels_test))



[0.3417290151119232, 0.10919277369976044, 0.33044329285621643]

In [21]:
# Take the corresponding layer name
user_layer_name = 'MLP_user_embedding'
user_embedding_layer_model = tf.keras.Model(inputs=model.input,
                                       outputs=model.get_layer(user_layer_name).output)
item_layer_name = 'MLP_item_embedding'
item_embedding_layer_model = tf.keras.Model(inputs=model.input,
                                       outputs=model.get_layer(item_layer_name).output)

In [23]:
user_embedding = user_embedding_layer_model([X_train["user_id"].unique(), np.zeros(X_train["user_id"].nunique())])
item_embedding = item_embedding_layer_model([np.zeros(X_train["item_id"].nunique()), X_train['item_id'].unique()])

In [24]:
user_embedding

<tf.Tensor: shape=(943, 20), dtype=float32, numpy=
array([[-2.0033271 ,  0.5904838 ,  0.93900466, ..., -0.67950684,
        -1.0107559 , -1.6100581 ],
       [ 0.85070235,  0.00541484,  0.30535725, ...,  3.1354122 ,
         0.21869719,  0.61427754],
       [ 0.16278872,  0.48043352, -1.9866865 , ...,  0.32641444,
         0.2142582 ,  0.08195353],
       ...,
       [-0.972533  ,  0.15127365,  1.1538212 , ...,  0.19976269,
         0.6710257 ,  0.3668556 ],
       [-0.683696  , -0.72120225, -1.3651665 , ..., -0.7133322 ,
        -0.9754297 ,  2.0918026 ],
       [-0.47959092, -0.14987373, -1.0891918 , ..., -0.49521285,
         0.39609465, -0.5826363 ]], dtype=float32)>

In [25]:
item_embedding

<tf.Tensor: shape=(1675, 20), dtype=float32, numpy=
array([[ 2.7229576 , -1.105682  ,  1.3418177 , ...,  2.721942  ,
         2.2409976 , -1.6720752 ],
       [ 0.7336192 , -0.8630612 ,  0.30974436, ...,  0.20996082,
         0.48820525, -0.55639577],
       [ 0.0594411 , -0.32444417,  0.41829103, ..., -0.38656452,
        -0.30472952, -0.3879488 ],
       ...,
       [-8.843306  ,  8.643384  , -8.63309   , ..., -8.249027  ,
        -8.783174  ,  8.211351  ],
       [-8.460501  ,  7.7445517 , -7.8495474 , ..., -8.399564  ,
        -8.278993  ,  7.4697475 ],
       [-8.342359  ,  8.161148  , -7.7674675 , ..., -8.017623  ,
        -7.72812   ,  8.049816  ]], dtype=float32)>