In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections
import time

In [None]:
import os

# Create the figure output directory together with its parent '../Results'.
# exist_ok = True makes the cell idempotent and also covers the case where
# '../Results' already exists but the 'Figure' sub-directory does not.
os.makedirs('../Results/Figure', exist_ok = True)

In [None]:
# Load the tab-separated ratings file; it has no header row, so the column
# names are supplied explicitly.
data = pd.read_csv(
    '../Data/u.data',
    sep = '\t',
    names = ['User', 'Item', 'Rating', 'Time'],
)

In [None]:
# Number of distinct user ids and item ids present in the ratings table
num_users = data['User'].unique().size
num_items = data['Item'].unique().size

In [None]:
def _load_split(csv_path):
    """Read one split CSV and return it as a NumPy array without its first column.

    NOTE(review): the dropped first column is presumably the index that was
    written when the split was saved -- confirm against the split script.
    """
    return pd.read_csv(csv_path).to_numpy()[:, 1:]


train_data = _load_split('../Data_Split/train.csv')
val_data = _load_split('../Data_Split/val.csv')
test_data = _load_split('../Data_Split/test.csv')

In [None]:
# Mean of the third column of the training split (the column used as the
# regression target when fitting the model)
train_mean_rating = train_data[:, 2].mean()

In [None]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Parameters
    ----------
    y_true, y_pred : array-like operands supporting element-wise subtraction
        (NumPy arrays in this notebook).

    Returns
    -------
    The square root of the mean squared element-wise difference.
    """
    residual = y_true - y_pred
    mean_squared_error = np.mean(residual * residual)
    return np.sqrt(mean_squared_error)

In [None]:
import tensorflow as tf
import keras
from keras.layers import Input, Embedding, Dense, Flatten, Dropout, BatchNormalization, Concatenate, Multiply

In [None]:
# Show the installed Keras version (rendered as this cell's output) so the
# environment used for the run is recorded alongside the results.
keras.__version__

In [None]:
# Latent-factor embedding sizes: one per user / item for the MLP branch,
# and a shared size for the matrix-factorization (MF) branch
num_user_mlp, num_item_mlp = 10, 10
num_mf = 30

# Learning rates explored by the grid search
learning_rate_list = [1e-3, 1e-2]

In [None]:
# Training budget: maximum number of epochs per run, and the number of
# epochs without improvement tolerated before early stopping
num_epochs = 100
early_stop_iters = 30

# Hyperparameter axes to tune (every combination is trained)
dropout_list = [0.05, 0.2, 0.4]
batch_size_list = [1024, 128, 256, 512]
l2_reg_kernel_list = [0.005, 0.02, 0.1]
bn_list = [True, False]

In [None]:
# Grid search over learning rate, dropout, L2 strength, batch size and batch
# normalisation. One model is trained per combination; its settings and its
# train / validation / test RMSE are appended to the parallel lists below,
# and its learning curve is saved as a PNG under '../Results/Figure'.


def _embedding_vector(id_input, num_ids, dim, l2_reg, embedding_name, flatten_name):
    """Embed the ids in ``id_input`` and flatten the result to one vector per example.

    The embedding table has ``num_ids + 1`` rows, so ids 0..num_ids are valid
    (presumably because the raw ids start at 1 -- TODO confirm against the data).
    """
    embedded = Embedding(input_dim = num_ids + 1, output_dim = dim,
                         embeddings_regularizer = keras.regularizers.l2(l2_reg),
                         name = embedding_name)(id_input)
    return Flatten(name = flatten_name)(embedded)


def _build_ncf_model(learning_rate, dropout_rate, l2_reg_kernel, bn):
    """Build and compile one model for a single hyperparameter combination.

    The network has two branches that share the user / item inputs:
    an element-wise product of MF embeddings, and an MLP over the
    concatenated MLP embeddings. Both are concatenated and reduced to a
    single linear output.

    Parameters
    ----------
    learning_rate : learning rate passed to the Adam optimizer.
    dropout_rate  : rate of both Dropout layers in the MLP branch.
    l2_reg_kernel : L2 factor applied to every embedding and hidden Dense kernel.
    bn            : if truthy, add BatchNormalization after the first two Dense layers.

    Returns
    -------
    A compiled ``keras.Model`` with MSE loss and an RMSE metric.
    """
    user_input = Input(shape = (1, ), name = 'User_Input')
    item_input = Input(shape = (1, ), name = 'Item_Input')

    user_MF_vec = _embedding_vector(user_input, num_users, num_mf, l2_reg_kernel, 'User_MF_Latent_Factor', 'Flatten-1')
    user_MLP_vec = _embedding_vector(user_input, num_users, num_user_mlp, l2_reg_kernel, 'User_MLP_Latent_Factor', 'Flatten-2')
    item_MF_vec = _embedding_vector(item_input, num_items, num_mf, l2_reg_kernel, 'Item_MF_Latent_Factor', 'Flatten-3')
    item_MLP_vec = _embedding_vector(item_input, num_items, num_item_mlp, l2_reg_kernel, 'Item_MLP_Latent_Factor', 'Flatten-4')

    # MF branch: element-wise product of the user and item MF vectors
    MF_embedding = Multiply(name = 'Multiply')([user_MF_vec, item_MF_vec])

    # MLP branch: concatenated embeddings through 200 -> 100 -> 20 ReLU layers
    MLP_embedding = Concatenate(name = 'Concat-1')([user_MLP_vec, item_MLP_vec])
    MLP_embedding = Dropout(rate = dropout_rate, name = 'Dropout-1')(MLP_embedding)
    MLP_embedding = Dense(units = 200, activation = 'relu', kernel_regularizer = keras.regularizers.l2(l2_reg_kernel), name = 'Dense-1')(MLP_embedding)
    if bn:
        MLP_embedding = BatchNormalization(name = 'Batch_Normalization-1')(MLP_embedding)
    MLP_embedding = Dropout(rate = dropout_rate, name = 'Dropout-2')(MLP_embedding)
    MLP_embedding = Dense(units = 100, activation = 'relu', kernel_regularizer = keras.regularizers.l2(l2_reg_kernel), name = 'Dense-2')(MLP_embedding)
    if bn:
        MLP_embedding = BatchNormalization(name = 'Batch_Normalization-2')(MLP_embedding)
    MLP_embedding = Dense(units = 20, activation = 'relu', kernel_regularizer = keras.regularizers.l2(l2_reg_kernel), name = 'Dense-4')(MLP_embedding)

    # Fusion head: both branches -> 8 ReLU units -> single linear prediction
    MLP_MF_embedding = Concatenate(name = 'Concat-2')([MF_embedding, MLP_embedding])
    MLP_MF_embedding = Dense(units = 8, activation = 'relu', kernel_regularizer = keras.regularizers.l2(l2_reg_kernel), name = 'Dense-6')(MLP_MF_embedding)
    results = Dense(units = 1, activation = 'linear', name = 'Prediction')(MLP_MF_embedding)

    model = keras.Model(inputs = [user_input, item_input], outputs = results)
    # The metric name is pinned so the history keys read by the caller are stable.
    model.compile(optimizer = keras.optimizers.Adam(learning_rate = learning_rate), loss = 'mse',
                  metrics = [keras.metrics.RootMeanSquaredError(name = 'root_mean_squared_error')])
    return model


def _best_epoch_index(val_losses):
    """Return the index of the first epoch with the lowest non-NaN validation loss.

    Returns 0 when every entry is NaN, so a diverged run is still recorded
    instead of aborting the whole grid search.
    """
    val_losses = np.asarray(val_losses, dtype = float)
    if np.all(np.isnan(val_losses)):
        return 0
    return int(np.nanargmin(val_losses))


user_mlp_dim_list = []
item_mlp_dim_list = []
mf_dim_list = []
early_stop_list = []
max_epochs_list = []

dropout_params = []
batch_size_params = []
lr_params = []
l2_reg_params = []
bn_params = []
rmse_train_list = []
rmse_val_list = []
rmse_test_list = []

iteration = 1

for learning_rate in learning_rate_list:
    for dropout_rate in dropout_list:
        for l2_reg_kernel in l2_reg_kernel_list:
            for batch_size in batch_size_list:
                for bn in bn_list:

                    # Release the state Keras keeps for previously built models;
                    # without this, memory grows with every configuration.
                    keras.backend.clear_session()

                    model = _build_ncf_model(learning_rate, dropout_rate, l2_reg_kernel, bn)

                    callback = keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = early_stop_iters, restore_best_weights = True)
                    history = model.fit(x = [train_data[:, 0], train_data[:, 1]], y = train_data[:, 2], batch_size = batch_size, epochs = num_epochs,
                                        validation_data = ([val_data[:, 0], val_data[:, 1]], val_data[:, 2]), callbacks = [callback],
                                        verbose = 2)

                    # Explicitly load the best-val_loss weights so the test RMSE and the
                    # reported train / val RMSE always refer to the same epoch.
                    # NOTE(review): whether EarlyStopping restores them by itself when the
                    # epoch limit is reached appears to differ between Keras releases.
                    if callback.best_weights is not None:
                        model.set_weights(callback.best_weights)

                    # Locate the best epoch from the recorded losses rather than inferring
                    # it from the history length, which is wrong when early stopping
                    # fires exactly on the final epoch.
                    best_epoch = _best_epoch_index(history.history['val_loss'])
                    rmse_train = history.history['root_mean_squared_error'][best_epoch]
                    rmse_val = history.history['val_root_mean_squared_error'][best_epoch]

                    test_pred = np.ravel(model.predict([test_data[:, 0], test_data[:, 1]]))
                    rmse_test = rmse(test_data[:, 2], test_pred)

                    user_mlp_dim_list.append(num_user_mlp)
                    item_mlp_dim_list.append(num_item_mlp)
                    mf_dim_list.append(num_mf)
                    early_stop_list.append(early_stop_iters)
                    max_epochs_list.append(num_epochs)

                    dropout_params.append(dropout_rate)
                    batch_size_params.append(batch_size)
                    lr_params.append(learning_rate)
                    l2_reg_params.append(l2_reg_kernel)
                    bn_params.append(bn)
                    rmse_train_list.append(rmse_train)
                    rmse_val_list.append(rmse_val)
                    rmse_test_list.append(rmse_test)

                    print('Iteration {} is complete'.format(iteration))
                    iteration += 1

                    fig, ax = plt.subplots()
                    ax.plot(history.history['root_mean_squared_error'], c = 'red', label = 'Training Error')
                    ax.plot(history.history['val_root_mean_squared_error'], c = 'blue', label = 'Validation Error')
                    ax.legend()
                    ax.set_xlabel('Epochs')
                    ax.set_ylabel('Root Mean Squared Error')
                    ax.set_title('RMSE vs Epochs During Training and Validation')
                    fig.savefig('../Results/Figure/umlp{}_imlp{}_mf{}_epochs{}_drop{}_batch{}_lr{}_l2{}_bn{}.png'.format(num_user_mlp, num_item_mlp, num_mf, num_epochs, dropout_rate, batch_size, learning_rate, l2_reg_kernel, bn))
                    # Render the curve in the notebook, then close the figure so that
                    # one figure per configuration does not pile up in memory.
                    plt.show()
                    plt.close(fig)



In [None]:
# Assemble the per-run records into a table (one row per trained
# configuration, one column per recorded list) and write it to CSV.
results_df = pd.DataFrame({
    'User_MLP_Latent_Factor': user_mlp_dim_list,
    'Item_MLP_Latent_Factor': item_mlp_dim_list,
    'MF_Latent_Factor': mf_dim_list,
    'Early_Stop_Iters': early_stop_list,
    'Max_Epochs': max_epochs_list,
    'Dropout_Rate': dropout_params,
    'Batch_Size': batch_size_params,
    'Learning_Rate': lr_params,
    'L2_Lamda': l2_reg_params,
    'Batch_Normalization': bn_params,
    'RMSE_Train': rmse_train_list,
    'RMSE_Val': rmse_val_list,
    'RMSE_Test': rmse_test_list,
})
results_df.to_csv('../Results/NCF_results.csv')