In [1]:
import yaml
from models.MLP import MLP
from models.GMF import GMF
from models.NeuMF import NeuMF
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

In [15]:
RATINGS_PATH = './data/preprocessed_data/ratings/ratings_preprocessed_ml.csv'
JOKES_PATH = './data/preprocessed_data/jokes/jokes_preprocessed.csv'

In [16]:
with open("neumf-config.yaml", "r") as file:
    data = yaml.safe_load(file)

ratings = pd.read_csv(RATINGS_PATH, sep=',', header=0)
jokes = pd.read_csv(JOKES_PATH, sep=',', header=0)

In [4]:
ratings['rating'] = (ratings['rating'] + 10) / 20

In [5]:
train_data, test_data = train_test_split(ratings, shuffle=True, test_size=0.2, random_state=42)
train_data, eval_data = train_test_split(train_data, shuffle=True, test_size=0.25, random_state=42)  

user_train = train_data['user'].values
item_train = train_data['joke_id'].values
rating_train = train_data['rating'].values

user_eval = eval_data['user'].values
item_eval = eval_data['joke_id'].values
rating_eval = eval_data['rating'].values

user_test = test_data['user'].values
item_test = test_data['joke_id'].values
rating_test = test_data['rating'].values

num_users = ratings['user'].nunique()
num_items = ratings['joke_id'].nunique()

In [None]:
neumf = NeuMF(data['NeuMF'], num_users, num_items)
neumf_model = neumf.build_model()
neumf_model = neumf.compile_model(neumf_model)

mlp = MLP(data['NeuMF']['MLP'], num_users, num_items)
mlp_model = mlp.build_model()

gmf = GMF(data['NeuMF']['GMF'], num_users, num_items)
gmf_model = gmf.build_model()

# Verifying if we need to train MLP and GMF models
mlp_weights_path = data['NeuMF']['MLP']['model_parameters_path']
if os.path.exists(mlp_weights_path):
    mlp_model.load_weights(data['NeuMF']['MLP']['model_parameters_path'])
else:
    mlp_model = mlp.compile_model(mlp_model)
    neumf.train_MLP(mlp_model, user_train, item_train, rating_train, eval_data=(user_eval, item_eval, rating_eval))
    mlp_model.load_weights(mlp_weights_path)

gmf_weights_path = data['NeuMF']['GMF']['model_parameters_path']
if os.path.exists(gmf_weights_path):
    gmf_model.load_weights(data['NeuMF']['GMF']['model_parameters_path'])
else:
    gmf_model = gmf.compile_model(gmf_model)
    neumf.train_GMF(gmf_model, user_train, item_train, rating_train, eval_data=(user_eval, item_eval, rating_eval))
    gmf_model.load_weights(gmf_weights_path)

neumf_model = neumf.load_pretrain_weights(neumf_model, mlp_model, gmf_model)
history = neumf_model.fit([user_train, item_train], 
                          rating_train, 
                          validation_data=([user_eval, item_eval], rating_eval),
                          epochs=data['NeuMF']['epochs'], 
                          batch_size=data['NeuMF']['batch_size'], 
                          verbose=1)


In [7]:
evaluation = neumf_model.evaluate([user_test, item_test], rating_test, batch_size=data['NeuMF']['batch_size'])

[1m808/808[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 944us/step - loss: 0.0629 - mae: 0.1864


In [8]:
predictions = neumf_model.predict([user_test, item_test])

[1m25853/25853[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1ms/step


In [9]:
compare_pred = pd.DataFrame({'True Values': rating_test, 'Predictions': predictions.flatten()})

In [10]:
compare_pred.sample(10)

Unnamed: 0,True Values,Predictions
339578,0.745,0.622734
341430,0.3105,0.43492
389014,0.626,0.649968
63147,0.66,0.687414
631118,0.4685,0.62536
740944,0.602,0.618596
767932,0.148,0.608797
192288,0.4685,0.656335
732701,0.386,0.306235
27999,0.6725,0.681429


In [None]:
mse = evaluation[0]
mae = evaluation[1]
rmse = np.sqrt(mse)

print("Mean Squared Error (MSE): ", mse)
print("Mean Absolute Error (MAE): ", mae)
print("Root Mean Squared Error (RMSE): ", rmse)

In [None]:
#neumf_model.save('./data/weights/neumf.h5')