In [1]:
import pandas as pd
import numpy as np
from keras import Model
from keras.layers import Input, Dense, Concatenate, Dropout, Embedding, Flatten
from keras.optimizers import Adam
from sklearn.model_selection import KFold
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Multiply

In [2]:
from keras.regularizers import l2


In [3]:
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [5]:
def NeuMF(num_users, num_items, mf_dim, layers, reg_mf, reg_layers):
    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # Embedding layers
    mf_user_embedding = Embedding(input_dim=num_users, output_dim=mf_dim, name='mf_user_embedding',
                                  embeddings_initializer='random_normal', input_length=1)
    mf_item_embedding = Embedding(input_dim=num_items, output_dim=mf_dim, name='mf_item_embedding',
                                  embeddings_initializer='random_normal', input_length=1)
    mlp_user_embedding = Embedding(input_dim=num_users, output_dim=layers[0]//2, name='mlp_user_embedding',
                                   embeddings_initializer='random_normal', input_length=1)
    mlp_item_embedding = Embedding(input_dim=num_items, output_dim=layers[0]//2, name='mlp_item_embedding',
                                   embeddings_initializer='random_normal', input_length=1)

    # GMF branch
    mf_user_latent = Flatten()(mf_user_embedding(user_input))
    mf_item_latent = Flatten()(mf_item_embedding(item_input))
    mf_vector = Multiply()([mf_user_latent, mf_item_latent])

    # MLP branch
    mlp_user_latent = Flatten()(mlp_user_embedding(user_input))
    mlp_item_latent = Flatten()(mlp_item_embedding(item_input))
    mlp_vector = Concatenate()([mlp_user_latent, mlp_item_latent])
    for i in range(len(layers)):
        layer = Dense(layers[i], activation='relu', name=f'layer{i}', kernel_regularizer=l2(reg_layers[i]))
        mlp_vector = layer(mlp_vector)
        dropout_layer = Dropout(rate=0.2, name=f'dropout_layer{i}')
        mlp_vector = dropout_layer(mlp_vector)

    # Concatenate GMF and MLP branches
    concat_vector = Concatenate()([mf_vector, mlp_vector])

    # Output layer
    output_layer = Dense(1, activation='linear', name='output_layer', kernel_regularizer=l2(reg_mf))
    prediction = output_layer(concat_vector)

    # Define the model
    model = Model(inputs=[user_input, item_input], outputs=prediction)

    return model


In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2


In [None]:
# Load data
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Split data into train and test sets
train, test = train_test_split(ratings, test_size=0.2, random_state=42)

# Define hyperparameters to search over
hyperparameters = {
    'num_factors': [8, 16],
    'layers': [[64, 32, 16]],
    'reg_mf': [0.01],
    'reg_layers': [[0.01, 0.01, 0.01, 0.01], [0.005, 0.005, 0.005,0.005]]
}


In [20]:


# Define number of epochs and batch size
num_epochs = 10
batch_size = 512

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Iterate over all combinations of hyperparameters
for num_factors in hyperparameters['num_factors']:
    for layers in hyperparameters['layers']:
        for reg_mf in hyperparameters['reg_mf']:
            for reg_layers in hyperparameters['reg_layers']:
                # Define and compile the model
                model = NeuMF(num_users=len(ratings['userId'].unique()), num_items=len(ratings['movieId'].unique()),
                              mf_dim=num_factors, layers=layers, reg_mf=reg_mf, reg_layers=reg_layers)
                model.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error')

                # Train the model
                history = model.fit([train['userId'], train['movieId']], train['rating'],
                                    batch_size=batch_size,
                                    epochs=num_epochs,
                                    validation_data=([test['userId'], test['movieId']], test['rating']),
                                    callbacks=[early_stopping])

                # Save the model and training history
                model_name = f"model_nf{num_factors}_layers{'_'.join(map(str, layers))}_regmf{reg_mf}_reg{'_'.join(map(str, reg_layers))}.h5"
                model.save(model_name)
                history_name = f"history_nf{num_factors}_layers{'_'.join(map(str, layers))}_regmf{reg_mf}_reg{'_'.join(map(str, reg_layers))}.csv"
                pd.DataFrame(history.history).to_csv(history_name, index=False)


Epoch 1/10


  super().__init__(name, **kwargs)
2023-05-11 20:05:07.827074: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-05-11 20:06:15.301042: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


2023-05-11 20:18:07.412673: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-05-11 20:19:23.870985: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


2023-05-11 20:32:09.023909: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-05-11 20:33:28.147542: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10


2023-05-11 20:45:56.736247: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-05-11 20:47:14.949936: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
from keras.models import load_model
model = load_model('model_nf16_layers64_32_16_regmf0.01_reg0.005_0.005_0.005_0.005.h5')

In [25]:
def recommend_movies(model, user_id, movie_df, top_n=10):
    # Get list of all movie IDs
    all_movies = list(movie_df.movieId.unique())

    # Create list of tuples for user ID and each movie ID
    user = np.full(len(all_movies), user_id, dtype='int32')
    movies = np.array(all_movies, dtype='int32')
    user_movie_matrix = [user, movies]

    # Predict ratings for all movies
    ratings = model.predict(user_movie_matrix)
    ratings[ratings>5.0] = 5.0
   
    # Create DataFrame of predicted ratings for each movie
    ratings_df = pd.DataFrame({'movieId': movies, 'rating': ratings.flatten()})

    # Merge with movie DataFrame to get movie titles
    merged_df = pd.merge(ratings_df, movie_df, on='movieId')

    # Sort by predicted rating and return top n movies
    top_movies = merged_df.sort_values(by='rating', ascending=False).head(top_n)

    return top_movies[['movieId', 'title','rating']]

In [26]:
movies = pd.read_csv('movies.csv')

for i in range(0,5):
    # Generate recommendations for user i
    recommendations = recommend_movies(model, i, movies)
    # Print top 10 recommended movies for user i
    print("user ",i+1,":")
    print(recommendations)
    print()
    print("*******************************************************************************************************")
    print()

  3/853 [..............................] - ETA: 23s 

2023-05-11 21:04:59.833250: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


user  1 :
      movieId                                              title    rating
315       318                   Shawshank Redemption, The (1994)  4.198479
4217     4312   Himalaya (Himalaya - l'enfance d'un chef) (1999)  4.185206
6873     6985  Passion of Joan of Arc, The (Passion de Jeanne...  4.172028
7973     8656  Short Film About Killing, A (Krótki film o zab...  4.122800
6172     6271          Day for Night (La Nuit Américaine) (1973)  4.122089
3622     3713                         Long Walk Home, The (1990)  4.121580
5721     5820           Standing in the Shadows of Motown (2002)  4.109116
6501     6611                                  Umberto D. (1952)  4.083952
5508     5607     Son of the Bride (Hijo de la novia, El) (2001)  4.080187
7464     7767     Best of Youth, The (La meglio gioventù) (2003)  4.075929

*******************************************************************************************************

user  2 :
      movieId                                    