In [43]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Embedding, Flatten, Dense, Concatenate, Multiply
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


In [44]:
# Load the ratings dataset
ratings = pd.read_csv('/ratings.csv')

# Encode user and movie IDs to be continuous integers
user_ids = ratings['userId'].unique().tolist()
movie_ids = ratings['movieId'].unique().tolist()

user2user_encoded = {x: i for i, x in enumerate(user_ids)}
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}

ratings['user'] = ratings['userId'].map(user2user_encoded)
ratings['movie'] = ratings['movieId'].map(movie2movie_encoded)

# Get the number of unique users and movies
num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)

# Prepare the input data and targets
X = ratings[['user', 'movie']].values
y = ratings['rating'].values

# Normalize ratings to [0, 1] (helps with sigmoid activation in the model)
y = y / 5.0

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [45]:
def build_ncf_model(num_users, num_movies, mf_dim=8, mlp_dim=32, layers=[64, 32, 16, 8]):
    # Inputs for user and movie IDs
    user_input = Input(shape=(1,), name='user_input')
    movie_input = Input(shape=(1,), name='movie_input')

    # Embedding layers for MF
    mf_user_embedding = Embedding(num_users, mf_dim, name='mf_user_embedding')(user_input)
    mf_movie_embedding = Embedding(num_movies, mf_dim, name='mf_movie_embedding')(movie_input)

    # Flatten embeddings and compute element-wise product (MF component)
    mf_user_latent = Flatten()(mf_user_embedding)
    mf_movie_latent = Flatten()(mf_movie_embedding)
    mf_vector = Multiply()([mf_user_latent, mf_movie_latent])

    # Embedding layers for MLP
    mlp_user_embedding = Embedding(num_users, mlp_dim, name='mlp_user_embedding')(user_input)
    mlp_movie_embedding = Embedding(num_movies, mlp_dim, name='mlp_movie_embedding')(movie_input)

    # Flatten and concatenate embeddings (MLP component)
    mlp_user_latent = Flatten()(mlp_user_embedding)
    mlp_movie_latent = Flatten()(mlp_movie_embedding)
    mlp_vector = Concatenate()([mlp_user_latent, mlp_movie_latent])

    # Stack dense layers for the MLP part
    for layer_size in layers:
        mlp_vector = Dense(layer_size, activation='relu')(mlp_vector)

    # Concatenate MF and MLP components
    final_vector = Concatenate()([mf_vector, mlp_vector])

    # Final output layer with sigmoid activation (for rating prediction)
    output = Dense(1, activation='sigmoid', name='prediction')(final_vector)

    # Build and compile the model
    model = Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    return model

# Build the model
ncf_model = build_ncf_model(num_users, num_movies)
ncf_model.summary()


In [46]:
# Train the model
history = ncf_model.fit(
    [X_train[:, 0], X_train[:, 1]],  # User and movie inputs
    y_train,  # Ratings
    validation_data=([X_test[:, 0], X_test[:, 1]], y_test),
    epochs=10,
    batch_size=64,
    verbose=1
)


Epoch 1/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - loss: 0.0390 - val_loss: 0.0318
Epoch 2/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - loss: 0.0277 - val_loss: 0.0304
Epoch 3/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 0.0222 - val_loss: 0.0312
Epoch 4/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.0171 - val_loss: 0.0328
Epoch 5/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 0.0141 - val_loss: 0.0338
Epoch 6/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0122 - val_loss: 0.0350
Epoch 7/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 0.0106 - val_loss: 0.0362
Epoch 8/10
[1m1261/1261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 0.0094 - val_loss: 0.0372
Epoch 9/10
[1m1261/1

In [47]:
# Evaluate the model
test_loss = ncf_model.evaluate([X_test[:, 0], X_test[:, 1]], y_test)
print(f'Test Loss: {test_loss}')


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0393
Test Loss: 0.039129577577114105


In [48]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Predict ratings for the test set
y_pred = ncf_model.predict([X_test[:, 0], X_test[:, 1]]).flatten()

# Since the predictions are normalized to [0, 1], scale them back to [0, 5]
y_pred = y_pred * 5
y_test_scaled = y_test * 5

# Calculate MAE
mae = mean_absolute_error(y_test_scaled, y_pred)
print(f'Mean Absolute Error (MAE): {mae:.4f}')

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test_scaled, y_pred))
print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')

# Calculate R² Score
r2 = r2_score(y_test_scaled, y_pred)
print(f'R² Score: {r2:.4f}')


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Mean Absolute Error (MAE): 0.7530
Root Mean Squared Error (RMSE): 0.9891
R² Score: 0.1107
