In [82]:
import numpy as np
import sklearn
import tensorflow as tf
import pandas as pd

from keras.src.layers import Dense
from tensorflow import keras
from keras import layers
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dot, Input, Dropout, Multiply, Concatenate, Add, Activation, BatchNormalization, Lambda
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import regularizers
import keras_tuner as kt
from keras_tuner import HyperModel

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json


In [2]:
%config Completer.use_jedi = False

# Load The Data

In [75]:
# --- 1. Data Loading and Preprocessing ---

def load_and_preprocess_data(data_path="Data/ratings.dat", test_size=0.2, random_state=42):
    """
    Loads and preprocesses the MovieLens-1M ratings data.
    """
    # Load data (UserID::MovieID::Rating::Timestamp)
    ratings_df = pd.read_csv(data_path,
                             sep='::',
                             engine='python',
                             header=None,
                             names=['userId', 'movieId', 'rating', 'timestamp'])

    # Label encode user and movie IDs to be 0-indexed and dense
    user_encoder = LabelEncoder()
    movie_encoder = LabelEncoder()

    ratings_df['u_idx'] = user_encoder.fit_transform(ratings_df['userId'])
    ratings_df['m_idx'] = movie_encoder.fit_transform(ratings_df['movieId'])

    num_users = ratings_df['u_idx'].nunique()
    num_movies = ratings_df['m_idx'].nunique()

    print(f"Number of unique users: {num_users}")
    print(f"Number of unique movies: {num_movies}")
    print(f"Min/Max u_idx: {ratings_df['u_idx'].min()}/{ratings_df['u_idx'].max()}")
    print(f"Min/Max m_idx: {ratings_df['m_idx'].min()}/{ratings_df['m_idx'].max()}")


    # Mean-center ratings (good practice for regression tasks)
    mean_rating = ratings_df['rating'].mean()
    ratings_df['rating_centered'] = ratings_df['rating'] - mean_rating
    print(f"Mean rating: {mean_rating}")

    # Prepare inputs for the model
    X_user = ratings_df['u_idx'].values
    X_movie = ratings_df['m_idx'].values
    y = ratings_df['rating_centered'].values.astype(np.float32)

    # Split data
    X_user_train, X_user_val, \
    X_movie_train, X_movie_val, \
    y_train, y_val = train_test_split(X_user, X_movie, y,
                                      test_size=test_size,
                                      random_state=random_state)

    return {
        "X_user_train": X_user_train, "X_movie_train": X_movie_train, "y_train": y_train,
        "X_user_test": X_user_val, "X_movie_test": X_movie_val, "y_test": y_val,
        "num_users": num_users, "num_movies": num_movies,
        "mean_rating": mean_rating,
        "user_encoder": user_encoder, "movie_encoder": movie_encoder, "ratings" : ratings_df
    }

In [76]:
# --- Load Data ---
data_path = "/kaggle/input/movielens-1m-dataset/ratings.dat" # MODIFY THIS PATH if your data is elsewhere
data = load_and_preprocess_data(data_path=data_path)
num_users = data['num_users']
num_movies = data['num_movies']
mean_rating_val = data['mean_rating']

user_ids = data['X_user_train'] 
movie_ids = data['X_movie_train']
ratings = data['y_train']

print(user_ids.shape, movie_ids.shape, ratings.shape)

Number of unique users: 6040
Number of unique movies: 3706
Min/Max u_idx: 0/6039
Min/Max m_idx: 0/3705
Mean rating: 3.581564453029317
(800167,) (800167,) (800167,)


In [55]:
# For Scaling outputs of final model
temp = pd.DataFrame(data["ratings"])
temp.describe()
lower = temp["rating_centered"].min()
higher = temp["rating_centered"].max()
print(lower, higher)

-2.581564453029317 1.418435546970683


In [8]:
# Trivial Case
temp = np.array(ratings)
guess_MSE = np.var(temp)
print("MSE if guessing the mean of ratings: ", guess_MSE)

MSE if guessing the mean of ratings:  1.2464459


# Collaborative Filtering Model

### When you think collaborative filtering, think of statements like:
- Users who liked similar items also liked...
- Items similar to this item

### 🧠 “Behavioral Similarity”
The system learns from what users did, not what items are about.

> It doesn’t care what genre the item is — it just learns from the pattern of user behavior.

### 🔍 How It Works:
- Looks at user-item interactions (ratings, likes, views)
- Learns latent similarities between users or items
- Powered by embeddings, matrix factorization, or neural models

finds patterns in behavior



## Notes:
- Bias terms let the model learn per-user and per-item offsets (e.g. some users consistently rate higher; some items are universally popular).

In [25]:
# Generalized Matrix Factorization for Hyperparameter Tuning

def buildGMF(hp):
    # GMF Branch: Captures a linear, element-wise interaction between user and item latent factors
    
    gmf_dim  = hp.Choice("gmf_dim", values=[16, 32, 64]) #defines dimensions of gmf
    reg_L2 = hp.Choice("gmf_user_emb_regularization", values=[1e-6, 1e-5, 1e-4])
                           
    # Input layer takes item of vector size 1
    user_input = Input(shape=(1,), name='userId')
    item_input = Input(shape=(1,), name='movieId')
    
    # Create Embeddings for users and items. These are the paramaters we are training
    gmf_user_emb = Embedding(
        num_users,
        gmf_dim,
        embeddings_regularizer=regularizers.l2(reg_L2),
        embeddings_initializer='he_normal',
        name = "gmf_user_emb")(user_input) #embedding for userID
    gmf_item_emb = Embedding(
        num_items,
        gmf_dim,
        embeddings_regularizer=regularizers.l2(reg_L2),
        embeddings_initializer='he_normal',
        name = "gmf_item_emb")(item_input) # embedding for itemID

    # Latent Vectors (The learned features as vectors in higher dimensional space)
    gmf_user_vec = Flatten(name = "gmf_user_vec")(gmf_user_emb) # flatten them
    gmf_user_vec = Dropout(
        hp.Choice("gmf_user_vec_dropoutRate",values=[0.0, 0.1, 0.2, 0.3]),
        name = "gmf_user_vec_dropout")(gmf_user_vec)
    
    gmf_item_vec = Flatten(name = "gmf_item_vec")(gmf_item_emb) # flatten them
    gmf_item_vec = Dropout(
        hp.Choice("gmf_item_vec_dropoutRate", values=[0.0, 0.1, 0.2, 0.3]),
        name = "gmf_item_vec_dropout")(gmf_item_vec)

    # multiply to check how much the movie aligns with the user in training
    gmf_vector = Multiply(name = "gmf_vector")([gmf_user_vec, gmf_item_vec])

    #Output Layer
    output = Dense(1, activation = "linear", name = "prediction")(gmf_vector)

    
    model = Model(inputs=[user_input, item_input], outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice("lr", values=[1e-3, 1e-4, 1e-5])),
        loss='mse'
    )

    return model

In [26]:
GMF_tuner = kt.Hyperband(
    buildGMF,
    objective='val_loss',         # Metric to optimize
    max_epochs=10,                # Max epochs to train each model
    factor=3,                     # Reduction factor for resources
    directory='Hyperparameters',  # Where to save logs
    project_name='GMF_tuning'     # Subfolder name
)

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

GMF_tuner.search([user_ids, movie_ids], ratings,
             epochs=10,
             validation_split=0.2,
             batch_size = 512,
             callbacks=[early_stop])

GMF_best_hps = GMF_tuner.get_best_hyperparameters(num_trials=1)[0]
GMF_best_model = GMF_tuner.get_best_models(1)[0]
print(GMF_best_hps.values)

Trial 30 Complete [00h 00m 35s]
val_loss: 1.2457084655761719

Best val_loss So Far: 0.7641966342926025
Total elapsed time: 00h 08m 25s
{'gmf_dim': 32, 'gmf_user_emb_regularization': 1e-06, 'gmf_user_vec_dropoutRate': 0.2, 'gmf_item_vec_dropoutRate': 0.0, 'lr': 0.001, 'tuner/epochs': 10, 'tuner/initial_epoch': 4, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0012'}


In [27]:
def buildMLP(hp):
    mlp_dim  = hp.Choice("mlp_dim", values=[16, 32, 64]) # defines dimensions of mlp

    layers_choice = hp.Choice(
        "mlp_layers_choice",
        values=["[64, 32]", "[128, 64]", "[128, 64, 32]", "[64, 32, 16, 8]", "[64, 32, 16]" ]
    )
    reg_L2 = hp.Choice("mlp_user_emb_regularization", values=[1e-4, 1e-5, 1e-6])
    
    # parse it back into an actual Python list
    mlp_layers = eval(layers_choice) # mlp neuron layers

     # Input layer takes item of vector size 1
    user_input = Input(shape=(1,), name='userId')
    item_input = Input(shape=(1,), name='movieId')

    # 4. MLP branch: Captures higher-order and non-linear interactions between user and item factors via multiple neural layers
    mlp_user_emb = Embedding(
        num_users,
        mlp_dim,
        embeddings_regularizer=regularizers.l2(reg_L2),
        name = "mlp_user_emb")(user_input) # embedding for userID
    mlp_item_emb = Embedding(
        num_items,
        mlp_dim,
        embeddings_regularizer=regularizers.l2(reg_L2),
        name = "mlp_item_emb")(item_input) # embedding for movieID
    
    mlp_user_vec = Flatten(name = "mlp_user_vec")(mlp_user_emb) # flatten them
    mlp_user_vec = Dropout(
        hp.Choice("mlp_user_vec_dropoutRate",values=[0.0, 0.1, 0.2, 0.3]),
        name = "mlp_user_vec_dropout")(mlp_user_vec)
    
    mlp_item_vec = Flatten(name = "mlp_item_vec")(mlp_item_emb) # flatten them
    mlp_item_vec = Dropout(
        hp.Choice("mlp_item_vec_dropoutRate",values=[0.0, 0.1, 0.2, 0.3]),
        name = "mlp_item_vec_dropout")(mlp_item_vec)
    
    mlp_vector = Concatenate(name = "mlp_vector")([mlp_user_vec, mlp_item_vec]) # stacks them both together
    
    for units in mlp_layers: 
        mlp_vector = Dense(units)(mlp_vector) # goes through neurons, activation relu to allow complexity
        if hp.Boolean("use_BatchNorm"):
            mlp_vector = BatchNormalization()(mlp_vector)
        mlp_vector = Activation('relu')(mlp_vector)
        mlp_vector = Dropout(
            hp.Choice("mlp_vector_dropoutRate",values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5]))(mlp_vector) # Isn't given a name, cuz it causes errors due to duplicate names

    output = Dense(1, activation="linear")(mlp_vector)
    
    model = Model(inputs=[user_input, item_input], outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice("lr", values=[1e-3, 1e-4, 1e-5, 1e-6])),
        loss='mse'
    )

    return model

In [28]:
MLP_tuner = kt.Hyperband(
    buildMLP,
    objective='val_loss',         # Metric to optimize
    max_epochs=10,                # Max epochs to train each model
    factor=3,                     # Reduction factor for resources
    directory='Hyperparameters',  # Where to save logs
    project_name='MLP_tuning'     # Subfolder name
)

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

MLP_tuner.search([user_ids, movie_ids],ratings,
             epochs=10,
             validation_split=0.2,
             batch_size = 512,
             callbacks=[early_stop])

MLP_best_hps = MLP_tuner.get_best_hyperparameters(num_trials=1)[0]
MLP_best_model = MLP_tuner.get_best_models(1)[0]
print(MLP_best_hps.values)

Trial 30 Complete [00h 00m 54s]
val_loss: 0.8128757476806641

Best val_loss So Far: 0.8022095561027527
Total elapsed time: 00h 12m 19s
{'mlp_dim': 32, 'mlp_layers_choice': '[128, 64, 32]', 'mlp_user_emb_regularization': 1e-06, 'mlp_user_vec_dropoutRate': 0.1, 'mlp_item_vec_dropoutRate': 0.3, 'use_BatchNorm': False, 'mlp_vector_dropoutRate': 0.3, 'lr': 0.0001, 'tuner/epochs': 10, 'tuner/initial_epoch': 4, 'tuner/bracket': 2, 'tuner/round': 2, 'tuner/trial_id': '0012'}


In [29]:
def build_two_output_GMF_from_hps(best_hps):
    """
    Rebuilds a GMF model with two outputs (vector + score)
    using the tuned hyperparameters.
    To expose the latent vectors in the NeuMF Model
    """

    # Extract tuned hyperparameters
    gmf_dim = best_hps['gmf_dim']
    gmf_emb_regularization = best_hps['gmf_user_emb_regularization']
    gmf_user_vec_dropoutRate = best_hps['gmf_user_vec_dropoutRate']
    gmf_item_vec_dropoutRate = best_hps['gmf_item_vec_dropoutRate']
    learning_rate = best_hps['lr']

    # Input Layers
    user_input = Input(shape=(1,), name='userId')
    item_input = Input(shape=(1,), name='movieId')

    # Embedding layers
    user_emb = Embedding(
        input_dim=num_users,
        output_dim=gmf_dim,
        embeddings_regularizer=regularizers.l2(gmf_emb_regularization),
        embeddings_initializer='he_normal',
        name='gmf_user_emb'
    )(user_input)

    item_emb = Embedding(
        input_dim=num_items,
        output_dim=gmf_dim,
        embeddings_regularizer=regularizers.l2(gmf_emb_regularization),
        embeddings_initializer='he_normal',
        name='gmf_item_emb'
    )(item_input)

    # Flatten + Dropout
    user_vec = Flatten(name='gmf_user_vec')(user_emb)
    user_vec = Dropout(gmf_user_vec_dropoutRate, name='gmf_user_vec_dropout')(user_vec)

    item_vec = Flatten(name='gmf_item_vec')(item_emb)
    item_vec = Dropout(gmf_item_vec_dropoutRate, name='gmf_item_vec_dropout')(item_vec)

    # Interaction vector (latent features)
    gmf_vector = Multiply(name='gmf_vector')([user_vec, item_vec])
    # Score
    gmf_score = Dense(1, activation='linear', name='prediction')(gmf_vector)

    # Build two‐output model
    model = Model(
        inputs=[user_input, item_input],
        outputs=[gmf_vector, gmf_score],
        name='GMF_two_output'
    )

    # Compile so that only the score contributes to loss
    model.compile(
        optimizer=Adam(learning_rate),
        loss=[None, 'mse'],
        loss_weights=[0.0, 1.0]
    )

    return model


In [30]:
def build_two_output_MLP_from_hps(best_hps):
    """
    Rebuilds a MLP model with two outputs (vector + score)
    using the tuned hyperparameters.
    """

    
    # Extract tuned hyperparameters
    mlp_dim = best_hps['mlp_dim']
    mlp_layers = best_hps['mlp_layers_choice']
    mlp_emb_regularization = best_hps['mlp_user_emb_regularization']
    mlp_user_vec_dropoutRate = best_hps['mlp_user_vec_dropoutRate']
    mlp_item_vec_dropoutRate = best_hps['mlp_item_vec_dropoutRate']
    mlp_vector_dropoutRate = best_hps['mlp_vector_dropoutRate']
    learning_rate = best_hps['lr']
    BatchNorm = best_hps['use_BatchNorm']

    # Input Layers
    user_input = Input(shape=(1,), name='userId')
    item_input = Input(shape=(1,), name='movieId')

    # Embedding layers
    user_emb = Embedding(
        input_dim=num_users,
        output_dim=mlp_dim,
        embeddings_regularizer=regularizers.l2(mlp_emb_regularization),
        embeddings_initializer='he_normal',
        name='mlp_user_emb'
    )(user_input)

    item_emb = Embedding(
        input_dim=num_items,
        output_dim=mlp_dim,
        embeddings_regularizer=regularizers.l2(mlp_emb_regularization),
        embeddings_initializer='he_normal',
        name='mlp_item_emb'
    )(item_input)
    
    # Flatten + Dropout
    user_vec = Flatten(name='mlp_user_vec')(user_emb)
    user_vec = Dropout(mlp_user_vec_dropoutRate, name='mlp_user_vec_dropout')(user_vec)

    item_vec = Flatten(name='mlp_item_vec')(item_emb)
    item_vec = Dropout(mlp_item_vec_dropoutRate, name='mlp_item_vec_dropout')(item_vec)
    
    # Interaction vector (latent features)
    mlp_vector = Concatenate(name = "mlp_vector")([user_vec, item_vec]) # stacks them both together
    
    for units in eval(mlp_layers): 
        mlp_vector = Dense(units)(mlp_vector) # goes through neurons, activation relu to allow complexity
        if BatchNorm:
            mlp_vector = BatchNormalization()(mlp_vector)
        mlp_vector = Activation('relu')(mlp_vector)
        mlp_vector = Dropout(mlp_vector_dropoutRate)(mlp_vector) # Isn't given a name, cuz it causes errors due to duplicate names

    # Score 
    mlp_score = Dense(1, activation="linear")(mlp_vector)    

    # Build two‐output model
    model = Model(
        inputs=[user_input, item_input],
        outputs=[mlp_vector, mlp_score],
        name='MLP_two_output'
    )
    
    # Compile so that only the score contributes to loss
    model.compile(
        optimizer=Adam(learning_rate),
        loss=[None, 'mse'],
        loss_weights=[0.0, 1.0]
    )

    return model

In [31]:
# configure the callback
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',    # watch validation loss
    factor=0.5,            # multiply LR by this factor on plateau
    patience=2,            # wait this many epochs with no improvement
    min_lr=1e-7,           # don’t go below this LR
    verbose=1              # print messages when LR is reduced
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True,
    baseline = 0.001
)

# now include it in your fit call
GMF_Final = build_two_output_GMF_from_hps(GMF_best_hps)
gmf_dummy = np.zeros((ratings.shape[0], GMF_best_hps['gmf_dim']), dtype=np.float32)
GMF_Final.fit(
    x=[user_ids, movie_ids],
    y= [gmf_dummy, ratings],
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    callbacks = [reduce_lr, early_stop]
)


MLP_Final = build_two_output_MLP_from_hps(MLP_best_hps)
mlp_dummy = np.zeros((ratings.shape[0], MLP_best_hps['mlp_dim']), dtype=np.float32)
MLP_Final.fit(
    x=[user_ids, movie_ids],
    y= [mlp_dummy, ratings],
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    callbacks = [reduce_lr, early_stop]
)

Epoch 1/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 1.1908 - val_loss: 0.8566 - learning_rate: 0.0010
Epoch 2/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.8170 - val_loss: 0.7992 - learning_rate: 0.0010
Epoch 3/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.7534 - val_loss: 0.7789 - learning_rate: 0.0010
Epoch 4/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.7087 - val_loss: 0.7685 - learning_rate: 0.0010
Epoch 5/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6683 - val_loss: 0.7658 - learning_rate: 0.0010
Epoch 6/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6340 - val_loss: 0.7684 - learning_rate: 0.0010
Epoch 6: early stopping
Restoring model weights from the end of the best epoch: 5.
Epoch 1/10
[1m1251/1251[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x7f4e0aa00f90>

In [32]:
# Extra Training If needed
GMF_Final.fit(
    x=[user_ids, movie_ids],
    y= [gmf_dummy, ratings],
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    callbacks = [reduce_lr, early_stop]
)



Epoch 1/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.6301 - val_loss: 0.7685 - learning_rate: 0.0010
Epoch 2/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6051 - val_loss: 0.7748 - learning_rate: 0.0010
Epoch 3/10
[1m1244/1251[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 0.5813
Epoch 3: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.5813 - val_loss: 0.7828 - learning_rate: 0.0010
Epoch 4/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.5546 - val_loss: 0.7863 - learning_rate: 5.0000e-04
Epoch 5/10
[1m1237/1251[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 0.5470
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[

<keras.src.callbacks.history.History at 0x7f4e0aaceed0>

In [35]:
# Extra Training If needed
MLP_Final.fit(
    x=[user_ids, movie_ids],
    y= [mlp_dummy, ratings],
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    callbacks = [reduce_lr, early_stop]
)

Epoch 1/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.7527 - val_loss: 0.7778 - learning_rate: 1.0000e-04
Epoch 2/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.7527 - val_loss: 0.7778 - learning_rate: 1.0000e-04
Epoch 3/10
[1m1250/1251[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 0.7488
Epoch 3: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.7488 - val_loss: 0.7778 - learning_rate: 1.0000e-04
Epoch 4/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.7446 - val_loss: 0.7771 - learning_rate: 5.0000e-05
Epoch 5/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.7400 - val_loss: 0.7768 - learning_rate: 5.0000e-05
Epoch 6/10
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms

<keras.src.callbacks.history.History at 0x7f4e0999d410>

In [68]:
class MyNeuMF(HyperModel):
    def __init__(self,gmf_model, mlp_model, num_users, num_items):
        self.gmf_model = gmf_model
        self.mlp_model = mlp_model
        self.num_users = num_users
        self.num_items = num_items


    def build(self, hp):
        """
        Combines pretrained GMF and MLP two-output models into one NeuMF.
        
        Args:
          gmf_model: a Keras Model with outputs [gmf_vector, gmf_score]
          mlp_model: a Keras Model with outputs [mlp_vector, mlp_score]
          num_users, num_items: sizes for bias embeddings
          lr: learning rate for final NeuMF (For fine Tuning)
        
        Returns:
          neu_model: compiled Keras Model ready for fine-tuning
        """
        # 1) Input Layer
        user_input = Input(shape=(1,), name='userId')
        item_input = Input(shape=(1,), name='movieId')
        
        # 2) Call the pretrained branches, grab only the vector outputs
        gmf_vector, _ = self.gmf_model([user_input, item_input])
        mlp_vector, _ = self.mlp_model([user_input, item_input])

        gmf_vector = Dropout(hp.Choice("gmf_dropout", values=[0.0,0.1,0.2,0.3,0.4,0.5]))(gmf_vector)
        mlp_vector = Dropout(hp.Choice("mlp_dropout", values=[0.0,0.1,0.2,0.3,0.4,0.5]))(mlp_vector)
        
         # 3) Bias embeddings (one scalar per user / per item)
        user_bias = Embedding(self.num_users, 1,embeddings_initializer='zeros', name='user_bias')(user_input)      # (batch,1,1)
        item_bias = Embedding(self.num_items, 1,embeddings_initializer='zeros', name='item_bias')(item_input)     # (batch,1,1)
        user_bias = Flatten(name='user_bias_flat')(user_bias)                 # (batch,1)
        item_bias = Flatten(name='item_bias_flat')(item_bias)                 # (batch,1)
        bias_vector = Add(name='bias_add')([user_bias, item_bias])            # (batch,1)
        
    
        # 4) Fuse GMF+MLP feature vectors and append bias
        fusion = Concatenate(name='fusion')([gmf_vector, mlp_vector])         # (batch, D_gmf+D_mlp)

        if hp.Boolean("use_BatchNorm"):
            fusion = BatchNormalization()(fusion)
        # 5) Final prediction head (no additional bias, since bias is in the features)
        raw_score = Dense(
            1,
            activation='linear',
            use_bias=False,
            name='raw_score'
        )(fusion)
    
        output = Add(name = "prediction")([raw_score, bias_vector])
        
        # 6) Build & compile NeuMF
        model = Model([user_input, item_input], output, name='NeuMF')
        model.compile(
            optimizer=Adam(hp.Choice("lr", values=[1e-2,1e-3,1e-4,1e-5])),
            loss='mse'
        )
        return model
   

In [93]:
GMF_Final.trainable = False
MLP_Final.trainable = False

NeuMF = MyNeuMF(GMF_Final, MLP_Final, num_users, num_items)

tuner = kt.Hyperband(
    NeuMF,
    objective='val_loss',
    max_epochs=10,
    factor=3,
    directory='Hyperparameters',
    project_name='NeuMF_tuning'
)

history1 = tuner.search(
    x=[user_ids, movie_ids],
    y=ratings,
    epochs=10,
    validation_split=0.2,
    batch_size=512,
    callbacks = [reduce_lr, early_stop]
)

NeuMF = tuner.get_best_models(1)[0]
NeuMF_best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print(NeuMF_best_hp.values)

Reloading Tuner from Hyperparameters/NeuMF_tuning/tuner0.json
{'gmf_dropout': 0.4, 'mlp_dropout': 0.4, 'use_BatchNorm': False, 'lr': 0.001, 'tuner/epochs': 10, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


  saveable.load_own_variables(weights_store.get(inner_path))


In [72]:
lr = NeuMF_best_hp["lr"]
new_lr = lr / 2

NeuMF.trainable = True

# Recompile with a smaller LR (to avoid blowing away pretrained weights)
NeuMF.compile(optimizer=Adam(new_lr), loss='mse')

# Continue training end-to-end
NeuMF.fit(
    x=[user_ids, movie_ids],
    y=ratings,
    epochs=20,                      # the remaining epochs
    batch_size=512,
    validation_split=0.2,
    callbacks=[reduce_lr, early_stop]
)

Epoch 1/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - loss: 0.6589 - val_loss: 0.7471 - learning_rate: 5.0000e-04
Epoch 2/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.6587 - val_loss: 0.7468 - learning_rate: 5.0000e-04
Epoch 3/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.6583 - val_loss: 0.7470 - learning_rate: 5.0000e-04
Epoch 4/20
[1m1248/1251[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 0.6569
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.6569 - val_loss: 0.7468 - learning_rate: 5.0000e-04
Epoch 5/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.6574 - val_loss: 0.7467 - learning_rate: 2.5000e-04
Epoch 6/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3m

<keras.src.callbacks.history.History at 0x7f4e11f56550>

In [None]:
# If we want any more epochs
NeuMF.fit(
    x=[user_ids, movie_ids],
    y=ratings,
    epochs=25,
    batch_size=512,
    validation_split=0.2,
    callbacks=[reduce_lr, early_stop]
)

In [90]:
def round_to_half(x):
    return round(x * 2) / 2.0

In [92]:
# Testing:
# Example of making a prediction and converting back to original scale
sample_user_idx = data['X_user_test']
sample_movie_idx = data['X_movie_test']
actual_centered_ratings = data['y_test']
actual_original_ratings = actual_centered_ratings + mean_rating_val

predicted_centered_ratings = NeuMF.predict([sample_user_idx, sample_movie_idx])
predicted_original_ratings = np.clip(np.round((predicted_centered_ratings.flatten() + mean_rating_val)*2)/2.0, 1, 5)

print("\nSample Predictions:")
for i in range(5):
    print(f"User: {sample_user_idx[i]}, Movie: {sample_movie_idx[i]} | Actual (orig): {actual_original_ratings[i]:.2f} | Predicted (orig): {predicted_original_ratings[i]:.2f}")

mse  = mean_squared_error(actual_original_ratings, predicted_original_ratings)
rmse = np.sqrt(mse)
mae  = mean_absolute_error(actual_original_ratings, predicted_original_ratings)

print(f"Test MSE:  {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE:  {mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step

Sample Predictions:
User: 5411, Movie: 2480 | Actual (orig): 2.00 | Predicted (orig): 3.00
User: 5439, Movie: 843 | Actual (orig): 5.00 | Predicted (orig): 5.00
User: 367, Movie: 3475 | Actual (orig): 4.00 | Predicted (orig): 3.50
User: 424, Movie: 1574 | Actual (orig): 4.00 | Predicted (orig): 3.50
User: 4941, Movie: 3455 | Actual (orig): 1.00 | Predicted (orig): 3.00
Test MSE:  1.1000
Test RMSE: 1.0488
Test MAE:  0.8000


In [None]:
print(predicted_original_ratings.shape)
#Trivial Case Model Performance:
guess = np.ones(predicted_original_ratings.shape) * mean_rating_val
guess_mse  = mean_squared_error(actual_original_ratings, guess)
rmse = np.sqrt(mse)
guess_mae  = mean_absolute_error(actual_original_ratings, guess)

print(f"Guess MSE:  {guess_mse:.4f}")
print(f"Guess MAE:  {guess_mae:.4f}")

# Content-Based Filtering Model

When you think of content-based filtering, think of statements like:
- Because you liked horror
- Because you searched laptops

### 🧠 “Attribute Similarity”
The system uses the metadata or features of items (or users) directly.

> It recommends items with similar features to what you liked, not because other users liked them.

### 📦 How It Works:
- Uses item (or user) attributes: genres, categories, descriptions
- Builds a user profile from liked item features
- Compares feature vectors (e.g., via cosine similarity)

finds patterns in features

Collaborative filtering learns from who likes what, no matter what it is.

Content-based filtering learns from what the thing is, no matter who liked it.

In [81]:
GMF_Final.summary()
MLP_Final.summary()
NeuMF.summary()

In [94]:
import matplotlib.pyplot as plt

def plot_mse_history(history):
    """
    Plots training vs validation MSE over epochs.
    Expects a Keras History object with keys 'loss' and 'val_loss'.
    """
    epochs = range(1, len(history.history['loss']) + 1)
    
    plt.figure()
    plt.plot(epochs, history.history['loss'], label='Training MSE')
    plt.plot(epochs, history.history['val_loss'], label='Validation MSE')
    plt.title('Training and Validation MSE over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('MSE')
    plt.legend()
    plt.grid(True)
    plt.show()

plot_mse_history(history1)

AttributeError: 'NoneType' object has no attribute 'history'

In [96]:
GMF_Final.save('GMF_Model.keras')
MLP_Final.save("MLP_Model.keras")
NeuMF.save("NeuMF_Model.keras")