# CS412 - Assignment 3
## Atesam Abdullah | 2021114

# 1 Import Dependencies


In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

from tensorflow.keras import layers, Model
from tensorflow.keras.layers import StringLookup

# Report the library versions this notebook was executed with.
for label, module in (("TF", tf), ("TFDS", tfds), ("TFRS", tfrs)):
    print(f"{label} version:", module.__version__)

TF version: 2.15.1
TFDS version: 4.9.8
TFRS version: v0.7.3


# 2 Load Data & Metadata

We need:
- **movie titles** from `movielens/100k-movies` (1 682 movies; duplicate titles collapse to one entry in the lookup vocabulary)
- **ratings** from `movielens/100k-ratings` (100 000 interactions)


In [2]:
# Movie catalogue: keep only the title string of every record
# (one element per movie, 1 682 in total).
movies_raw = tfds.load("movielens/100k-movies", split="train")
movies_ds = movies_raw.map(lambda movie: movie["movie_title"])

# Title -> integer index vocabulary; titles not seen during adapt()
# map to the "[UNK]" out-of-vocabulary token.
movie_lookup = StringLookup(mask_token=None, oov_token="[UNK]")
movie_lookup.adapt(movies_ds)

# Load ratings and preprocess
def preprocess(x):
    """Reduce a raw rating record to the three fields used by the models.

    Args:
        x: Feature dict for one rating; the keys "user_id", "movie_title"
            and "user_rating" are read.

    Returns:
        A dict with "user_id" cast to tf.string, "movie_title" passed
        through unchanged, and "rating" holding "user_rating" cast to
        tf.float32.
    """
    user_id = tf.cast(x["user_id"], tf.string)
    rating = tf.cast(x["user_rating"], tf.float32)
    return dict(user_id=user_id, movie_title=x["movie_title"], rating=rating)

# Read the ratings in a fixed order. The train/test split further down uses
# take()/skip() on a seeded shuffle, which iterates this dataset in separate
# passes; the two halves are only guaranteed to be disjoint if every pass
# sees the records in the same order, so file-level shuffling is disabled.
ratings = tfds.load(
    "movielens/100k-ratings",
    split="train",
    shuffle_files=False,
).map(preprocess)

# 3 Build User Lookup & Dataset Splits

- Adapt a **user** `StringLookup` on the 100 000 ratings (~943 unique IDs).  
- Shuffle & split 80 % train / 20 % test.  
- Batch & cache.


In [3]:
# User lookup
user_lookup = StringLookup(mask_token=None, oov_token="[UNK]")
# Batch the ids before adapting: adapt() then processes a few large batches
# instead of stepping through 100 000 scalar elements one at a time. The
# resulting vocabulary is the same; only the adaptation time changes.
user_lookup.adapt(ratings.map(lambda x: x["user_id"]).batch(4096))

# Inspect vocab sizes (each count includes the "[UNK]" OOV entry)
num_users  = user_lookup.vocabulary_size()
num_movies = movie_lookup.vocabulary_size()
print(f"Users: {num_users}, Movies: {num_movies}")

Users: 944, Movies: 1665


# 4 Shuffle & split


In [4]:
tf.random.set_seed(42)

# One fixed permutation of the ratings: the seed plus
# reshuffle_each_iteration=False make every pass over `shuffled`
# yield the same order, so take()/skip() below stay consistent.
shuffled = ratings.shuffle(
    buffer_size=100_000,
    seed=42,
    reshuffle_each_iteration=False,
)

# First 80 000 examples -> training, the remainder -> test.
train_examples = shuffled.take(80_000)
test_examples = shuffled.skip(80_000)

# Batch first, then cache the batched tensors for reuse across epochs.
train = train_examples.batch(4096).cache()
test  = test_examples.batch(2048).cache()

# 5 Define the MLP Model

- Embed user & movie, concatenate, pass through dense layers → predict rating.


In [5]:
  class MLPModel(tf.keras.Model):
      """Rating regressor built from concatenated user and movie embeddings.

      The user and movie indices are embedded separately, the two embedding
      vectors are concatenated, and the result is passed through a stack of
      ReLU dense layers followed by a single linear output unit.

      Args:
          user_vocab_size: Number of rows in the user embedding table.
          movie_vocab_size: Number of rows in the movie embedding table.
          embedding_dim: Width of each embedding vector.
          hidden_units: Iterable with the width of each hidden dense layer.
      """

      # A tuple default avoids sharing one mutable list across instances.
      def __init__(self, user_vocab_size, movie_vocab_size, embedding_dim=32, hidden_units=(64, 32)):
          super().__init__()
          self.user_emb = layers.Embedding(user_vocab_size, embedding_dim)
          self.movie_emb = layers.Embedding(movie_vocab_size, embedding_dim)
          hidden_layers = [
              layers.Dense(units, activation="relu") for units in hidden_units
          ]
          self.mlp = tf.keras.Sequential(hidden_layers + [layers.Dense(1)])

      def call(self, inputs):
          """Predict a rating for each (user index, movie index) pair.

          Args:
              inputs: A pair ``(u, m)`` of integer index tensors.

          Returns:
              The output of the final ``Dense(1)`` layer.
          """
          u, m = inputs
          u_vec = self.user_emb(u)
          m_vec = self.movie_emb(m)
          # Concatenate along the embedding (last) axis; for 1-D index
          # batches this is the same axis as the original axis=1.
          x = tf.concat([u_vec, m_vec], axis=-1)
          return self.mlp(x)

class MLPRecommender(tfrs.models.Model):
    """TFRS wrapper that trains ``MLPModel`` on rating regression.

    Raw string features are converted to integer indices with the
    notebook-level ``user_lookup`` and ``movie_lookup`` layers, which must
    be adapted before the model is called.

    Args:
        user_vocab_size: Size of the user vocabulary (embedding rows).
        movie_vocab_size: Size of the movie vocabulary (embedding rows).
    """

    def __init__(self, user_vocab_size, movie_vocab_size):
        super().__init__()
        self.model = MLPModel(user_vocab_size, movie_vocab_size)
        self.task = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")]
        )

    def call(self, features):
        """Return predicted ratings for a batch of raw features.

        Args:
            features: Dict with string tensors under "user_id" and
                "movie_title".
        """
        # apply lookups
        u = user_lookup(features["user_id"])
        m = movie_lookup(features["movie_title"])
        return self.model((u, m))

    def compute_loss(self, features, training=False):
        """Compute the ranking-task loss for one batch.

        Args:
            features: Dict with "user_id", "movie_title" and "rating".
            training: Accepted for the TFRS interface; not used here.

        Raises:
            KeyError: If "rating" is missing from ``features``.
        """
        # Read the label without pop(): the caller's dict is left intact so
        # the same batch can be reused (e.g. for a second model or metric).
        labels = features["rating"]
        inputs = {key: value for key, value in features.items() if key != "rating"}
        preds = self(inputs)
        return self.task(labels=labels, predictions=preds)

# 6 Define the NeuMF Model

- Two paths: GMF (element-wise product) + MLP → merge → predict.


In [6]:
class NeuMFModel(tf.keras.Model):
    """Two-branch rating regressor (GMF branch + MLP branch).

    The GMF branch multiplies a user embedding with a movie embedding
    element-wise. The MLP branch concatenates a second, separate pair of
    embeddings and feeds them through ReLU dense layers. Both branch outputs
    are concatenated and mapped to one value by a final linear layer.

    Args:
        user_vocab_size: Number of rows in each user embedding table.
        movie_vocab_size: Number of rows in each movie embedding table.
        embedding_dim: Width of every embedding vector.
        mlp_units: Iterable with the width of each dense layer in the MLP
            branch.
    """

    # A tuple default avoids sharing one mutable list across instances.
    def __init__(self, user_vocab_size, movie_vocab_size, embedding_dim=32, mlp_units=(64, 32)):
        super().__init__()
        # GMF path
        self.gmf_user = layers.Embedding(user_vocab_size, embedding_dim)
        self.gmf_movie = layers.Embedding(movie_vocab_size, embedding_dim)
        # MLP path (its own embeddings, not shared with the GMF path)
        self.mlp_user = layers.Embedding(user_vocab_size, embedding_dim)
        self.mlp_movie = layers.Embedding(movie_vocab_size, embedding_dim)
        self.mlp = tf.keras.Sequential([
            layers.Dense(units, activation="relu") for units in mlp_units
        ])
        # Final prediction
        self.out = layers.Dense(1)

    def call(self, inputs):
        """Predict a rating for each (user index, movie index) pair.

        Args:
            inputs: A pair ``(u_id, m_id)`` of integer index tensors.

        Returns:
            The output of the final ``Dense(1)`` layer.
        """
        u_id, m_id = inputs
        # GMF: element-wise product of the two embeddings
        g = self.gmf_user(u_id) * self.gmf_movie(m_id)
        # MLP: concatenate along the embedding (last) axis, then dense stack
        concat = tf.concat([self.mlp_user(u_id), self.mlp_movie(m_id)], axis=-1)
        mlp_out = self.mlp(concat)
        # Merge both branches and project to a single value
        x = tf.concat([g, mlp_out], axis=-1)
        return self.out(x)

class NeuMFRecommender(tfrs.models.Model):
    """TFRS wrapper that trains ``NeuMFModel`` on rating regression.

    Raw string features are converted to integer indices with the
    notebook-level ``user_lookup`` and ``movie_lookup`` layers, which must
    be adapted before the model is called.

    Args:
        user_vocab_size: Size of the user vocabulary (embedding rows).
        movie_vocab_size: Size of the movie vocabulary (embedding rows).
    """

    def __init__(self, user_vocab_size, movie_vocab_size):
        super().__init__()
        self.model = NeuMFModel(user_vocab_size, movie_vocab_size)
        self.task = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")]
        )

    def call(self, features):
        """Return predicted ratings for a batch of raw features.

        Args:
            features: Dict with string tensors under "user_id" and
                "movie_title".
        """
        u = user_lookup(features["user_id"])
        m = movie_lookup(features["movie_title"])
        return self.model((u, m))

    def compute_loss(self, features, training=False):
        """Compute the ranking-task loss for one batch.

        Args:
            features: Dict with "user_id", "movie_title" and "rating".
            training: Accepted for the TFRS interface; not used here.

        Raises:
            KeyError: If "rating" is missing from ``features``.
        """
        # Read the label without pop(): the caller's dict is left intact so
        # the same batch can be reused (e.g. for a second model or metric).
        labels = features["rating"]
        inputs = {key: value for key, value in features.items() if key != "rating"}
        preds = self(inputs)
        return self.task(labels=labels, predictions=preds)

# 7 Compile & Train

Train each model for 25 epochs.


In [8]:
def compile_and_fit(model, name, train_ds, val_ds, epochs=25, learning_rate=0.1):
    """Compile ``model`` with Adagrad and fit it, printing start/end banners.

    Both recommenders go through this one code path so the optimizer
    settings and epoch count cannot drift apart, and the epoch count shown
    in the banner is always the one actually passed to ``fit``.

    Args:
        model: Recommender to train (compiled in place).
        name: Label used in the progress messages.
        train_ds: Batched training dataset.
        val_ds: Batched dataset evaluated after every epoch.
        epochs: Number of training epochs.
        learning_rate: Adagrad learning rate.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    model.compile(
        optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")]
    )

    print(f"▶️ Starting training: {name} model ({epochs} epochs)...")
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        verbose=2  # show one line per epoch
    )
    print(f"✅ Finished training: {name} model!")
    return history


# 7.1 MLP
mlp = MLPRecommender(num_users, num_movies)
history_mlp = compile_and_fit(mlp, "MLP", train, test)

# 7.2 NeuMF
nemf = NeuMFRecommender(num_users, num_movies)
history_nemf = compile_and_fit(nemf, "NeuMF", train, test)

▶️ Starting training: MLP model (25 epochs)...
Epoch 1/25
20/20 - 1s - rmse: 1.5641 - loss: 1.1718 - regularization_loss: 0.0000e+00 - total_loss: 1.1718 - val_rmse: 1.0965 - val_loss: 1.2004 - val_regularization_loss: 0.0000e+00 - val_total_loss: 1.2004 - 843ms/epoch - 42ms/step
Epoch 2/25
20/20 - 0s - rmse: 1.0792 - loss: 1.0658 - regularization_loss: 0.0000e+00 - total_loss: 1.0658 - val_rmse: 1.0479 - val_loss: 1.0927 - val_regularization_loss: 0.0000e+00 - val_total_loss: 1.0927 - 214ms/epoch - 11ms/step
Epoch 3/25
20/20 - 0s - rmse: 1.0302 - loss: 0.9666 - regularization_loss: 0.0000e+00 - total_loss: 0.9666 - val_rmse: 0.9980 - val_loss: 0.9977 - val_regularization_loss: 0.0000e+00 - val_total_loss: 0.9977 - 215ms/epoch - 11ms/step
Epoch 4/25
20/20 - 0s - rmse: 0.9830 - loss: 0.9144 - regularization_loss: 0.0000e+00 - total_loss: 0.9144 - val_rmse: 0.9723 - val_loss: 0.9505 - val_regularization_loss: 0.0000e+00 - val_total_loss: 0.9505 - 193ms/epoch - 10ms/step
Epoch 5/25
20/20 

# 8 Evaluate & Compare Models

In this step we:

1. Compute the **RMSE** on the test set for both models.  
2. Print out the metrics side by side.  
3. Summarize which model performs better.


In [9]:
# 8.1 Evaluate on test set
mlp_results = mlp.evaluate(test, return_dict=True, verbose=0)
nemf_results = nemf.evaluate(test, return_dict=True, verbose=0)

# 8.2 Extract RMSE values
mlp_rmse  = mlp_results["rmse"]
nemf_rmse = nemf_results["rmse"]

# 8.3 Display raw outputs
# NOTE(review): the "loss" entry does not equal RMSE**2 in the recorded run,
# which is consistent with TFRS reporting the loss of the last evaluated
# batch rather than a dataset-wide mean. Use RMSE for the comparison.
print(f"MLP Evaluation Results:\n  Loss = {mlp_results['loss']:.4f}\n  RMSE = {mlp_rmse:.4f}\n")
print(f"NeuMF Evaluation Results:\n  Loss = {nemf_results['loss']:.4f}\n  RMSE = {nemf_rmse:.4f}\n")

# 8.4 Compare side by side
print("📝 Comparison:")
if mlp_rmse == nemf_rmse:
    # A tie has no winner; handling it here also avoids dividing by zero
    # in the degenerate case where both RMSEs are 0.
    better = "Neither"
    improvement = 0.0
    print("  → Both models achieve the same RMSE.")
else:
    if mlp_rmse < nemf_rmse:
        better, worse_rmse, better_rmse = "MLP", nemf_rmse, mlp_rmse
    else:
        better, worse_rmse, better_rmse = "NeuMF", mlp_rmse, nemf_rmse
    # Relative reduction with respect to the weaker model; always positive.
    improvement = (worse_rmse - better_rmse) / worse_rmse * 100
    print(f"  → {better} achieves the lower RMSE.")
    print(f"  → Improvement: {improvement:.2f}% relative reduction in RMSE.")

MLP Evaluation Results:
  Loss = 0.8893
  RMSE = 0.9368

NeuMF Evaluation Results:
  Loss = 0.8899
  RMSE = 0.9359

📝 Comparison:
  → NeuMF achieves the lower RMSE.
  → Improvement: 0.09% relative reduction in RMSE.


In [10]:
print(f"{'User':<6} {'Movie':<40} {'Actual':<6} {'MLP Pred':<8} {'NeuMF Pred':<10}")
print("-"*80)

# Regroup the first 10 test examples into a single batch so that each model
# runs one forward pass instead of one call per example.
for sample in test.unbatch().batch(10).take(1):
    features = {
        "user_id": sample["user_id"],
        "movie_title": sample["movie_title"],
    }

    # Model output has one column per example row; keep that column only.
    preds_mlp  = mlp(features).numpy()[:, 0]
    preds_nemf = nemf(features).numpy()[:, 0]

    rows = zip(
        sample["user_id"].numpy(),
        sample["movie_title"].numpy(),
        sample["rating"].numpy(),
        preds_mlp,
        preds_nemf,
    )
    for user, movie, actual, pred_mlp, pred_nemf in rows:
        # String tensors come back as bytes; decode for display.
        print(f"{user.decode():<6} "
              f"{movie.decode():<40} "
              f"{actual:<6.1f} "
              f"{pred_mlp:<8.2f} "
              f"{pred_nemf:<10.2f}")

User   Movie                                    Actual MLP Pred NeuMF Pred
--------------------------------------------------------------------------------
346    M*A*S*H (1970)                           4.0    3.79     3.79      
602    Volcano (1997)                           4.0    3.32     3.31      
393    2001: A Space Odyssey (1968)             1.0    4.11     4.12      
152    Dances with Wolves (1990)                5.0    4.69     4.71      
738    Speed (1994)                             3.0    3.63     3.62      
382    Swingers (1996)                          2.0    3.71     3.72      
85     Casablanca (1942)                        5.0    4.24     4.25      
152    Independence Day (ID4) (1996)            5.0    4.25     4.25      
186    Hoodlum (1997)                           3.0    2.99     2.99      
130    Renaissance Man (1994)                   4.0    3.62     3.63      
