In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from sklearn.model_selection import train_test_split


In [2]:
# Load ratings dataset
ratings = pd.read_csv("ml-latest-small/ratings.csv")

# Scale ratings between 0 and 1 (implicit feedback)
ratings['rating'] = ratings['rating'].apply(lambda x: 1 if x >= 3.5 else 0)

# Get unique user and movie IDs
user_ids = ratings['userId'].unique().tolist()
movie_ids = ratings['movieId'].unique().tolist()

# Map users and movies to indices
user_id_map = {id: idx for idx, id in enumerate(user_ids)}
movie_id_map = {id: idx for idx, id in enumerate(movie_ids)}

# Replace original IDs with mapped indices
ratings['userId'] = ratings['userId'].map(user_id_map)
ratings['movieId'] = ratings['movieId'].map(movie_id_map)

# Split into training and testing sets
train, test = train_test_split(ratings, test_size=0.2, random_state=42)


In [3]:
# Define model parameters
num_users = len(user_id_map)
num_movies = len(movie_id_map)
embedding_dim = 50  # Dimensionality of embedding space

# Define input layers
user_input = Input(shape=(1,), name='User_Input')
movie_input = Input(shape=(1,), name='Movie_Input')

# Create embedding layers
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='User_Embedding')(user_input)
movie_embedding = Embedding(input_dim=num_movies, output_dim=embedding_dim, name='Movie_Embedding')(movie_input)

# Flatten embeddings
user_vector = Flatten()(user_embedding)
movie_vector = Flatten()(movie_embedding)

# Concatenate user and movie embeddings
concat = Concatenate()([user_vector, movie_vector])

# Fully connected layers
dense_1 = Dense(128, activation='relu')(concat)
dropout_1 = Dropout(0.3)(dense_1)
dense_2 = Dense(64, activation='relu')(dropout_1)
dropout_2 = Dropout(0.3)(dense_2)
dense_3 = Dense(32, activation='relu')(dropout_2)

# Output layer (binary classification: liked/not liked)
output = Dense(1, activation='sigmoid')(dense_3)

# Build and compile model
ncf_model = Model(inputs=[user_input, movie_input], outputs=output)
ncf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show model summary
ncf_model.summary()


In [4]:
# Prepare training data
train_user_input = train['userId'].values
train_movie_input = train['movieId'].values
train_labels = train['rating'].values  # 1 for liked, 0 for not liked

# Train model
history = ncf_model.fit(
    [train_user_input, train_movie_input], train_labels,
    epochs=10, batch_size=64, validation_split=0.1, verbose=1
)


Epoch 1/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6767 - loss: 0.5971 - val_accuracy: 0.7300 - val_loss: 0.5370
Epoch 2/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7637 - loss: 0.4901 - val_accuracy: 0.7305 - val_loss: 0.5394
Epoch 3/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7860 - loss: 0.4511 - val_accuracy: 0.7296 - val_loss: 0.5466
Epoch 4/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7999 - loss: 0.4220 - val_accuracy: 0.7265 - val_loss: 0.5695
Epoch 5/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8181 - loss: 0.3919 - val_accuracy: 0.7233 - val_loss: 0.6113
Epoch 6/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8367 - loss: 0.3578 - val_accuracy: 0.7205 - val_loss: 0.6454
Epoch 7/10
[1m1

In [5]:
# Prepare test data
test_user_input = test['userId'].values
test_movie_input = test['movieId'].values
test_labels = test['rating'].values

# Get predictions
predictions = ncf_model.predict([test_user_input, test_movie_input])
predicted_ratings = (predictions > 0.5).astype(int)

# Compute Accuracy
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(test_labels, predicted_ratings)
print(f"NCF Accuracy: {accuracy:.4f}")


[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266us/step
NCF Accuracy: 0.7066


In [6]:
from collections import defaultdict

# Function to compute Precision@K and Recall@K
def precision_recall_at_k(y_true, y_pred, k=10):
    user_ratings = defaultdict(list)

    for uid, actual, pred in zip(test['userId'], y_true, y_pred):
        user_ratings[uid].append((actual, pred))

    precisions, recalls = [], []

    for uid, ratings in user_ratings.items():
        ratings.sort(key=lambda x: x[1], reverse=True)  # Sort by predicted score
        top_k = ratings[:k]

        relevant = sum((actual == 1) for actual, _ in ratings)
        recommended_relevant = sum((actual == 1) for actual, _ in top_k)

        precision = recommended_relevant / k
        recall = recommended_relevant / relevant if relevant > 0 else 0

        precisions.append(precision)
        recalls.append(recall)

    return np.mean(precisions), np.mean(recalls)

# Compute Precision@K and Recall@K for NCF
precision_ncf, recall_ncf = precision_recall_at_k(test_labels, predictions, k=10)
print(f"NCF Precision@10: {precision_ncf:.4f}, Recall@10: {recall_ncf:.4f}")


NCF Precision@10: 0.6005, Recall@10: 0.6496


In [10]:
import numpy as np


def ndcg_at_k(y_true, y_pred, k):
    ndcg_values = []

    # Loop through each user and their labels/predictions
    for actual, pred in zip(y_true, y_pred):
        # Ensure actual is a list or array
        if not isinstance(actual, (list, np.ndarray)):
            actual = [actual]  # Wrap single values in a list

        # Sort both by relevance
        ideal_ranking = sorted(actual, reverse=True)[:k]
        pred_ranking = [x for _, x in sorted(zip(pred, actual), reverse=True)][:k]

        # Compute DCG
        dcg = sum((rel / np.log2(i + 2)) for i, rel in enumerate(pred_ranking))
        # Compute IDCG
        idcg = sum((rel / np.log2(i + 2)) for i, rel in enumerate(ideal_ranking))
        # Handle division by zero in case idcg is 0
        ndcg = dcg / idcg if idcg > 0 else 0
        ndcg_values.append(ndcg)

    return np.mean(ndcg_values)


# Dummy data for testing
test_labels = [[3, 2, 3, 0, 1], [2, 1, 0, 0, 0]]  # True relevance scores per user
predictions = [[0.9, 0.8, 0.6, 0.5, 0.4], [0.7, 0.6, 0.2, 0.1, 0.0]]  # Predicted scores per user

# Compute NDCG at k=10
ndcg_ncf = ndcg_at_k(test_labels, predictions, k=10)
print(f"NCF NDCG@10: {ndcg_ncf:.4f}")


NCF NDCG@10: 0.9862
