In [57]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import warnings
warnings.filterwarnings('ignore')

In [58]:
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])
movies = pd.read_csv('ml-100k/u.item', sep='|', encoding='latin-1', header=None, 
                     names=['movie_id', 'title', 'release_date', 'video_release_date', 
                            'imdb_url', 'unknown', 'Action', 'Adventure', 'Animation',
                            'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
                            'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
                            'Thriller', 'War', 'Western'])


In [60]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)



In [61]:
user_ids = ratings['user_id'].unique()
movie_ids = ratings['item_id'].unique()

user2idx = {user_id: idx for idx, user_id in enumerate(user_ids)}
idx2user = {idx: user_id for user_id, idx in user2idx.items()}

movie2idx = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}
idx2movie = {idx: movie_id for movie_id, idx in movie2idx.items()}


In [62]:
train_data = train_data.copy()
train_data['user_idx'] = train_data['user_id'].map(user2idx)
train_data['movie_idx'] = train_data['item_id'].map(movie2idx)
train_data['rating_normalized'] = (train_data['rating'] - 1) / 4

test_data = test_data.copy()
test_data['user_idx'] = test_data['user_id'].map(user2idx)
test_data['movie_idx'] = test_data['item_id'].map(movie2idx)
test_data['rating_normalized'] = (test_data['rating'] - 1) / 4


In [63]:
X_train_user = train_data['user_idx'].values
X_train_movie = train_data['movie_idx'].values
y_train = train_data['rating_normalized'].values

X_test_user = test_data['user_idx'].values
X_test_movie = test_data['movie_idx'].values
y_test = test_data['rating_normalized'].values

num_users = len(user_ids)
num_movies = len(movie_ids)

In [64]:
def create_ncf_model(num_users, num_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
    user_input = layers.Input(shape=(1,), name='user_input')
    movie_input = layers.Input(shape=(1,), name='movie_input')
    
    user_embedding_gmf = layers.Embedding(
        num_users, embedding_dim, 
        embeddings_regularizer=l2(1e-6),
        name='user_embedding_gmf'
    )(user_input)
    movie_embedding_gmf = layers.Embedding(
        num_movies, embedding_dim,
        embeddings_regularizer=l2(1e-6),
        name='movie_embedding_gmf'
    )(movie_input)
    
    user_vec_gmf = layers.Flatten()(user_embedding_gmf)
    movie_vec_gmf = layers.Flatten()(movie_embedding_gmf)
    
    gmf_vector = layers.Multiply()([user_vec_gmf, movie_vec_gmf])
    
    user_embedding_mlp = layers.Embedding(
        num_users, embedding_dim,
        embeddings_regularizer=l2(1e-6),
        name='user_embedding_mlp'
    )(user_input)
    movie_embedding_mlp = layers.Embedding(
        num_movies, embedding_dim,
        embeddings_regularizer=l2(1e-6),
        name='movie_embedding_mlp'
    )(movie_input)
    
    user_vec_mlp = layers.Flatten()(user_embedding_mlp)
    movie_vec_mlp = layers.Flatten()(movie_embedding_mlp)
    
    mlp_vector = layers.Concatenate()([user_vec_mlp, movie_vec_mlp])
    
    for units in hidden_layers:
        mlp_vector = layers.Dense(
            units, 
            activation='relu',
            kernel_regularizer=l2(1e-6)
        )(mlp_vector)
        mlp_vector = layers.Dropout(0.2)(mlp_vector)
    
    combined = layers.Concatenate()([gmf_vector, mlp_vector])
    
    output = layers.Dense(1, activation='sigmoid', name='output')(combined)
    
    model = Model(inputs=[user_input, movie_input], outputs=output)
    
    return model

In [66]:
ncf_model = create_ncf_model(num_users, num_movies, embedding_dim=50)

ncf_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae']
)

print("\nModel Architecture:")
ncf_model.summary()


Model Architecture:


In [67]:
history = ncf_model.fit(
    [X_train_user, X_train_movie],
    y_train,
    batch_size=256,
    epochs=10,
    validation_split=0.1,
    verbose=1
)

test_loss, test_mae = ncf_model.evaluate([X_test_user, X_test_movie], y_test, verbose=0)
print(f"Test Loss (MSE): {test_loss:.4f}")
print(f"Test MAE: {test_mae:.4f}")

Epoch 1/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.0674 - mae: 0.2078 - val_loss: 0.0578 - val_mae: 0.1925
Epoch 2/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0562 - mae: 0.1880 - val_loss: 0.0561 - val_mae: 0.1874
Epoch 3/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 964us/step - loss: 0.0536 - mae: 0.1829 - val_loss: 0.0552 - val_mae: 0.1852
Epoch 4/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 976us/step - loss: 0.0480 - mae: 0.1720 - val_loss: 0.0544 - val_mae: 0.1821
Epoch 5/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 955us/step - loss: 0.0401 - mae: 0.1554 - val_loss: 0.0558 - val_mae: 0.1826
Epoch 6/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 990us/step - loss: 0.0325 - mae: 0.1379 - val_loss: 0.0574 - val_mae: 0.1853
Epoch 7/10
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms

In [68]:
def create_simple_embedding_model(num_users, num_movies, embedding_dim=50):
    user_input = layers.Input(shape=(1,), name='user_input')
    movie_input = layers.Input(shape=(1,), name='movie_input')
    
    user_embedding = layers.Embedding(
        num_users, embedding_dim,
        embeddings_regularizer=l2(1e-6)
    )(user_input)
    user_vec = layers.Flatten()(user_embedding)
    
    movie_embedding = layers.Embedding(
        num_movies, embedding_dim,
        embeddings_regularizer=l2(1e-6)
    )(movie_input)
    movie_vec = layers.Flatten()(movie_embedding)
    
    dot_product = layers.Dot(axes=1)([user_vec, movie_vec])
    
    user_bias = layers.Embedding(num_users, 1)(user_input)
    user_bias = layers.Flatten()(user_bias)
    
    movie_bias = layers.Embedding(num_movies, 1)(movie_input)
    movie_bias = layers.Flatten()(movie_bias)
    
    output = layers.Add()([dot_product, user_bias, movie_bias])
    output = layers.Activation('sigmoid')(output)
    
    model = Model(inputs=[user_input, movie_input], outputs=output)
    
    return model

In [None]:
def recommend_movies(user_id, N=10, model=ncf_model):
    if user_id not in user2idx:
        print(f"User {user_id} not found")
        return []
    
    user_idx = user2idx[user_id]
    
    user_train_movies = train_data[train_data['user_id'] == user_id]['item_id'].values
    rated_movie_indices = [movie2idx[m] for m in user_train_movies if m in movie2idx]
    
    all_movie_indices = list(range(num_movies))
    
    unrated_movie_indices = [m for m in all_movie_indices if m not in rated_movie_indices]
    
    if len(unrated_movie_indices) == 0:
        return []
    
    user_indices = np.array([user_idx] * len(unrated_movie_indices))
    movie_indices = np.array(unrated_movie_indices)
    
    predictions = model.predict([user_indices, movie_indices], verbose=0)
    predictions = predictions.flatten()
    
    predictions = predictions * 4 + 1
    
    top_indices = np.argsort(predictions)[::-1][:N]
    
    recommendations = []
    for idx in top_indices:
        movie_idx = unrated_movie_indices[idx]
        movie_id = idx2movie[movie_idx]
        movie_title = movies[movies['movie_id'] == movie_id]['title'].values
        predicted_rating = predictions[idx]
        
        if len(movie_title) > 0:
            recommendations.append((movie_id, movie_title[0], predicted_rating))
    
    return recommendations

In [70]:
def get_user_embedding(user_id, model=ncf_model):
    if user_id not in user2idx:
        return None
    
    user_idx = user2idx[user_id]
    
    embedding_layer = model.get_layer('user_embedding_gmf')
    embeddings = embedding_layer.get_weights()[0]
    
    return embeddings[user_idx]


In [71]:
def get_movie_embedding(movie_id, model=ncf_model):
    if movie_id not in movie2idx:
        return None
    
    movie_idx = movie2idx[movie_id]
    
    embedding_layer = model.get_layer('movie_embedding_gmf')
    embeddings = embedding_layer.get_weights()[0]
    
    return embeddings[movie_idx]

In [72]:
def find_similar_movies_by_embedding(movie_id, N=10):
    movie_embedding = get_movie_embedding(movie_id)
    
    if movie_embedding is None:
        return []
    
    embedding_layer = ncf_model.get_layer('movie_embedding_gmf')
    all_embeddings = embedding_layer.get_weights()[0]
    
    similarities = np.dot(all_embeddings, movie_embedding) / (
        np.linalg.norm(all_embeddings, axis=1) * np.linalg.norm(movie_embedding)
    )
    
    movie_idx = movie2idx[movie_id]
    similarities[movie_idx] = -1  
    
    top_indices = np.argsort(similarities)[::-1][:N]
    
    similar_movies = []
    for idx in top_indices:
        similar_movie_id = idx2movie[idx]
        movie_title = movies[movies['movie_id'] == similar_movie_id]['title'].values
        if len(movie_title) > 0:
            similar_movies.append((similar_movie_id, movie_title[0], similarities[idx]))
    
    return similar_movies

In [None]:
test_user = 196

print(f"\nNeural Recommendations for User {test_user}:")
neural_recs = recommend_movies(test_user, N=10)

for idx, (movie_id, title, pred_rating) in enumerate(neural_recs, 1):
    print(f"{idx}. {title} (Predicted: {pred_rating:.2f})")



Neural Recommendations for User 196:
1. First Wives Club, The (1996) (Predicted: 4.65)
2. Shawshank Redemption, The (1994) (Predicted: 4.64)
3. Schindler's List (1993) (Predicted: 4.63)
4. Titanic (1997) (Predicted: 4.61)
5. Scream (1996) (Predicted: 4.61)
6. Amadeus (1984) (Predicted: 4.57)
7. Phenomenon (1996) (Predicted: 4.54)
8. Chasing Amy (1997) (Predicted: 4.53)
9. Happy Gilmore (1996) (Predicted: 4.51)
10. Contact (1997) (Predicted: 4.51)


In [75]:
toy_story_id = 1  
print(f"\nMovies similar to 'Toy Story' (ID: {toy_story_id}):")

similar = find_similar_movies_by_embedding(toy_story_id, N=5)
for idx, (movie_id, title, similarity) in enumerate(similar, 1):
    print(f"{idx}. {title} (Similarity: {similarity:.3f})")



Movies similar to 'Toy Story' (ID: 1):
1. Lord of Illusions (1995) (Similarity: 0.543)
2. Fly Away Home (1996) (Similarity: 0.521)
3. Guantanamera (1994) (Similarity: 0.494)
4. Color of Night (1994) (Similarity: 0.449)
5. Candyman: Farewell to the Flesh (1995) (Similarity: 0.428)


In [77]:
predictions_test = ncf_model.predict([X_test_user, X_test_movie], verbose=0)
predictions_test = predictions_test.flatten() * 4 + 1  # Denormalize
actual_test = y_test * 4 + 1

rmse = np.sqrt(np.mean((predictions_test - actual_test) ** 2))
mae = np.mean(np.abs(predictions_test - actual_test))

print(f"\nNeural Collaborative Filtering:")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")




Neural Collaborative Filtering:
RMSE: 0.9840
MAE: 0.7659


In [78]:
def precision_at_k_neural(user_id, recommended_movies, test_data, k=10):
    relevant_movies = test_data[(test_data['user_id'] == user_id) & 
                                (test_data['rating'] >= 4)]['item_id'].tolist()
    
    if len(relevant_movies) == 0:
        return 0
    
    top_k = recommended_movies[:k]
    hits = len(set(top_k).intersection(set(relevant_movies)))
    
    return hits / k

In [79]:
def recall_at_k_neural(user_id, recommended_movies, test_data, k=10):
    relevant_movies = test_data[(test_data['user_id'] == user_id) & 
                                (test_data['rating'] >= 4)]['item_id'].tolist()
    
    if len(relevant_movies) == 0:
        return 0
    
    top_k = recommended_movies[:k]
    hits = len(set(top_k).intersection(set(relevant_movies)))
    
    return hits / len(relevant_movies)

In [80]:
def ndcg_at_k_neural(user_id, recommended_movies, test_data, k=10):
    user_test_ratings = test_data[test_data['user_id'] == user_id]
    
    if len(user_test_ratings) == 0:
        return 0
    
    top_k = recommended_movies[:k]
    
    dcg = 0
    for idx, movie_id in enumerate(top_k):
        rating = user_test_ratings[user_test_ratings['item_id'] == movie_id]['rating'].values
        if len(rating) > 0:
            rel = rating[0]
            dcg += (2**rel - 1) / np.log2(idx + 2)
    
    ideal_ratings = sorted(user_test_ratings['rating'].values, reverse=True)[:k]
    idcg = 0
    for idx, rating in enumerate(ideal_ratings):
        idcg += (2**rating - 1) / np.log2(idx + 2)
    
    if idcg == 0:
        return 0
    
    return dcg / idcg

In [None]:
def evaluate_neural_model(k=10, num_users=50):
    precision_scores = []
    recall_scores = []
    ndcg_scores = []
    
    test_users_with_ratings = []
    for user_id in test_data['user_id'].unique():
        if user_id in user2idx:
            user_train = train_data[train_data['user_id'] == user_id]
            user_test = test_data[(test_data['user_id'] == user_id) & 
                                 (test_data['rating'] >= 4)]
            
            if len(user_train) > 0 and len(user_test) > 0:
                test_users_with_ratings.append(user_id)
    
    test_users_sample = test_users_with_ratings[:num_users]
    
    print(f"Evaluating on {len(test_users_sample)} users...")
    
    evaluated_count = 0
    for user_id in test_users_sample:
        recs = recommend_movies(user_id, N=k)
        
        if len(recs) > 0:
            movie_ids = [movie_id for movie_id, title, rating in recs]
            
            precision = precision_at_k_neural(user_id, movie_ids, test_data, k)
            recall = recall_at_k_neural(user_id, movie_ids, test_data, k)
            ndcg = ndcg_at_k_neural(user_id, movie_ids, test_data, k)
            
            precision_scores.append(precision)
            recall_scores.append(recall)
            ndcg_scores.append(ndcg)
            evaluated_count += 1
    
    if evaluated_count == 0:
        return {'precision': 0, 'recall': 0, 'ndcg': 0}
    
    print(f"\nNEURAL (NCF) Results:")
    print(f"Precision@{k}: {np.mean(precision_scores):.4f}")
    print(f"Recall@{k}: {np.mean(recall_scores):.4f}")
    print(f"NDCG@{k}: {np.mean(ndcg_scores):.4f}")
    
    return {
        'precision': np.mean(precision_scores),
        'recall': np.mean(recall_scores),
        'ndcg': np.mean(ndcg_scores)
    }

neural_results = evaluate_neural_model(k=10, num_users=50)


Evaluating on 50 users...

NEURAL (NCF) Results:
Precision@10: 0.1800
Recall@10: 0.0751
NDCG@10: 0.1820
