# Exploring and Evaluating Recommender Systems on MovieLens

This notebook explores multiple recommender system approaches of increasing modeling complexity. Classical methods, including Content-Based Filtering and User-Based Collaborative Filtering, are presented as baseline techniques. Two methods—Item-Based Collaborative Filtering and the neural autoencoder—are evaluated more rigorously using standard Top-N recommendation metrics.

# ETL

In [1]:
import sys
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [2]:
# Add the parent of the current working directory (presumably the project root)
# to sys.path so that the project-local `app` package can be imported.
sys.path.append(os.path.abspath(".."))
from app.etl import run_etl

# Paths to the raw MovieLens 100K dataset files, relative to the working directory.
# NOTE(review): in the visible notebook, `file_paths` and `run_etl` are only
# referenced by the commented-out ETL call below.
file_paths = {
    'ratings': '../data/ml_100k/u.data',
    'movies': '../data/ml_100k/u.item'
}

# Run the ETL pipeline
#preprocessed_data = run_etl(file_paths, save_path='../app/preprocessed_movielens.csv')

# Check the original data
#print(preprocessed_data.head())

In [3]:
# Genre indicator columns used for content similarity
# (the file's 'unknown' column is read but not included here)
genre_cols = [
    'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
    'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# Ratings file: tab-separated, column names supplied here
ratings = pd.read_csv(
    '../data/ml_100k/u.data',
    sep='\t',
    names=['user_id', 'movie_id', 'rating', 'timestamp'],
)

# Movie file: pipe-separated, latin-1 encoded, no header row;
# descriptive fields first, then 'unknown', then the genre columns
movies = pd.read_csv(
    '../data/ml_100k/u.item',
    sep='|',
    encoding='latin-1',
    header=None,
    names=['movie_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
           'unknown', *genre_cols],
)

# Genre feature table (one row per movie)
genres_encoded = movies.loc[:, genre_cols].copy()

# Movie-to-movie cosine similarity on the genre features, labelled by movie_id
similarity_matrix = cosine_similarity(genres_encoded)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=movies['movie_id'],
    columns=movies['movie_id'],
)

#### Train / Test Split (Per User)

In [4]:
def train_test_split_by_user(ratings, test_ratio=0.2, min_ratings=5, random_state=42):
    """
    Split ratings into train and test sets separately for every user.

    Each user's ratings are shuffled and the first ``1 - test_ratio`` share
    (rounded down) goes to train, the remainder to test. Users with fewer
    than ``min_ratings`` ratings are dropped from both sets.

    Parameters
    ----------
    ratings : pd.DataFrame
        Must contain a ``user_id`` column; all columns are carried through.
    test_ratio : float
        Fraction of each user's ratings held out for testing.
    min_ratings : int
        Minimum number of ratings a user needs to be included.
    random_state : int
        Seed passed to ``DataFrame.sample`` for each user's shuffle.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        Train and test frames. Both are empty (with the input's columns)
        when no user meets ``min_ratings``.
    """
    train_parts, test_parts = [], []

    for _, group in ratings.groupby("user_id"):
        if len(group) < min_ratings:
            continue

        shuffled = group.sample(frac=1, random_state=random_state)
        split_idx = int(len(shuffled) * (1 - test_ratio))

        train_parts.append(shuffled.iloc[:split_idx])
        test_parts.append(shuffled.iloc[split_idx:])

    # pd.concat raises on an empty list, so return empty frames explicitly
    if not train_parts:
        empty = ratings.iloc[0:0]
        return empty.copy(), empty.copy()

    return pd.concat(train_parts), pd.concat(test_parts)

# Split a copy of the ratings so the `ratings` frame itself is not passed to the splitter
ratings_df = ratings.copy()
train_df, test_df = train_test_split_by_user(ratings_df)

# Build user-item matrix
#user_item_matrix = ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

# User x movie matrix built from the training split only; unrated cells are filled with 0
user_item_matrix = train_df.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

#### Ground Truth (What Each User Actually Liked)

In [5]:
def build_ground_truth(test_df, threshold=4.0):
    """
    Map each user to the set of test movies they rated at or above `threshold`.

    Users with no rating reaching the threshold do not appear in the result.
    Returns a defaultdict(set) keyed by user_id.
    """
    liked = test_df[test_df["rating"] >= threshold]

    ground_truth = defaultdict(set)
    for uid, mid in zip(liked["user_id"], liked["movie_id"]):
        ground_truth[uid].add(mid)

    return ground_truth

# Per-user sets of held-out movies rated at or above the default threshold (4.0)
ground_truth = build_ground_truth(test_df)

# 0. Content-Based Filtering

In [6]:
# Content-Based Filtering: per-user recommendation wrapper
def content_based_recommend(user_id, cb_matrix, similarity_df, top_n=10):
    """
    Rank unseen movies for one user by content similarity to what they rated.

    Every movie the user rated above 0 contributes `similarity * rating` to
    each candidate movie. Candidates are movies that are absent from the
    user's row or whose entry in that row equals 0.

    Returns a list of up to `top_n` movie IDs, best first, or an empty list
    when `user_id` is not a row of `cb_matrix`.
    """
    if user_id not in cb_matrix.index:
        return []  # nothing known about this user

    profile = cb_matrix.loc[user_id]
    # Movies with a non-zero entry are never recommended back to the user
    excluded = set(profile.index[profile != 0])

    totals = defaultdict(float)
    for seed_id, seed_rating in profile[profile > 0].items():
        for candidate_id, sim in similarity_df[seed_id].items():
            if candidate_id in excluded:
                continue
            totals[candidate_id] += sim * seed_rating

    # Stable sort: candidates with equal totals keep first-seen order
    ranked_ids = sorted(totals, key=totals.get, reverse=True)
    return ranked_ids[:top_n]

# Example usage: content-based recommendations for a single user
user_id = 1
top_n = 10
# Separate copy of the training user-item matrix for the content-based model
cb_matrix = user_item_matrix.copy()

cbf_recommendations = content_based_recommend(user_id, cb_matrix, similarity_df, top_n=top_n)
print(f"Top-{top_n} Content-Based Recommendations for User {user_id}: {cbf_recommendations}")

Top-10 Content-Based Recommendations for User 1: [337, 316, 345, 347, 382, 409, 481, 522, 523, 598]


# 1. User-Based Collaborative Filtering

In [7]:
# Compute User Similarity
# Cosine similarity between users, computed on the rows of the training user-item matrix
# (unrated movies are stored as 0 in that matrix)
ub_matrix = user_item_matrix.copy()
user_similarity = cosine_similarity(ub_matrix)

# DataFrame for easy lookup: both axes are labelled by user_id
user_similarity_df = pd.DataFrame(user_similarity, index=ub_matrix.index, columns=ub_matrix.index)

# Wrapper for User-Based Filtering (per user)
def user_cf_recommend_wrapper(user_id, ub_matrix, user_similarity_df, top_n=10):
    """
    Returns top_n recommended movie IDs for a given user
    using User-Based Collaborative Filtering.

    Each movie's score is the similarity-weighted average of all users'
    ratings (rows of `ub_matrix`), using the target user's row of
    `user_similarity_df` as weights. Movies the user already rated (> 0)
    are removed before taking the top `top_n`.

    Returns an empty list when `user_id` is missing from either matrix or
    when the user's similarities sum to zero, matching the unknown-user
    behaviour of `content_based_recommend`.
    """
    if user_id not in ub_matrix.index or user_id not in user_similarity_df.index:
        return []  # user not in training set

    # Similarity scores for target user
    similar_users = user_similarity_df.loc[user_id]

    # Without any similarity mass the weighted average is undefined
    total_similarity = similar_users.sum()
    if total_similarity == 0:
        return []

    # Compute weighted ratings
    weighted_ratings = np.dot(similar_users, ub_matrix) / total_similarity

    # Build recommendations DataFrame
    recommendations = pd.DataFrame({
        'movie_id': ub_matrix.columns,
        'score': weighted_ratings
    }).sort_values(by='score', ascending=False)

    # Exclude already rated movies
    own_ratings = ub_matrix.loc[user_id]
    user_rated_movies = own_ratings[own_ratings > 0].index
    recommendations = recommendations[~recommendations['movie_id'].isin(user_rated_movies)]

    # Return top_n movie IDs (evaluation-ready)
    return recommendations['movie_id'].head(top_n).tolist()

# Human-readable variant: movie titles instead of bare IDs
def user_cf_recommend_demo(user_id, ub_matrix, user_similarity_df, movies, top_n=10):
    """
    Look up the titles of the user's top_n User-Based CF recommendations.

    Returns the `movie_id` and `title` columns of `movies` for the
    recommended IDs. Rows keep the order they have in `movies`, not the
    ranking order of the recommendations.
    """
    recommended_ids = user_cf_recommend_wrapper(user_id, ub_matrix, user_similarity_df, top_n)
    is_recommended = movies['movie_id'].isin(recommended_ids)
    return movies.loc[is_recommended, ['movie_id', 'title']]

# Example Usage: titles of the default top-10 user-based recommendations for one user
user_id = 1
user_recommendations_demo = user_cf_recommend_demo(user_id, ub_matrix, user_similarity_df, movies)
print("User-Based Recommendations (Titles):")
print(user_recommendations_demo)

User-Based Recommendations (Titles):
     movie_id                              title
0           1                   Toy Story (1995)
6           7              Twelve Monkeys (1995)
63         64   Shawshank Redemption, The (1994)
88         89                Blade Runner (1982)
95         96  Terminator 2: Judgment Day (1991)
120       121      Independence Day (ID4) (1996)
126       127              Godfather, The (1972)
203       204          Back to the Future (1985)
317       318            Schindler's List (1993)
422       423  E.T. the Extra-Terrestrial (1982)


# 2. Item-Based Collaborative Filtering

In [8]:
# Transpose user-item matrix to item-user matrix (rows become movies, columns users)
item_cf_matrix = user_item_matrix.copy()
item_user_matrix = item_cf_matrix.T

# Compute cosine similarity between items, based on the training ratings each movie received
item_similarity = cosine_similarity(item_user_matrix)

# Wrap it in a DataFrame for easy access: both axes are labelled by movie_id
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=item_user_matrix.index,
    columns=item_user_matrix.index
)

# print(item_similarity_df.head())

# Wrapper for Item-Based Filtering (per user)
def item_cf_recommend(user_id, item_cf_matrix, item_similarity_df, top_n=10):
    """
    Returns top_n recommended movie IDs for a user using Item-Based CF.
    Excludes movies already rated by the user.

    A movie's score is the similarity-weighted sum of the user's ratings,
    divided by that movie's total absolute similarity to all movies.
    Movies whose total similarity is zero receive a score of 0 instead of
    producing a division by zero.

    Returns an empty list when `user_id` is not a row of `item_cf_matrix`,
    matching the unknown-user behaviour of `content_based_recommend`.
    """
    if user_id not in item_cf_matrix.index:
        return []  # user not in training set

    # User's ratings
    user_ratings = item_cf_matrix.loc[user_id]

    # Numerator: for each movie, sum over rated movies of rating * similarity
    weighted_sums = np.asarray(np.dot(user_ratings, item_similarity_df), dtype=float).ravel()
    # Denominator: each movie's total absolute similarity
    similarity_totals = np.abs(item_similarity_df).sum(axis=1).to_numpy(dtype=float)

    # Divide only where the denominator is positive; elsewhere keep 0
    scores = np.divide(
        weighted_sums,
        similarity_totals,
        out=np.zeros_like(weighted_sums),
        where=similarity_totals > 0,
    )

    # Create DataFrame with movie scores
    recommendations = pd.DataFrame({
        'movie_id': item_cf_matrix.columns,
        'score': scores
    }).sort_values(by='score', ascending=False)

    # Exclude already rated movies
    user_rated_movies = user_ratings[user_ratings > 0].index
    recommendations = recommendations[~recommendations['movie_id'].isin(user_rated_movies)]

    # Return top-N movie IDs only (for evaluation)
    return recommendations['movie_id'].head(top_n).tolist()

# Example usage: item-based recommendations (movie IDs) for a single user
user_id = 1
top_n = 10
item_recommendations = item_cf_recommend(user_id, item_cf_matrix, item_similarity_df, top_n=top_n)

print(f"Top-{top_n} Item-Based CF Recommendations for User {user_id}:")
print(item_recommendations)

Top-10 Item-Based CF Recommendations for User 1:
[1619, 1526, 1618, 1614, 711, 1682, 1476, 963, 695, 1354]


# 3. Deep Learning-Based Recommendation: Autoencoders for Collaborative Filtering

In [9]:
# Deep-learning dependencies for the autoencoder section.
# NOTE(review): the cells visible in this notebook access Keras through `tf.keras`;
# `keras`, `layers` and `regularizers` are not referenced by name elsewhere — confirm they are needed.
from tensorflow import keras
from keras import layers, regularizers
import tensorflow as tf
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.20.0


  if not hasattr(np, "object"):


In [10]:
def masked_mse(y_true, y_pred):
    """
    Mean squared error over observed entries only.

    Entries where `y_true` equals 0 are treated as unobserved and excluded
    from both the error sum and the count. Returns 0 (instead of NaN) when
    the batch contains no observed entry.
    """
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
    masked_error = tf.square(y_true - y_pred) * mask
    # divide_no_nan yields 0 when the mask is empty, avoiding a 0/0 NaN loss
    return tf.math.divide_no_nan(tf.reduce_sum(masked_error), tf.reduce_sum(mask))

def bpr_loss(y_true, y_pred):
    """
    Pairwise ranking loss: -mean(log(sigmoid(pos - neg))).

    `pos` are predictions where `y_true > 0`, `neg` are predictions where
    `y_true == 0`; both are pooled over the whole batch (the boolean mask
    flattens the tensors), and negatives are randomly shuffled before
    pairing. The number of pairs is the smaller of the two pool sizes, so
    the subtraction never sees mismatched shapes. Returns 0 when no pair
    can be formed.
    """
    pos = tf.boolean_mask(y_pred, y_true > 0)
    neg = tf.boolean_mask(y_pred, tf.equal(y_true, 0))

    # Pair every positive with one random negative, limited by the smaller pool
    n_pairs = tf.minimum(tf.shape(pos)[0], tf.shape(neg)[0])
    pos = pos[:n_pairs]
    neg = tf.random.shuffle(neg)[:n_pairs]

    # log_sigmoid is the numerically stable form of log(sigmoid(x))
    pair_losses = -tf.math.log_sigmoid(pos - neg)
    return tf.math.divide_no_nan(
        tf.reduce_sum(pair_losses),
        tf.cast(n_pairs, pair_losses.dtype),
    )

In [11]:
# Normalize ratings to 0-1 for autoencoder (divide by the largest training rating)
autoencoder_matrix = user_item_matrix.copy()
autoencoder_matrix = autoencoder_matrix / autoencoder_matrix.max().max()
train_matrix = autoencoder_matrix.copy()

# Held-out ratings aligned to the training matrix's users and movies.
# reindex(fill_value=...) only fills labels that are newly introduced; the NaNs
# that pivot creates for unrated user/movie pairs must be filled explicitly.
test_matrix = (
    test_df.pivot(index='user_id', columns='movie_id', values='rating')
    .reindex(index=train_matrix.index, columns=train_matrix.columns)
    .fillna(0)
)
test_matrix = test_matrix / user_item_matrix.max().max()

# Denoising input: Gaussian noise added to the training matrix, clipped back to [0, 1].
# A seeded generator keeps the noisy input reproducible across runs.
noise_factor = 0.3
noise_rng = np.random.default_rng(42)
noisy_train = train_matrix + noise_factor * noise_rng.standard_normal(train_matrix.shape)
noisy_train = np.clip(noisy_train, 0, 1)

# Define deep autoencoder
# Input and output width both equal the number of movie columns in the training matrix
n_movies = train_matrix.shape[1]

autoencoder = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(n_movies,)),
    
    # Encoder: 512 -> 256 -> 128; the two widest layers carry an L2 weight penalty and dropout
    tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-5)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-5)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),

    tf.keras.layers.Dense(64, activation='relu'),  # bottleneck

    # Decoder: mirrors the encoder widths, without regularization or dropout
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(n_movies, activation='sigmoid')  # output between 0-1
])

# Optimized with the pairwise ranking loss `bpr_loss`; `masked_mse` is not used here
autoencoder.compile(optimizer='adam', loss=bpr_loss)

# Train
# Denoising setup: the noisy matrix is the input, the clean training matrix is the target.
# NOTE(review): validation passes the held-out test matrix as both input and target,
# so val_loss is not computed from training ratings as input — confirm this is intended.
history = autoencoder.fit(
    noisy_train.values, train_matrix.values,
    epochs=100,      
    batch_size=128, 
    validation_data=(test_matrix.values, test_matrix.values),
    verbose=1
)

# Predict
# Scores for every user/movie pair, produced from the clean (noise-free) training matrix
reconstructed_matrix = autoencoder.predict(train_matrix.values)
autoencoder_predicted_ratings = pd.DataFrame(reconstructed_matrix, index=train_matrix.index, columns=train_matrix.columns)

# Per-user recommendation wrapper for the autoencoder scores
def autoencoder_recommend_wrapper(user_id, autoencoder_predicted_ratings, train_matrix, top_n=10):
    """
    Return the top_n movie IDs with the highest predicted score for a user.

    Movies with a value above 0 in the user's row of `train_matrix` are
    treated as already rated and left out.
    """
    predicted = autoencoder_predicted_ratings.loc[user_id]
    observed = train_matrix.loc[user_id]

    seen_ids = observed.loc[observed > 0].index
    unseen_mask = ~predicted.index.isin(seen_ids)

    ranked = predicted[unseen_mask].sort_values(ascending=False)
    return ranked.head(top_n).index.tolist()

# Example Usage: top-10 autoencoder recommendations for user 1
# Note: this rebinds `top_movies` to a list of movie IDs
top_movies = autoencoder_recommend_wrapper(1, autoencoder_predicted_ratings, train_matrix, top_n=10)
print("Autoencoder Top-10 Recommendations (movie IDs):")
print(top_movies)

Epoch 1/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - loss: 0.6796 - val_loss: 0.6963
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.6884 - val_loss: 0.6685
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.6988 - val_loss: 0.6438
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.6983 - val_loss: 0.6311
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.6982 - val_loss: 0.6256
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 0.6977 - val_loss: 0.6226
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 0.6969 - val_loss: 0.6209
Epoch 8/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.6964 - val_loss: 0.6199
Epoch 9/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

# Evaluation Results

#### Top-N Metrics

In [12]:
def precision_at_k(recommended, relevant, k):
    """
    Share of the k recommendation slots filled with relevant items.

    The hit count is divided by k itself, even if fewer than k items were
    recommended. Returns 0.0 when the truncated list is empty.
    """
    top_k = recommended[:k]
    if not top_k:
        return 0.0
    hits = set(top_k).intersection(relevant)
    return len(hits) / k

def recall_at_k(recommended, relevant, k):
    """
    Share of the relevant items that appear in the first k recommendations.

    Returns 0.0 when there are no relevant items.
    """
    if not relevant:
        return 0.0
    found = set(recommended[:k]).intersection(relevant)
    return len(found) / len(relevant)

def average_precision_at_k(recommended, relevant, k):
    """
    Average precision over the first k recommendations.

    At each position holding a relevant item, the precision up to that
    position is accumulated; the total is divided by min(len(relevant), k).
    Returns 0.0 when there are no relevant items or when k is not positive
    (the latter would otherwise divide by zero).
    """
    if k <= 0 or not relevant:
        return 0.0

    score = 0.0
    hits = 0

    for i, item in enumerate(recommended[:k]):
        if item in relevant:
            hits += 1
            score += hits / (i + 1)

    return score / min(len(relevant), k)

def ndcg_at_k(recommended, relevant, k):
    """
    Normalized discounted cumulative gain with binary relevance.

    A relevant item at 0-based position i contributes 1 / log2(i + 2). The
    sum is divided by the best achievable value, i.e. the same discounts
    for the first min(len(relevant), k) positions. Returns 0.0 when that
    ideal value is not positive.
    """
    gains = [
        1 / np.log2(position + 2)
        for position, item in enumerate(recommended[:k])
        if item in relevant
    ]
    dcg = sum(gains, 0.0)

    ideal_hits = min(len(relevant), k)
    ideal_dcg = sum(1 / np.log2(position + 2) for position in range(ideal_hits))

    if ideal_dcg > 0:
        return dcg / ideal_dcg
    return 0.0

def hit_rate_at_k(recommended, relevant, k):
    """Return 1 if any of the first k recommendations is relevant, else 0."""
    overlap = set(recommended[:k]).intersection(relevant)
    return 1 if overlap else 0

#### Evaluation Loop

In [13]:
def evaluate_model(recommended_items, ground_truth, k=10):
    """
    Average the Top-K metrics over all users that have recommendations.

    `ground_truth` maps user IDs to their relevant items and
    `recommended_items` maps user IDs to ranked recommendation lists.
    Users present in `ground_truth` but absent from `recommended_items`
    are skipped. Returns a dict of metric name -> mean value.
    """
    metric_functions = {
        "Precision@K": precision_at_k,
        "Recall@K": recall_at_k,
        "MAP@K": average_precision_at_k,
        "NDCG@K": ndcg_at_k,
        "HitRate@K": hit_rate_at_k,
    }
    per_user_values = {name: [] for name in metric_functions}

    for user_id, relevant_items in ground_truth.items():
        if user_id not in recommended_items:
            continue

        user_recs = recommended_items[user_id]
        for name, metric in metric_functions.items():
            per_user_values[name].append(metric(user_recs, relevant_items, k))

    return {name: np.mean(values) for name, values in per_user_values.items()}

#### Run Comparison Across Models

In [14]:
# Evaluate on every user that has at least one relevant held-out movie
eval_users = list(ground_truth)

item_cf_recommendations = {}
autoencoder_recommendations = {}

# One pass over the evaluation users, producing top-10 lists from both models
for user_id in eval_users:
    item_cf_recommendations[user_id] = item_cf_recommend(
        user_id, item_cf_matrix, item_similarity_df, top_n=10
    )
    autoencoder_recommendations[user_id] = autoencoder_recommend_wrapper(
        user_id, autoencoder_predicted_ratings, autoencoder_matrix, top_n=10
    )

In [15]:
# Cut-off for all Top-K metrics; the recommendation lists built earlier hold 10 items per user
k = 10

# Same ground truth and same k for both models, so the numbers are directly comparable
item_cf_metrics = evaluate_model(item_cf_recommendations, ground_truth, k)
autoencoder_metrics = evaluate_model(autoencoder_recommendations, ground_truth, k)

In [16]:
# Comparison table: one row per model, one column per metric
results_df = pd.DataFrame.from_dict(
    {
        "Item-Based CF": item_cf_metrics,
        "Autoencoder": autoencoder_metrics,
    },
    orient='index'
)

# Bare last expression so the notebook renders the table
results_df

Unnamed: 0,Precision@K,Recall@K,MAP@K,NDCG@K,HitRate@K
Item-Based CF,0.004064,0.003527,0.00068,0.003232,0.039572
Autoencoder,0.030695,0.040101,0.010508,0.032758,0.252406
