# 🎬 Recommender System Demo using Matrix Factorization (SVD)

This notebook demonstrates a simple **movie recommender system** built with the **Surprise** library. It uses matrix factorization (SVD) to predict user–movie ratings and recommend top movies.

In [None]:
# =======================================
# STEP 2: Import libs & read csv file
# =======================================

import pandas as pd
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split
import numpy as np
from collections import defaultdict

# Read the ratings and movie tables from CSV (adjust the paths to your copies)
ratings = pd.read_csv("./ratings.csv")
movies = pd.read_csv("./movies.csv")

# Attach the movie title to every rating row. The left join keeps all rating
# rows, including any whose movieId has no match in the movies table.
df = ratings.merge(movies[['movieId', 'title']], how='left', on='movieId')

# Preview the first rows of the merged frame
df.head()

In [None]:
# =======================================
# STEP 3: Prepare data for Surprise
# =======================================

# Prepare data for Surprise.
# The rating scale is taken from the data rather than hard-coded to (1, 5):
# Surprise clips every estimate to the Reader's scale, so a ratings file that
# contains values outside 1-5 (for example half-star ratings of 0.5) would
# otherwise get predictions that can never reach the lowest true ratings.
rating_min = float(df['rating'].min())
rating_max = float(df['rating'].max())
reader = Reader(rating_scale=(rating_min, rating_max))

# Surprise expects exactly three columns, in the order user, item, rating.
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split
# identical across runs.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# =============================================
# STEP 4: Train matrix factorization model (SVD)
# ==============================================

# Seed the factor initialisation so that, together with the seeded train/test
# split, the RMSE and the recommendations are reproducible from run to run.
model = SVD(random_state=42)
model.fit(trainset)


In [None]:
# ===================================
# STEP 5: Evaluate on test set
# ===================================

# Predict every held-out rating and report the root-mean-squared error
predictions = model.test(testset)
rmse = accuracy.rmse(predictions)

# Group the test predictions per user:
#   user_actual[uid] -> [(item id, true rating), ...]
#   user_pred[uid]   -> [(item id, estimated rating), ...]
user_actual = defaultdict(list)
user_pred = defaultdict(list)
for uid, iid, true_r, est, _ in predictions:
    user_actual[uid].append((iid, true_r))
    user_pred[uid].append((iid, est))

# Parameters
K = 5  # top-K cutoff
threshold = 4.0  # a true rating >= threshold counts as "liked" (relevant)

# Per-user metric values, averaged below
precision_list, recall_list = [], []

# Compute metrics per user
for uid, actual in user_actual.items():
    # Relevant items: test items this user actually rated at or above threshold
    actual_items = [iid for iid, rating in actual if rating >= threshold]

    # Rank the user's test items by estimated rating and keep the best K
    pred_sorted = sorted(user_pred[uid], key=lambda x: x[1], reverse=True)
    top_k_pred = [iid for iid, _ in pred_sorted[:K]]

    true_positives = len(set(top_k_pred) & set(actual_items))

    # Precision divides by K itself, so a user with fewer than K test items
    # cannot reach 1.0. Recall is defined as 0 for users with no relevant items.
    precision = true_positives / K if K else 0
    recall = true_positives / len(actual_items) if actual_items else 0

    precision_list.append(precision)
    recall_list.append(recall)

# Average over users. The labels follow K so they stay correct if the cutoff
# above is changed.
results = {
    f"Precision@{K}": np.mean(precision_list),
    f"Recall@{K}": np.mean(recall_list)
}

for metric, value in results.items():
    print(f"{metric}: {value:.4f}")

In [None]:
# =====================================================
# STEP 6: Generate top-N recommendations for a few users
# ======================================================

from collections import defaultdict

def get_top_n(predictions, n=5):
    """
    Group predictions by user and keep each user's n best-estimated items.

    Args:
        predictions: iterable of 5-element records
            (user id, item id, true rating, estimated rating, details).
        n: how many items to keep per user.

    Returns:
        A defaultdict mapping each user id to a list of
        (item id, estimated rating) pairs, sorted by estimate, highest first.
    """
    per_user = defaultdict(list)

    # Collect (item, estimate) pairs under their user
    for user, item, _true_rating, estimate, _details in predictions:
        per_user[user].append((item, estimate))

    # Replace each user's full list with its n highest-estimate entries.
    # Only existing keys are reassigned, so iterating the dict here is safe.
    for user in per_user:
        ranked = sorted(per_user[user], key=lambda pair: pair[1], reverse=True)
        per_user[user] = ranked[:n]

    return per_user

# Keep the five highest-estimated test-set items for every user
top_n = get_top_n(predictions, n=5)


# Print the ranked items for the first three users only
for uid in list(top_n)[:3]:
    print(f'\nTop recommendations for user {uid}:')
    for iid, rating in top_n[uid]:
        # Resolve the movie id to a title via the merged frame (first match wins)
        matches = df.loc[df['movieId'] == iid, 'title'].values
        if len(matches) > 0:
            title = matches[0]
        else:
            title = 'Unknown'
        print(f'  {title} (predicted rating: {rating:.2f})')

In [None]:
# ==============================================================
# STEP 7: Predict ratings for unseen movies for a specific user

# What this code does:
#   - Finds which movies the user hasn't rated yet.
#   - Uses the SVD model's predict() method to estimate a rating for each unseen movie.
#   - Sorts the movies by predicted rating, highest first.
#   - Prints the top-N (default 5) movies with the highest predicted ratings.
# ==============================================================

def recommend_for_user(user_id, df, model, n=5):
    """
    Print the top-N movies the user has not rated, ranked by predicted rating.

    Args:
        user_id: id of the user to recommend for, compared against
            df['userId'].
        df: ratings frame with at least 'userId', 'movieId' and 'title'
            columns.
        model: object exposing predict(user_id, movie_id) whose result has
            an ``est`` attribute (the estimated rating).
        n: number of movies to print.

    Returns:
        None. The recommendations are written to stdout.
    """
    # All movies
    all_movie_ids = df['movieId'].unique()

    # Movies already rated by the user. Stored as a set so the membership
    # test below is O(1) per movie instead of a linear scan of an array.
    rated_movie_ids = set(df.loc[df['userId'] == user_id, 'movieId'].unique())

    # Movies not yet rated (keeps the order of first appearance in df)
    unseen_movie_ids = [mid for mid in all_movie_ids if mid not in rated_movie_ids]

    # Predict a rating for every unseen movie
    predictions = [
        (mid, model.predict(user_id, mid).est) for mid in unseen_movie_ids
    ]

    # Sort by estimated rating, descending (stable: ties keep their order)
    predictions.sort(key=lambda x: x[1], reverse=True)

    # Get top-N
    top_n = predictions[:n]

    # Display results
    print(f"\n🎬 Top {n} recommended movies for user {user_id}:")
    for mid, rating in top_n:
        # First title recorded for this movie id; "Unknown" if no row matches
        title = df.loc[df['movieId'] == mid, 'title'].values
        title = title[0] if len(title) > 0 else "Unknown"
        print(f"  {title} (predicted rating: {rating:.2f})")




# Example usage: print the five best unseen-movie predictions for user 196
recommend_for_user(196, df, model, n=5)
