<a href="https://colab.research.google.com/github/Baibhavi-rgh/CSE-Project/blob/main/SVD_Movie_Recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# --- Project Configuration ---
# Name of the Surprise built-in dataset handed to Dataset.load_builtin() below.
# 'ml-100k' is the MovieLens 100k dataset (100,000 ratings from 943 users on 1682 movies).
DATASET_NAME = 'ml-100k'

# --- Helper Function for Top-N Recommendations ---

def get_top_n(predictions, n=10):
    """
    Group predictions by user and keep each user's N highest-estimated items.

    :param predictions: Iterable of 5-element predictions
        (user ID, item ID, true rating, estimated rating, details).
    :param n: Maximum number of (item, estimate) pairs to keep per user.
    :return: A dictionary mapping each user ID to a list of
        (item ID, estimated rating) tuples, ordered from the highest
        estimate to the lowest.
    """
    # Bucket every (item, estimate) pair under the user it was predicted for.
    per_user = {}
    for user_id, item_id, _true_rating, estimate, _details in predictions:
        per_user.setdefault(user_id, []).append((item_id, estimate))

    # Rank each bucket by estimate, best first, then cut it down to N entries.
    return {
        user_id: sorted(candidates, key=lambda pair: pair[1], reverse=True)[:n]
        for user_id, candidates in per_user.items()
    }

# --- Main Recommender System Logic ---

def run_recommender_system(sample_user_id='1', top_n=10):
    """
    Executes the movie recommendation system using the SVD algorithm.

    Loads the built-in dataset named by DATASET_NAME, trains an SVD model on
    an 80% split, prints the RMSE measured on the remaining 20%, and then
    prints the highest-predicted movies that the sample user has not rated.

    :param sample_user_id: Raw user ID to recommend for, exactly as it appears
        in the dataset (the demo default is the string '1').
    :param top_n: Number of recommendations to print.
    :return: None. All results are written to standard output.
    """
    print("--- Movie Recommendation System using SVD (Collaborative Filtering) ---")

    # 1. Load the Dataset
    # Surprise downloads the built-in dataset on first use if it is not cached.
    # This is the script's top-level boundary, so any failure is reported and
    # the run is abandoned instead of crashing with a traceback.
    try:
        data = Dataset.load_builtin(DATASET_NAME)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return
    print(f"Dataset '{DATASET_NAME}' loaded successfully.")

    # 2. Split Data for Training and Testing
    # 80% of the ratings are used for training, 20% are held out for evaluation.
    trainset, testset = train_test_split(data, test_size=0.2, random_state=42)
    # A Surprise Trainset has no __len__; its rating count is exposed as
    # n_ratings. The testset is a plain list, so len() is fine there.
    print(f"Data split: {trainset.n_ratings} ratings for training, {len(testset)} ratings for testing.")

    # 3. Initialize and Train the Model (SVD)
    # SVD is a Matrix Factorization technique (Model-Based Collaborative Filtering).
    # Default hyper-parameters are used; they can be tuned (e.g., n_factors, n_epochs).
    algo = SVD(random_state=42)
    print("\nTraining SVD model...")
    algo.fit(trainset)
    print("Training complete.")

    # 4. Model Evaluation
    # Predict every held-out rating and report the Root Mean Squared Error,
    # i.e. how far the predicted ratings are from the true ones.
    predictions = algo.test(testset)
    rmse = accuracy.rmse(predictions, verbose=True)
    print(f"Model Accuracy (RMSE): {rmse:.4f}")

    # 5. Generate Top-N Recommendations for the Sample User
    # Movie titles are not looked up in this demo: the recommendations are
    # printed by raw item ID (the ID used in the dataset's rating file).

    # The full dataset (train + test ratings) tells us everything the user has
    # already rated, so nothing they have seen is recommended back to them.
    full_trainset = data.build_full_trainset()

    # 5a/5b. Predict a rating for every item the user has NOT rated.
    unrated_predictions = []
    try:
        inner_uid = full_trainset.to_inner_uid(sample_user_id)
    except ValueError:
        # Unknown user: leave the prediction list empty so the fallback
        # message below is printed instead of raising.
        inner_uid = None

    # Inner IDs start at 0, so compare against None rather than truthiness.
    if inner_uid is not None:
        rated_iids = {iid for (iid, _) in full_trainset.ur[inner_uid]}
        # algo.predict() expects raw IDs, so each inner item ID is converted back.
        unrated_predictions = [
            algo.predict(sample_user_id, full_trainset.to_raw_iid(iid))
            for iid in full_trainset.all_items()
            if iid not in rated_iids
        ]

    # 5c. Keep only the best-rated predictions
    top_recommendations = get_top_n(unrated_predictions, n=top_n)

    print(f"\n--- Top {top_n} Movie Recommendations for User ID {sample_user_id} ---")

    # top_recommendations is keyed by user ID; only one user was generated.
    if sample_user_id in top_recommendations:
        for rank, (iid, est_rating) in enumerate(top_recommendations[sample_user_id], start=1):
            print(f"Rank {rank}: Movie ID {iid} (Predicted Rating: {est_rating:.3f})")
    else:
        print("Could not generate recommendations for the sample user.")

if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, not on import.
    # Requires the 'scikit-surprise' and 'pandas' libraries:
    # pip install scikit-surprise pandas
    run_recommender_system()