In [2]:
!pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505186 sha256=247d1f98d973f5d59cfe26ff77e165e6d5a1ee644030282f31cc612f78bd204f
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succe

In [4]:
# Import Required Libraries
import numpy as np
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse, mae

# Update Dataset Paths
ratings_path = "/ratings.csv"
movies_path = "/movies.csv"

# Load the MovieLens 20M Dataset (CSV Format)
ratings = pd.read_csv(ratings_path, usecols=["userId", "movieId", "rating"])
movies = pd.read_csv(movies_path, usecols=["movieId", "title"])
movies_dict = dict(zip(movies["movieId"], movies["title"]))

# Prepare Data for Surprise
reader = Reader(line_format="user item rating", sep=",", rating_scale=(0.5, 5.0))
dataset = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)

# Count ratings per movie
movie_counts = ratings["movieId"].value_counts()

# Keep only movies with at least 10 ratings
popular_movies = movie_counts[movie_counts >= 10].index
filtered_ratings = ratings[ratings["movieId"].isin(popular_movies)]

# Split Data into Training and Testing Sets
trainset, testset = train_test_split(dataset, test_size=0.1)

# Train the SVD Model
algo = SVD(n_factors=150, lr_all=0.007, reg_all=0.02)
algo.fit(trainset)

# Evaluate Model Performance
predictions = algo.test(testset)
print("\nEvaluation Metrics:")
print("RMSE:", rmse(predictions))
print("MAE:", mae(predictions))

# Generate Top-N Recommendations for Each User
def get_top_n_recommendations(predictions, n=5):
    top_n = {}
    for uid, iid, _, est, _ in predictions:
        if uid not in top_n:
            top_n[uid] = []
        top_n[uid].append((iid, est))

    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]  # Keep Top-N movies

    return top_n

top_recommendations = get_top_n_recommendations(predictions)

# Convert Movie IDs to Titles and Display Sample Recommendations
def display_recommendations(user_id, top_recommendations):
    if user_id not in top_recommendations:
        print(f"No recommendations found for User {user_id}")
        return

    print(f"\n🎬 Top 5 Recommendations for User {user_id}:")
    for movie_id, score in top_recommendations[user_id]:
        print(f"{movies_dict.get(movie_id, 'Unknown Movie')} (Predicted Rating: {score:.2f})")

# Show recommendations for 5 random users
import random
random_users = random.sample(list(top_recommendations.keys()), 5)
for user in random_users:
    display_recommendations(user, top_recommendations)



Evaluation Metrics:
RMSE: 1.8157
RMSE: 1.8157252621115785
MAE:  1.4770
MAE: 1.4769743155919215

🎬 Top 5 Recommendations for User 6538:
Cocoon (1985) (Predicted Rating: 5.00)
Navigator: A Mediaeval Odyssey, The (1988) (Predicted Rating: 5.00)
Thin Red Line, The (1998) (Predicted Rating: 5.00)
Small Time Crooks (2000) (Predicted Rating: 5.00)
Seven Days in May (1964) (Predicted Rating: 5.00)

🎬 Top 5 Recommendations for User 6648:
Jurassic Park (1993) (Predicted Rating: 5.00)

🎬 Top 5 Recommendations for User 1519:
Original Gangstas (1996) (Predicted Rating: 5.00)
Craft, The (1996) (Predicted Rating: 5.00)
Diabolique (1996) (Predicted Rating: 5.00)
Don't Be a Menace to South Central While Drinking Your Juice in the Hood (1996) (Predicted Rating: 5.00)
City of Lost Children, The (Cité des enfants perdus, La) (1995) (Predicted Rating: 5.00)

🎬 Top 5 Recommendations for User 7498:
Sin City (2005) (Predicted Rating: 5.00)
Heavenly Creatures (1994) (Predicted Rating: 5.00)
Mystery Men (1999)