In [2]:
!pip install scikit-surprise



In [1]:
# Downgrade numpy to a compatible version
!pip install numpy==1.26
#restart the runtime after running it



In [3]:
from google.colab import files
uploaded = files.upload()

Saving movies.csv to movies.csv
Saving ratings.csv to ratings.csv


In [4]:
# Fix NumPy version compatibility error

import numpy as np
print("NumPy version:", np.__version__)

# ================================
# Recommendation System - MovieLens Latest Dataset
# Internship Task 4
# ================================

# Step 1: Install and Import Libraries
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Step 2: Load Data
# Upload the dataset files (ratings.csv, movies.csv) to Colab using the file upload tool
import os

  # Upload ratings.csv and movies.csv

ratings_file = 'ratings.csv'
movies_file = 'movies.csv'

# Load ratings
ratings = pd.read_csv(ratings_file)
print("Ratings Sample:")
display(ratings.head())

# Load movies
movies = pd.read_csv(movies_file)
print("Movies Sample:")
display(movies.head())

# Step 3: Prepare Data for Surprise
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Step 4: Train-Test Split
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Step 5: Build and Train SVD Model (Matrix Factorization)
model = SVD()
model.fit(trainset)

# Step 6: Model Evaluation (RMSE, MAE)
predictions = model.test(testset)
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

# Step 7: Generate Top 5 Recommendations for a User (with Movie Titles)
user_id = ratings['userId'].iloc[0]  # Example: first user in dataset
all_movie_ids = movies['movieId'].unique()
rated_movie_ids = ratings[ratings['userId'] == user_id]['movieId'].tolist()
unrated_movie_ids = [mid for mid in all_movie_ids if mid not in rated_movie_ids]

recommendations = []
for mid in unrated_movie_ids[:1000]:  # limit for speed, use all for real run
    pred = model.predict(str(user_id), mid)
    recommendations.append((mid, pred.est))
recommendations.sort(key=lambda x: x[1], reverse=True)
top_5 = recommendations[:5]

print(f"\nTop 5 recommendations for user {user_id}:")
for mid, score in top_5:
    title = movies[movies['movieId'] == mid]['title'].values[0]
    print(f"Movie: {title}, Predicted Rating: {score:.2f}")

# Step 8: Analysis
print(f"""
Analysis:
- The SVD-based recommendation system predicts ratings for unseen movies for a user.
- RMSE: {rmse:.3f}, MAE: {mae:.3f} (lower is better)
- Recommendations are shown with movie titles.
- You can try other algorithms (KNN, NMF) or tune SVD hyperparameters.
""")

NumPy version: 1.26.0
Ratings Sample:


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


Movies Sample:


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


RMSE: 0.8796
MAE:  0.6754

Top 5 recommendations for user 1:
Movie: Lawrence of Arabia (1962), Predicted Rating: 4.38
Movie: Shawshank Redemption, The (1994), Predicted Rating: 4.36
Movie: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964), Predicted Rating: 4.34
Movie: Rear Window (1954), Predicted Rating: 4.32
Movie: Cinema Paradiso (Nuovo cinema Paradiso) (1989), Predicted Rating: 4.31

Analysis:
- The SVD-based recommendation system predicts ratings for unseen movies for a user.
- RMSE: 0.880, MAE: 0.675 (lower is better)
- Recommendations are shown with movie titles.
- You can try other algorithms (KNN, NMF) or tune SVD hyperparameters.

