In [1]:
# Install surprise library (used for building recommendation models)
!pip install scikit-surprise




In [2]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy


In [3]:
# Load the built-in MovieLens 100k dataset.
# prompt=False downloads the data without the interactive [Y/n] question when it
# is not cached yet, so the notebook can run unattended (Restart & Run All).
data = Dataset.load_builtin('ml-100k', prompt=False)


Dataset ml-100k could not be found. Do you want to download it? [Y/n] Y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


In [5]:
# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split repeatable.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Create the SVD (Singular Value Decomposition) matrix-factorization model.
# random_state fixes the random initialisation of the factors so that repeated
# runs give the same fitted model, and therefore the same metrics and rankings.
model = SVD(random_state=42)
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7b5270b17490>

In [7]:
# Predict a rating for every (user, item) pair held out in the test set.
# Each prediction exposes uid, iid, r_ui (actual rating) and est (predicted rating),
# which the cells below read.
predictions = model.test(testset)


In [8]:
# Root Mean Squared Error on the held-out test set (lower = better).
# verbose=False stops accuracy.rmse from printing its own "RMSE: ..." line,
# which would otherwise duplicate the formatted line printed below.
rmse = accuracy.rmse(predictions, verbose=False)
print(f"Test RMSE: {rmse:.4f}")


RMSE: 0.9332
Test RMSE: 0.9332


In [9]:
# Compare the actual and predicted rating for the first five test predictions.
for sample in predictions[:5]:
    # One print per record; sep puts each field on its own line, and the
    # trailing "\n" leaves a blank line between records.
    print(
        f"User {sample.uid} → Item {sample.iid}:",
        f"  Actual Rating: {sample.r_ui}",
        f"  Predicted Rating: {sample.est:.2f}\n",
        sep="\n",
    )


User 907 → Item 143:
  Actual Rating: 5.0
  Predicted Rating: 4.75

User 371 → Item 210:
  Actual Rating: 4.0
  Predicted Rating: 4.23

User 218 → Item 42:
  Actual Rating: 4.0
  Predicted Rating: 3.39

User 829 → Item 170:
  Actual Rating: 4.0
  Predicted Rating: 4.08

User 733 → Item 277:
  Actual Rating: 1.0
  Predicted Rating: 3.14



In [10]:
# Build the top-N list of highest-predicted movies for every user in a set of predictions
from collections import defaultdict

def get_top_n(predictions, n=5):
    """Return the ``n`` highest-estimated items for every user.

    Args:
        predictions: Iterable of 5-field records unpacked as
            ``(uid, iid, true_rating, estimate, details)``. Only the user id,
            item id and estimate are used.
        n: Maximum number of items kept per user. ``None`` keeps every item.
            Defaults to 5.

    Returns:
        A ``defaultdict(list)`` mapping each user id to a list of
        ``(iid, est)`` tuples sorted by estimate, highest first, holding at
        most ``n`` entries.

    Raises:
        ValueError: If ``n`` is negative. A negative slice bound would silently
            drop each user's lowest-ranked items instead of keeping the top ones.
    """
    if n is not None and n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    top_n = defaultdict(list)

    # Group the (item, estimate) pairs by user.
    for uid, iid, _, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Rank each user's items by estimate and keep the n best.
    # Only existing keys are reassigned, so iterating over the dict here is safe.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# NOTE: `predictions` comes from model.test(testset), so each list ranks only the
# held-out items that the user has already rated, not movies the user has never seen.
N_RECOMMENDATIONS = 5  # single source of truth for the list size and the header text
top_n = get_top_n(predictions, n=N_RECOMMENDATIONS)

# Example: print recommendations for 3 users
for uid, user_ratings in list(top_n.items())[:3]:
    print(f"\nTop {N_RECOMMENDATIONS} recommendations for User {uid}:")
    for iid, rating in user_ratings:
        print(f"  Movie ID: {iid} | Predicted Rating: {rating:.2f}")



Top 5 recommendations for User 907:
  Movie ID: 173 | Predicted Rating: 5.00
  Movie ID: 813 | Predicted Rating: 5.00
  Movie ID: 79 | Predicted Rating: 5.00
  Movie ID: 125 | Predicted Rating: 5.00
  Movie ID: 647 | Predicted Rating: 5.00

Top 5 recommendations for User 371:
  Movie ID: 97 | Predicted Rating: 4.27
  Movie ID: 186 | Predicted Rating: 4.26
  Movie ID: 210 | Predicted Rating: 4.23
  Movie ID: 237 | Predicted Rating: 4.21
  Movie ID: 746 | Predicted Rating: 4.10

Top 5 recommendations for User 218:
  Movie ID: 12 | Predicted Rating: 4.38
  Movie ID: 654 | Predicted Rating: 3.94
  Movie ID: 209 | Predicted Rating: 3.78
  Movie ID: 47 | Predicted Rating: 3.44
  Movie ID: 42 | Predicted Rating: 3.39


In [12]:
# Closing line naming the technique demonstrated in this notebook.
print("Collaborative Filtering using SVD.")


Collaborative Filtering using SVD.
