<a href="https://colab.research.google.com/github/AbderrahmanS/collaborative-filtering-with-SVD/blob/main/collaborative_filtering_with_SVD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the ratings table from the Colab working directory and preview the first rows
ratings_csv = '/content/ratings_small.csv'
df = pd.read_csv(ratings_csv)
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [None]:
!pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163758 sha256=1d4407be6faa611f1814a497f3bd65a93eef5cacc5caf43182c4acf11504686d
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [None]:
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import cross_validate, train_test_split
from surprise.accuracy import rmse
from surprise.dataset import DatasetAutoFolds

In [None]:
# Load the dataset into the surprise library format.
# The rating scale is taken from the data instead of being hard-coded: the preview shows
# half-star ratings (e.g. 2.5), so a fixed (1, 5) scale may not match the real bounds and
# would make Surprise clip predictions to the wrong range.
rating_min = float(df['rating'].min())
rating_max = float(df['rating'].max())
reader = Reader(rating_scale=(rating_min, rating_max))
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)

In [None]:
# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split repeatable
trainset, testset = train_test_split(
    data,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Initialize the SVD algorithm.
# SVD initialises its factors randomly; fixing random_state makes the fitted model
# (and therefore the RMSE and recommendations below) reproducible across runs.
algo = SVD(random_state=42)

# Train the algorithm on the training set
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x781dbbc89390>

In [None]:
# Make predictions on the test set
predictions = algo.test(testset)

# Evaluate the performance using Root Mean Squared Error (RMSE).
# rmse() prints the score itself by default; verbose=False avoids reporting it twice,
# leaving the full-precision print below as the single result line.
accuracy = rmse(predictions, verbose=False)
print(f'RMSE: {accuracy}')

RMSE: 0.9025
RMSE: 0.9024780227984405


In [None]:
# Example: choose a user and collect the distinct movies they have already rated
user_id = 1
is_target_user = df['userId'] == user_id
user_movies = df.loc[is_target_user, 'movieId'].unique()

In [None]:
# Exclude movies that the user has already rated.
# A set gives constant-time membership checks; testing against the NumPy array directly
# rescans it for every movie in the catalogue. Catalogue order is preserved.
already_rated = set(user_movies)
movies_to_predict = [
    movie_id for movie_id in df['movieId'].unique() if movie_id not in already_rated
]

In [None]:
# Score every not-yet-rated movie for the chosen user with the trained model
user_predictions = []
for candidate_id in movies_to_predict:
    user_predictions.append(algo.predict(user_id, candidate_id))

In [None]:
# Rank predictions from highest to lowest estimated rating and keep the best top_n
top_n = 5
ranked_predictions = sorted(
    user_predictions,
    key=lambda prediction: prediction.est,
    reverse=True,
)
user_recommendations = ranked_predictions[:top_n]

# Report the selected recommendations, one movie per line
print(f"Top {top_n} recommendations for user {user_id}:")
for recommendation in user_recommendations:
    print(f"Movie ID: {recommendation.iid}, Predicted Rating: {recommendation.est}")

Top 5 recommendations for user 1:
Movie ID: 318, Predicted Rating: 3.7036516422226877
Movie ID: 1217, Predicted Rating: 3.6787182597445374
Movie ID: 969, Predicted Rating: 3.6769318358100143
Movie ID: 1252, Predicted Rating: 3.673531289765051
Movie ID: 527, Predicted Rating: 3.673263467183577


In [None]:
# Get the actual test-set ratings for the user chosen earlier (user_id) rather than a
# hard-coded 1, so this comparison stays in sync with the recommendation cells.
# The "_user_1" variable names are kept unchanged in case later cells refer to them.
actual_ratings_user_1 = [(uid, iid, r_ui) for uid, iid, r_ui in testset if uid == user_id]

# Predict once per held-out rating, reusing the filtered list instead of rescanning
# the whole test set with a second copy of the same filter.
predicted_ratings_user_1 = [algo.predict(uid, iid).est for uid, iid, _ in actual_ratings_user_1]

# Print the actual and predicted ratings side by side (same order in both lists)
print(f"Actual Ratings for User {user_id} in the Test Set:")
print(actual_ratings_user_1)

print(f"\nPredicted Ratings for User {user_id}:")
print(predicted_ratings_user_1)

Actual Ratings for User 1 in the Test Set:
[(1, 2968, 1.0), (1, 1263, 2.0), (1, 2193, 2.0), (1, 1061, 3.0)]

Predicted Ratings for User 1:
[3.006181474941189, 3.2772772460529946, 2.767855078624322, 2.928335628371074]
