# Recommendation System for MoveLens Dataset using SVD

In [1]:
# Import libraries
import numpy as np
import pandas as pd

1. Implementing User-Based Recommender System using SVD (Singular Value Decomposition)
method:
a. Load the ‘ratings’ and ‘movies’ datasets which is a part of ‘MovieLense’

# To load the 'ratings' and 'movies' dataset after uploading them to Jupyter notebook

In [2]:
# Reading ratings file

ratings = pd.read_csv('ratings.csv', usecols=['userId','movieId','rating','timestamp'])

In [3]:
# Reading movies file

movies = pd.read_csv('movies.csv', usecols=['movieId','title','genres'])

In [4]:
# Print first five rows of movies dataset

movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Print first five rows of ratings dataset

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


b. Find the unique number of users and movies in the ‘ratings’ dataset

# To find the unique number of users and movies in the 'ratings' dataset

In [13]:
n_users = ratings['userId'].nunique()
n_movies = ratings['movieId'].nunique()

print(f'Number of users = {n_users} and Number of movies = {n_movies}')

Number of users = 1291 and Number of movies = 10089


c. Create a rating matrix for the ‘ratings’ dataset and store it in ‘Ratings’

# To create a rating matrix for the 'ratings' dataset

In [15]:
Ratings = ratings.pivot_table(index=['userId'], columns=['movieId'], values='rating', fill_value=0)

Ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,120823,121117,121135,121583,125916,128488,128594,128686,128715,128832
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,3.5,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0,0.0,0,0.0,0,0,0.0
2,0.0,0.0,4.0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0,0.0,0,0.0,0,0,0.0
3,4.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0,0.0,0,0.0,0,0,0.0
4,0.0,0.0,0.0,0,0.0,3.0,0.0,0.0,0.0,4.0,...,0,0.0,0.0,0,0.0,0,0.0,0,0,0.0
5,0.0,3.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0,0.0,0,0.0,0,0,0.0


d. Load the ‘ratings’ dataset as SVD’s Dataset object and compute 3-fold cross-validation using the SVD object 

# To install the scikit-surprise library for implementing SVD

### Run the following command in the Anaconda Prompt to install surprise package

In [18]:
!pip install scikit-surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3095429 sha256=48c40e4de7dac856e60c32a47ddd3dfbd7e9f5d1cbca43c376fb9a2f2442cc4b
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [20]:
# Import libraries from Surprise package
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

# Load Reader library
reader = Reader()

# Load ratings dataset with Dataset library
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Use the SVD algorithm.
svd = SVD()

# Compute the RMSE of the SVD algorithm.
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9024  0.9028  0.8982  0.9011  0.0021  
MAE (testset)     0.6947  0.6954  0.6913  0.6938  0.0018  
Fit time          2.32    2.37    2.40    2.36    0.03    
Test time         0.73    0.68    0.99    0.80    0.14    


{'test_rmse': array([0.90235348, 0.90278026, 0.89817157]),
 'test_mae': array([0.69469116, 0.69542461, 0.69126275]),
 'fit_time': (2.324098587036133, 2.3703832626342773, 2.396501064300537),
 'test_time': (0.7253642082214355, 0.6794612407684326, 0.9885659217834473)}

In [21]:
# Print the head of ratings dataset
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486000.0
1,1,29,3.5,1112485000.0
2,1,32,3.5,1112485000.0
3,1,47,3.5,1112485000.0
4,1,50,3.5,1112485000.0


e. Find all the movies rated as 5 stars by user id ‘5’ and store it in ‘ratings_1’ data frame 

# To find all the movies rated as more than 5 stars by user with userId = 1

In [24]:
ratings_1 = ratings[(ratings['userId'] == 5) & (ratings['rating'] == 5)]
ratings_1 = ratings_1.set_index('movieId')
ratings_1 = ratings_1.join(movies)['title']
ratings_1.head(10)

movieId
11                    Dracula: Dead and Loving It (1995)
62     Don't Be a Menace to South Central While Drink...
141                                         Gospa (1995)
150                                Addiction, The (1995)
260                             Ladybird Ladybird (1994)
318    Strawberry and Chocolate (Fresa y chocolate) (...
364                                      Maverick (1994)
368                                 Reality Bites (1994)
377                      When a Man Loves a Woman (1994)
380                                   Bad Company (1995)
Name: title, dtype: object

f. Create a shallow copy of the ‘movies’ dataset and store the result in ‘user_5’

# Train an SVD to predict ratings for user with userId = 1

In [28]:
# Create a shallow copy for the movies dataset
user_5 = movies.copy()

#Reset the index for user_5 dataset
user_5 = user_5.reset_index()



# getting full dataset
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)


#create a training set for svd
trainset = data.build_full_trainset()
svd.fit(trainset)

#Predict the ratings for user1
user_5['Estimate_Score'] = user_5['movieId'].apply(lambda x: svd.predict(1, x).est)

#Drop extra columns from the user1 data frame
user_5 = user_5.drop(['movieId','genres','index'], axis = 1)

# Sort predicted ratings for user1 in descending order
user_5 = user_5.sort_values('Estimate_Score', ascending=False)

h. Print the top10 movie recommendations for the user id ‘5’

In [29]:
top_10_recommendations = user_5.sort_values('Estimate_Score', ascending=False).head(10)
print(top_10_recommendations)

                                                   title  Estimate_Score
13522              Dr. Horrible's Sing-Along Blog (2008)        4.575576
5853       Lord of the Rings: The Two Towers, The (2002)        4.554906
7041   Lord of the Rings: The Return of the King, The...        4.545676
1155                               Paths of Glory (1957)        4.490258
5917                 City of God (Cidade de Deus) (2002)        4.468199
6274                      Capturing the Friedmans (2003)        4.466734
10886                              V for Vendetta (2006)        4.453568
3008                         Grapes of Wrath, The (1940)        4.449933
12525                            Dark Knight, The (2008)        4.448513
887                                   Rear Window (1954)        4.437224
