In [1]:
# 1. Implementing User-Based Recommender System using SVD (Singular
# Value Decomposition) method:
# a. Load the ‘ratings’ and ‘movies’ datasets which is a part of ‘MovieLense’

import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

# Task 1a: Load the 'ratings' and 'movies' datasets
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

In [2]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [3]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# b. Find the unique number of users and movies in the ‘ratings’ dataset

# Task 1b: Find unique number of users and movies
num_users = ratings['userId'].nunique()
num_movies = ratings['movieId'].nunique()

print(f'unique number of users: {num_users}')
print(f'unique number of moives: {num_movies}')

unique number of users: 7120
unique number of moives: 14026


In [5]:
# c. Create a rating matrix for the ‘ratings’ dataset and store it in ‘Ratings’

# Task 1c: Create a rating matrix
Ratings = ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
Ratings.head()


movieId,1,2,3,4,5,6,7,8,9,10,...,129350,129354,129428,129707,130052,130073,130219,130462,130490,130642
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# d. Load the ‘ratings’ dataset as SVD’s Dataset object and compute 3-fold
# cross-validation using the SVD object

# Task 1d: Load the 'ratings' dataset as SVD's Dataset object and compute 3-fold cross-validation
reader = Reader()
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
svd = SVD()
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)


Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8454  0.8459  0.8444  0.8452  0.0006  
MAE (testset)     0.6478  0.6483  0.6472  0.6478  0.0004  
Fit time          11.38   11.64   12.23   11.75   0.36    
Test time         4.17    4.43    4.52    4.37    0.15    


{'test_rmse': array([0.84539487, 0.84585946, 0.84440343]),
 'test_mae': array([0.64776487, 0.64828487, 0.64724804]),
 'fit_time': (11.37598705291748, 11.644625425338745, 12.226615190505981),
 'test_time': (4.1745524406433105, 4.43005108833313, 4.518152713775635)}

In [11]:
# Print the head of ratings dataset
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [14]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [12]:
# e. Find all the movies rated as 5 stars by user id ‘5’ and store it in
# ‘ratings_1’ data frame

ratings_1 = ratings[(ratings['userId'] == 5) & (ratings['rating'] == 5)]
ratings_1 = ratings_1.set_index('movieId')
ratings_1 = ratings_1.join(movies)['title']
ratings_1.head(10)

movieId
11                    Dracula: Dead and Loving It (1995)
62     Don't Be a Menace to South Central While Drink...
141                                         Gospa (1995)
150                                Addiction, The (1995)
260                             Ladybird Ladybird (1994)
318    Strawberry and Chocolate (Fresa y chocolate) (...
364                                      Maverick (1994)
368                                 Reality Bites (1994)
377                      When a Man Loves a Woman (1994)
380                                   Bad Company (1995)
Name: title, dtype: object

In [15]:
# f. Create a shallow copy of the ‘movies’ dataset and store the result in
# ‘user_5’
# g. Train a recommender system using the SVD object and predict the
# ratings for user id ‘5’.
# h. Print the top10 movie recommendations for the user id ‘5’


# Create a shallow copy for the movies dataset
user_5 = movies.copy()

#Reset the index for user_5 dataset
user_5 = user_5.reset_index()



# getting full dataset
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)


#create a training set for svd
trainset = data.build_full_trainset()
svd.fit(trainset)

#Predict the ratings for user5
user_5['Estimate_Score'] = user_5['movieId'].apply(lambda x: svd.predict(5, x).est)

#Drop extra columns from the user5 data frame
user_5 = user_5.drop(['movieId','genres','index'], axis = 1)

# Sort predicted ratings for user5 in descending order
user_5 = user_5.sort_values('Estimate_Score', ascending=False)

#Print top 10 recommendations
print(user_5.head(10))



                                                   title  Estimate_Score
5853       Lord of the Rings: The Two Towers, The (2002)        5.000000
4897   Lord of the Rings: The Fellowship of the Ring,...        5.000000
7041   Lord of the Rings: The Return of the King, The...        5.000000
1172                          Princess Bride, The (1987)        5.000000
1173   Raiders of the Lost Ark (Indiana Jones and the...        5.000000
18312                      Dark Knight Rises, The (2012)        4.981964
1016                          Sound of Music, The (1965)        4.967010
523                              Schindler's List (1993)        4.949644
1212                              Terminator, The (1984)        4.928775
3487                                    Gladiator (2000)        4.923575
