In [24]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise.model_selection import train_test_split


In [13]:
#a. Load the ‘ratings’ and ‘movies’ datasets which is a part of ‘MovieLense’

ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')


In [19]:
#b. Find the unique number of users and movies in the ‘ratings’ dataset

users = ratings['userId'].unique()
uni_movies = ratings['movieId'].unique()

num_users = len(users)
num_movies = len(uni_movies)

print(f'number of unique users: {num_users}')
print(f'number of unique movies: {num_movies}')

number of unique users: 7120
number of unique movies: 14026


In [16]:
#c. Create a rating matrix for the ‘ratings’ dataset and store it in ‘Ratings’

Ratings = np.zeros((num_users,num_movies))

# Filling the rating matrix with actual ratings
for index, row in ratings.iterrows():
    user_index = np.where(users == row['userId'])[0][0]
    movie_index = np.where(uni_movies == row['movieId'])[0][0]
    Ratings[user_index, movie_index] = row['rating']

In [26]:
#d. Load the ‘ratings’ dataset as SVD’s Dataset object and compute 3-fold cross-validation
# using the SVD object


# Define the Reader object
reader = Reader(rating_scale=(1, 5))

# Load the data into the Dataset object
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Define the SVD algorithm
algo = SVD()

# Run 3-fold cross-validation
results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

# Print the results
print('RMSE:', results['test_rmse'].mean())
print('MAE:', results['test_mae'].mean())


Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8463  0.8441  0.8452  0.8452  0.0009  
MAE (testset)     0.6477  0.6475  0.6471  0.6474  0.0002  
Fit time          20.26   20.36   20.92   20.52   0.29    
Test time         6.93    7.61    8.16    7.56    0.50    
RMSE: 0.845201903054532
MAE: 0.6474057786119753


In [32]:
#e. Find all the movies rated as 5 stars by user id ‘5’ and store it in ‘ratings_1’ data frame

# Filter ratings dataframe by user id 5 and rating of 5
ratings_1 = ratings[(ratings['userId'] == 5) & (ratings['rating'] == 5)]

# Join with movies dataframe to get movie names
ratings_1 = pd.merge(ratings_1, movies, on='movieId', how='inner')[['title', 'genres']]

ratings_1


Unnamed: 0,title,genres
0,"American President, The (1995)",Comedy|Drama|Romance
1,Mr. Holland's Opus (1995),Drama
2,"Birdcage, The (1996)",Comedy
3,Apollo 13 (1995),Adventure|Drama|IMAX
4,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
5,"Shawshank Redemption, The (1994)",Crime|Drama
6,"Lion King, The (1994)",Adventure|Animation|Children|Drama|Musical|IMAX
7,Maverick (1994),Adventure|Comedy|Western
8,Speed (1994),Action|Romance|Thriller
9,True Lies (1994),Action|Adventure|Comedy|Romance|Thriller


In [20]:
#f. Create a shallow copy of the ‘movies’ dataset and store the result in ‘user_5’

user_5 = movies.copy()


In [27]:
#g. Train a recommender system using the SVD object and predict the ratings for user id ‘5’

user_5['predicted_rating'] = user_5['movieId'].apply(lambda x: algo.predict(5, x).est)

In [31]:
#h. Print the top10 movie recommendations for the user id ‘5’

top10_movies = user_5.sort_values('predicted_rating', ascending=False)[:10]

top10_movies

Unnamed: 0,movieId,title,genres,predicted_rating
523,527,Schindler's List (1993),Drama|War,5.0
3340,3429,Creature Comforts (1989),Animation|Comedy,5.0
352,356,Forrest Gump (1994),Comedy|Drama|Romance|War,5.0
1173,1198,Raiders of the Lost Ark (Indiana Jones and the...,Action|Adventure,5.0
4897,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy,5.0
2500,2585,"Lovers of the Arctic Circle, The (Los Amantes ...",Drama|Romance,4.990621
108,110,Braveheart (1995),Action|Drama|War,4.97691
257,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.972063
5853,5952,"Lord of the Rings: The Two Towers, The (2002)",Adventure|Fantasy,4.951999
315,318,"Shawshank Redemption, The (1994)",Crime|Drama,4.939736
