# Import libraries and data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from surprise import Reader, Dataset

from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV

In [2]:
df = pd.read_csv('ratings.csv')

In [3]:
df = df.drop(columns='timestamp')

In [4]:
r = Reader()
d = Dataset.load_from_df(df, r)

# Fit SVD algorithm with grid search

In [5]:
params = {'n_factors': [20, 50, 100],
         'reg_all': [0.02, 0.05, 0.1]}
g_s_svd = GridSearchCV(SVD,param_grid=params,n_jobs=-1)
g_s_svd.fit(d)

In [6]:
print(g_s_svd.best_score)
print(g_s_svd.best_params)

{'rmse': 0.8686280454998142, 'mae': 0.6673052785841285}
{'rmse': {'n_factors': 100, 'reg_all': 0.05}, 'mae': {'n_factors': 100, 'reg_all': 0.05}}


In [7]:
knn_basic = KNNBasic(sim_options={'name':'pearson', 'user_based':True})
cv_knn_basic = cross_validate(knn_basic, d, n_jobs=-1)

# Scoring (root mean squared error)

In [8]:
for i in cv_knn_basic.items():
    print(i)
print('-----------------------')
print(np.mean(cv_knn_basic['test_rmse']))

('test_rmse', array([0.9723378 , 0.96846583, 0.9683936 , 0.97535704, 0.9757883 ]))
('test_mae', array([0.75157743, 0.74628449, 0.74847547, 0.75283182, 0.75401028]))
('fit_time', (0.8328487873077393, 0.7456421852111816, 0.5231609344482422, 0.6437911987304688, 0.54026198387146))
('test_time', (1.9139373302459717, 1.9061760902404785, 1.8245539665222168, 2.1783599853515625, 1.474107027053833))
-----------------------
0.9720685129049051


In [9]:
knn_baseline = KNNBaseline(sim_options={'name':'pearson', 'user_based':True})
cv_knn_baseline = cross_validate(knn_baseline, d)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.


In [10]:
for i in cv_knn_baseline.items():
    print(i)

np.mean(cv_knn_baseline['test_rmse'])

('test_rmse', array([0.88046902, 0.87387412, 0.87644552, 0.87478058, 0.87915189]))
('test_mae', array([0.67162817, 0.66776759, 0.66989094, 0.66810209, 0.67167676]))
('fit_time', (1.1462011337280273, 0.8970060348510742, 0.7907979488372803, 0.6389331817626953, 0.640084981918335))
('test_time', (2.799245834350586, 2.305368661880493, 2.1404120922088623, 2.064384937286377, 2.3926210403442383))


0.8769442256379069

# Import movie titles

In [11]:
m = pd.read_csv('movies.csv')

In [12]:
dataset = d.build_full_trainset()
print('Number of users: ', dataset.n_users, '\n')
print('Number of items: ', dataset.n_items)

Number of users:  610 

Number of items:  9724


In [14]:
svd = SVD(n_factors= 50, reg_all=0.05)
svd.fit(dataset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1216319e8>

In [15]:
svd.predict(2, 4)

Prediction(uid=2, iid=4, r_ui=None, est=3.0609897425443497, details={'was_impossible': False})

In [24]:
def movie_rater(movie_df,num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen :\n')
        if rating == 'n':
            continue
        else:
            rating_one_movie = {'userId':userID,'movieId':movie['movieId'].values[0],'rating':rating}
            rating_list.append(rating_one_movie) 
            num -= 1
    return rating_list

# User input for movie ratings

In [25]:
user_rating = movie_rater(m, 5, 'Comedy')

      movieId                        title          genres
8454   112303  Think Like a Man Too (2014)  Comedy|Romance
How do you rate this movie on a scale of 1-5, press n if you have not seen :
n
      movieId           title        genres
2681     3591  Mr. Mom (1983)  Comedy|Drama
How do you rate this movie on a scale of 1-5, press n if you have not seen :
n
   movieId                    title          genres
2        3  Grumpier Old Men (1995)  Comedy|Romance
How do you rate this movie on a scale of 1-5, press n if you have not seen :
n
      movieId                                     title                genres
3916     5503  Last Kiss, The (Ultimo bacio, L') (2001)  Comedy|Drama|Romance
How do you rate this movie on a scale of 1-5, press n if you have not seen :
n
      movieId                             title                  genres
1510     2038  Cat from Outer Space, The (1978)  Children|Comedy|Sci-Fi
How do you rate this movie on a scale of 1-5, press n if you have not seen

How do you rate this movie on a scale of 1-5, press n if you have not seen :
5


In [26]:
user_rating

[{'userId': 1000, 'movieId': 84637, 'rating': '5'},
 {'userId': 1000, 'movieId': 31700, 'rating': '5'},
 {'userId': 1000, 'movieId': 2987, 'rating': '5'},
 {'userId': 1000, 'movieId': 42009, 'rating': '5'},
 {'userId': 1000, 'movieId': 2011, 'rating': '5'}]

In [27]:
new_ratings_df = df.append(user_rating,ignore_index=True)
new_data = Dataset.load_from_df(new_ratings_df,r)

In [28]:
svd_ = SVD(n_factors= 50, reg_all=0.05)
svd_.fit(new_data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x121631eb8>

In [29]:
list_of_movies = []
for m_id in df['movieId'].unique():
    list_of_movies.append( (m_id,svd_.predict(1000,m_id)[3]))

In [30]:
ranked_movies = sorted(list_of_movies, key=lambda x:x[1], reverse=True)

# Movie recommendations

In [31]:
def recommended_movies(user_ratings,movie_title_df,n):
        for idx, rec in enumerate(user_ratings):
            title = movie_title_df.loc[movie_title_df['movieId'] == int(rec[0])]['title']
            print('Recommendation # ', idx+1, ': ', title, '\n')
            n-= 1
            if n == 0:
                break
            
recommended_movies(ranked_movies,m,5)

Recommendation #  1 :  906    Lawrence of Arabia (1962)
Name: title, dtype: object 

Recommendation #  2 :  277    Shawshank Redemption, The (1994)
Name: title, dtype: object 

Recommendation #  3 :  3622    Amelie (Fabuleux destin d'Amélie Poulain, Le) ...
Name: title, dtype: object 

Recommendation #  4 :  841    Streetcar Named Desire, A (1951)
Name: title, dtype: object 

Recommendation #  5 :  686    Rear Window (1954)
Name: title, dtype: object 

