# Building a Simple Recommender System

In [1]:
import surprise

In [2]:
from surprise import Dataset

movie_data = Dataset.load_builtin('ml-100k')
movie_data

Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /Users/philipacquaye-mensah/.surprise_data/ml-100k


In [3]:
# Train & Test Split
from surprise.model_selection import train_test_split
trainset, testset = train_test_split(movie_data, test_size=.2, random_state=42)

In [4]:
# The method ur returns a dictionary where the keys are user ids, and the values are a list of tuples, where each tuple is in the form ({item_id}, {rating}).
trainset.ur[590]


[(699, 4.0),
 (114, 4.0),
 (339, 5.0),
 (769, 5.0),
 (230, 4.0),
 (212, 5.0),
 (21, 5.0),
 (33, 3.0),
 (94, 4.0),
 (74, 2.0),
 (22, 4.0),
 (471, 5.0),
 (8, 3.0),
 (305, 3.0),
 (645, 3.0),
 (558, 2.0),
 (166, 4.0),
 (129, 4.0),
 (100, 5.0),
 (232, 3.0),
 (391, 3.0),
 (204, 3.0),
 (508, 4.0),
 (79, 3.0),
 (83, 4.0),
 (426, 1.0),
 (6, 3.0),
 (58, 3.0),
 (1119, 5.0),
 (76, 5.0)]

In [5]:
# Training A Recommender System
from surprise import KNNBasic

movie_recommender = KNNBasic()
movie_recommender.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1098ad4d0>

In [6]:
# Evaluating Recommender System Performance
# create some predictions on the test set
predictions = movie_recommender.test(testset)

In [7]:
# See object from predictions
predictions[0]

Prediction(uid='907', iid='143', r_ui=5.0, est=4.039960584359155, details={'actual_k': 40, 'was_impossible': False})

In [8]:
# Measure the accuracy of the recommender system
from surprise import accuracy
accuracy.rmse(predictions)


RMSE: 0.9802


0.980150596704479

## Trying a different algorithm on the same train dataset

In [9]:
# train a simple SVD recommender system using Surprise’s SVD algorithm
from surprise import SVD
from surprise import accuracy

svd_recommender = SVD()
svd_recommender.fit(trainset)

svd_predictions = svd_recommender.test(testset)

accuracy.rmse(svd_predictions)

RMSE: 0.9357


0.9357268153409254

In [10]:
# Make Recommendations
# Get the list of all movie ids
all_movie_ids = movie_data.raw_ratings
all_movie_ids = [movie_id for user_id, movie_id, rating, timestamp in all_movie_ids]

In [11]:
# Get the list of all movie ids that the user with user_id 590 has rated
movie_ids = trainset.ur[590]
movie_ids = [movie_id for movie_id, rating in movie_ids]

In [12]:
# Remove the rated movies from the list of all movie ids
unrated_movies = [movie_id for movie_id in all_movie_ids if movie_id not in movie_ids]

In [14]:
# Create a list of tuples where the first element is the user id 590 and the second element is a movie id from the list of unrated movies
testset = [(590, movie_id, 4) for movie_id in unrated_movies]

# Make predictions
predictions = svd_recommender.test(testset)
predictions[0]

Prediction(uid=590, iid='242', r_ui=4, est=4.1275502538771605, details={'was_impossible': False})