In [None]:
# 1. Imports

In [None]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [None]:
# 2. Data loading

In [None]:
# Ratings drive model training; the movie catalogue supplies the
# `movieid` -> `title` lookup used when recommendations are displayed.
ratings = pd.read_csv('../data/ratings.csv')
# NOTE(review): `movies` is consumed by the recommendation cells but was never
# loaded, so a fresh-kernel run failed with NameError. The path below assumes
# the catalogue sits next to ratings.csv and has `movieid`/`title` columns — confirm.
movies = pd.read_csv('../data/movies.csv')

In [None]:
# 3. Prepare Surprise dataset

In [None]:
# Give Surprise the rating bounds actually observed in the data, then wrap the
# user / item / rating columns as a Surprise Dataset.
rating_bounds = (ratings.rating.min(), ratings.rating.max())
reader = Reader(rating_scale=rating_bounds)
data = Dataset.load_from_df(
    ratings[['userid', 'movieid', 'rating']],
    reader,
)

In [None]:
# 4. Train-test split

In [None]:
# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split
# identical across runs.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 5. SVD training

In [None]:
# SVD initialises its factors randomly and trains with SGD, so without a seed
# the RMSE and the recommendations change on every run. Seed it with the same
# value as the train/test split so Restart & Run All is reproducible.
algo = SVD(random_state=42)
algo.fit(trainset)

In [None]:
# 6. Evaluation

In [None]:
# Score the held-out ratings. accuracy.rmse prints its own "RMSE: ..." line by
# default, which duplicated the figure; silence it and report the value once
# under an explicit label.
predictions = algo.test(testset)
test_rmse = accuracy.rmse(predictions, verbose=False)
print('Test RMSE:', test_rmse)

In [None]:
# 7. Recommend for a demo user

In [None]:
def recommend_for_user(algo, user_id, movies, ratings, n=5):
    """Return the titles of the top-``n`` predicted movies the user has not rated.

    Parameters
    ----------
    algo : object exposing ``predict(user_id, movie_id)`` whose result has an
        ``est`` attribute (the estimated rating).
    user_id : identifier matched against ``ratings.userid``.
    movies : DataFrame with ``movieid`` and ``title`` columns.
    ratings : DataFrame with ``userid`` and ``movieid`` columns.
    n : number of titles to return (default 5).

    Returns
    -------
    pandas.Series of titles ordered from highest to lowest estimate, with a
    fresh 0-based index.
    """
    # Candidate pool: every catalogue movie minus the ones this user already rated.
    rated_by_user = ratings.loc[ratings.userid == user_id, 'movieid']
    candidates = list(set(movies.movieid) - set(rated_by_user))

    # Pair each candidate with its estimated rating.
    scored = []
    for movie_id in candidates:
        estimate = algo.predict(user_id, movie_id).est
        scored.append((movie_id, estimate))

    # Highest estimate first, then keep the leading n ids.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    best_ids = [movie_id for movie_id, _ in scored[:n]]

    # Translate ids to titles, preserving the ranking order.
    title_by_id = movies.set_index('movieid').title
    return title_by_id.loc[best_ids].reset_index(drop=True)

In [None]:
# Pick one user at random for the demo. The draw is seeded (same value as the
# train/test split) so a fresh Restart & Run All shows the same user each time.
demo_user_id = ratings.userid.sample(1, random_state=42).iloc[0]
print(recommend_for_user(algo, demo_user_id, movies, ratings))