In [None]:
import numpy as np
import pandas as pd

from database import Movie
from database import Rating
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split

# Seed NumPy's global random state so the random steps further down
# (anything drawing from np.random) repeat on a fresh Run All.
np.random.seed(42)
# Use the fully-qualified option key: the bare 'precision' pattern is
# ambiguous on newer pandas (it also matches 'styler.format.precision')
# and raises OptionError there.
pd.set_option('display.precision', 1)

In [None]:
# Load the two stores provided by the project's `database` module.
# NOTE(review): `nrows` presumably caps how many rating rows are read —
# confirm against database.Rating.load.
movie_database = Movie.load()
rating_database = Rating.load(nrows=1000000)

# Per-user tally: number of non-null `movieId` entries for each `userId`.
users = rating_database.data.groupby('userId')['movieId'].count()

In [None]:
# Wrap the ratings frame in a surprise Dataset, declaring a 1-5 rating scale.
# NOTE(review): surprise's load_from_df expects the frame's columns to be
# user id, item id, rating (in that order), and the scale should match the
# actual rating values — confirm both against rating_database.data.
reader = Reader(rating_scale=(1, 5))
rating_dataset = Dataset.load_from_df(df=rating_database.data, reader=reader)

# Hold out 20% of the ratings for evaluation.
data_train, data_test = train_test_split(data=rating_dataset, test_size=0.2)

In [None]:
# Fit an SVD model (library defaults) on the training split.
model = SVD()
model.fit(trainset=data_train)

# Score the held-out split; the RMSE is the cell's displayed result.
predictions = model.test(testset=data_test, verbose=False)
accuracy.rmse(predictions, verbose=True)

In [None]:
# Draw one user id at random (NumPy global random state) from the users
# whose tally in `users` is greater than 10.
user = np.random.choice(users.index[(users > 10).values])
print('Chosen user: {}'.format(user))

In [None]:
# Fetch the chosen user's ratings and attach them to the matching movie rows.
# NOTE(review): this relies on `ratings` being indexed by the same ids that
# `movie_database.find` accepts — confirm in the database module.
ratings = rating_database.find_by_user(user)
movies = movie_database.find(ratings.index).join(ratings)

# Ask the fitted model for a prediction on every row of `movies`.
predictions = [model.predict(uid=user, iid=movie_id) for movie_id in movies.index]

# Keep the estimate unless the prediction was flagged impossible; use NaN then.
estimates = [np.nan if result.details['was_impossible'] else result.est
             for result in predictions]

# Display the user's movies with the model's estimate as an extra column.
estimate_column = pd.Series(estimates, index=movies.index, name='estimate')
movies.join(estimate_column)