In [None]:
import numpy as np
import pandas as pd

from database import Rating
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

# Fix NumPy's global RNG so a fresh Restart & Run All repeats the same random
# user pick made later with np.random.choice.
# NOTE(review): the Surprise split and SVD fit below pass no random_state;
# per the Surprise FAQ they presumably draw from this same global RNG — confirm.
SEED = 42
np.random.seed(SEED)

In [None]:
# Load ratings through the project's Rating helper, capped with nrows=10**6
# (presumably the first million rows — confirm in database.Rating).
rating_database = Rating.load(nrows=10**6)

# Count of non-null movieId entries per userId (a Series indexed by userId);
# a later cell uses it to pick a user with enough ratings.
users = (
    rating_database.data
    .groupby('userId')['movieId']
    .count()
)

In [None]:
# Rating bounds handed to Surprise's Reader.
# NOTE(review): hard-coded; if the loaded ratings fall outside 1..5
# (e.g. half-star values such as 0.5) this scale needs adjusting — confirm.
RATING_SCALE = (1, 5)

# Share of the ratings held out for evaluation.
TEST_FRACTION = 0.2

reader = Reader(rating_scale=RATING_SCALE)

# NOTE(review): Dataset.load_from_df expects the frame's columns in
# (user, item, rating) order — assumed to hold for rating_database.data.
rating_dataset = Dataset.load_from_df(rating_database.data, reader)

data_train, data_test = train_test_split(
    rating_dataset,
    test_size=TEST_FRACTION,
)

In [None]:
# Fit Surprise's SVD with its default hyper-parameters on the training split.
model = SVD()
model.fit(data_train)

# Predict every held-out rating, then report the RMSE. With verbose=True
# (the library default) accuracy.rmse prints the score; it is also the cell's
# last expression, so the returned value is displayed as the cell output.
predictions = model.test(data_test, verbose=False)
accuracy.rmse(predictions, verbose=True)

In [None]:
# A user must have strictly more than this many ratings to be eligible for
# the random pick below.
MIN_RATINGS = 10

# `users` maps userId -> number of ratings; keep only the ids above the threshold.
eligible_users = users[users > MIN_RATINGS].index

# Fail with a clear message instead of the opaque error np.random.choice
# raises when handed an empty sequence.
if len(eligible_users) == 0:
    raise ValueError(
        'No user has more than {} ratings in the loaded data'.format(MIN_RATINGS))

# Draws from NumPy's global RNG, so the pick depends on the seed set in the
# first cell and on how much randomness earlier cells consumed.
user = np.random.choice(eligible_users)
print('Chosen user: {}'.format(user))

In [None]:
# Rows returned by find_by_user for the chosen user (presumably that user's
# ratings); the frame's index supplies the item ids passed to the model.
movies = rating_database.find_by_user(user)

# NOTE(review): `predictions` is rebound here and no longer holds the test-set
# predictions produced in the evaluation cell; re-running accuracy.rmse after
# this cell would operate on these per-user predictions instead.
predictions = []
estimates = []
for movie_id in movies.index:
    prediction = model.predict(user, movie_id)
    predictions.append(prediction)
    # When the model flags a prediction as impossible, record NaN rather than
    # the value it carries in `est`.
    if prediction.details['was_impossible']:
        estimates.append(np.nan)
    else:
        estimates.append(prediction.est)

# Attach the estimates next to the existing columns, aligned on the same index,
# and display the result as the cell output.
estimate_column = pd.DataFrame({'estimate': estimates}, index=movies.index)
movies.join(estimate_column)