# References

* [MovieLens](https://grouplens.org/datasets/movielens/)

In [None]:
import numpy as np
import pandas as pd

from problem.database import Movie
from problem.database import Rating
from surprise import Dataset
from surprise import KNNBaseline
from surprise import Reader

# Seed NumPy's global RNG so the np.random.choice user pick later in the
# notebook is reproducible from run to run.
np.random.seed(42)
# Render floats with one decimal place. The fully qualified option name is
# used on purpose: the bare 'precision' pattern matches more than one option
# on recent pandas releases and set_option then raises OptionError.
pd.set_option('display.precision', 1)

In [None]:
# Load the movie table and the ratings (nrows capped at 500,000), then keep
# only the first part of the split for fitting the models further down.
movie_database = Movie.load()
rating_database = Rating.load(nrows=500_000)
train_database, _ = rating_database.split()

# Non-null movieId entries per userId, computed over all loaded ratings.
users = rating_database.data.groupby('userId')['movieId'].count()

# Draw one user at random among those with more than 10 rated movies.
candidate_ids = users[users > 10].index
user = np.random.choice(candidate_ids)

# NOTE(review): presumably find_by_user returns that user's ratings indexed
# by movie id, and find() looks those ids up in the movie table -- confirm
# against problem.database.
ratings = rating_database.find_by_user(user)
rated_movies = movie_database.find(ratings.index)
movies = rated_movies.join(ratings)

In [None]:
# Surprise is told up front that ratings lie on a 1..5 scale.
reader = Reader(rating_scale=(1, 5))
# Wrap the training frame in a Surprise Dataset, then turn every loaded rating
# into a Trainset (no folds held out). train_database is rebound to that
# Trainset because the model cells below fit on this name.
dataset = Dataset.load_from_df(train_database.data, reader)
train_database = dataset.build_full_trainset()

# User-based Filtering

In [None]:
# k-NN with baseline, using MSD similarity computed between users.
model = KNNBaseline(sim_options={'name': 'msd', 'user_based': True})
model.fit(train_database)

# Predict the selected user's rating for each movie in `movies`, in index
# order. A prediction flagged 'was_impossible' is reported as NaN rather than
# its `est` value.
predictions = (model.predict(user, movie) for movie in movies.index)
estimates = [np.nan if p.details['was_impossible'] else p.est
             for p in predictions]

# Show the estimates side by side with the movie rows (cell output).
estimate_frame = pd.DataFrame({'estimate': estimates}, index=movies.index)
movies.join(estimate_frame)

# Item-based Filtering

In [None]:
# k-NN with baseline, using MSD similarity computed between items.
model = KNNBaseline(sim_options={'name': 'msd', 'user_based': False})
model.fit(train_database)

# Predict the selected user's rating for each movie in `movies`, in index
# order. A prediction flagged 'was_impossible' is reported as NaN rather than
# its `est` value.
predictions = (model.predict(user, movie) for movie in movies.index)
estimates = [np.nan if p.details['was_impossible'] else p.est
             for p in predictions]

# Show the estimates side by side with the movie rows (cell output).
estimate_frame = pd.DataFrame({'estimate': estimates}, index=movies.index)
movies.join(estimate_frame)