In [None]:
import numpy as np
import pandas as pd

from database import Movie
from database import Rating
from recommender import NearestNeighbor

# Seed NumPy's global generator so the user/movie sampled below are the same
# on every Restart & Run All.
np.random.seed(42)
# Use the fully qualified option name: the bare 'precision' pattern is
# ambiguous on pandas versions that also define 'styler.format.precision'
# and raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.precision', 1)

In [None]:
# Load the movie catalogue and the ratings table. `nrows` presumably caps the
# number of rating rows read -- confirm against database.Rating.load.
movie_database = Movie.load()
rating_database = Rating.load(nrows=10**6)

# Number of (non-null) ratings per user and per movie.
ratings_frame = rating_database.data
users = ratings_frame.groupby('userId')['movieId'].count()
movies = ratings_frame.groupby('movieId')['userId'].count()

# Keep only ids with more than 10 ratings, then sample one of each.
# The user is drawn before the movie so the seeded random sequence is
# consumed in a fixed order.
frequent_users = users[users > 10].index
frequent_movies = movies[movies > 10].index
user = np.random.choice(frequent_users)
movie = np.random.choice(frequent_movies)

print('User: {}'.format(user))
print('Movie: {}'.format(movie))

# User-based Filtering

In [None]:
# Fit a NearestNeighbor model on the (userId, movieId, rating) columns.
# Presumably the first column identifies the entities being compared (users
# here) -- confirm against recommender.NearestNeighbor.fit.
user_model = NearestNeighbor()
user_columns = ('userId', 'movieId', 'rating')
user_model.fit(tuple(rating_database.data[name] for name in user_columns))

In [None]:
# Put the sampled user's actual ratings next to the user-based model's
# estimates for the same movies.
ratings = rating_database.find_by_user(user)
rated_movies = movie_database.find(ratings.index).join(ratings)

# The loop variable is deliberately NOT called `movie`: that name holds the
# movie sampled in the setup cell and is needed by the item-based section.
# (Python 2 list comprehensions leak their loop variable and would
# overwrite it.)
estimates = [user_model.predict(user, movie_id)
             for movie_id in rated_movies.index]

rated_movies.join(
    pd.DataFrame({'estimate': estimates}, index=rated_movies.index))

# Item-based Filtering

In [None]:
# Fit a second NearestNeighbor model with the id columns swapped:
# (movieId, userId, rating). Presumably this makes movies the entities being
# compared -- confirm against recommender.NearestNeighbor.fit.
movie_model = NearestNeighbor()
movie_columns = ('movieId', 'userId', 'rating')
movie_model.fit(tuple(rating_database.data[name] for name in movie_columns))

In [None]:
# Show the sampled movie's catalogue entry together with its mean rating and
# its rating count (displayed as `rating` and `users`).
# Assumes find_by_movie returns a frame with a 'rating' column -- the row
# rename below relies on that label; confirm against database.Rating.
movie_ratings = rating_database.find_by_movie(movie)

# aggregate() yields rows mean/count; transposing gives a single 'rating' row,
# which is relabelled with the movie id so it lines up with the catalogue
# index in the join.
rating_summary = movie_ratings.aggregate(['mean', 'count']).T.rename(
    index={'rating': movie},
    columns={'mean': 'rating', 'count': 'users'})

movie_database.find([movie]).join(rating_summary)

In [None]:
# Look up the movies the item-based model connects to the sampled movie and
# attach the similarity value returned for each of them.
# Distinct names are used for the id sequence and the resulting frame so that
# neither clobbers `movies` from the setup cell and each name keeps one type.
neighbor_ids, similarities = movie_model.connect(movie)
similarity_frame = pd.DataFrame({'similarity': similarities},
                                index=neighbor_ids)
similar_movies = movie_database.find(neighbor_ids).join(similarity_frame)

# Displayed alphabetically by title (not ordered by similarity).
similar_movies.sort_values(by='title')