In [None]:
import numpy as np
import pandas as pd

from data import Movie, Rating
from recommender import NearestNeighbor

# Seed NumPy's global random state so the np.random.choice draws further down
# (random user, random movie) are reproducible across fresh kernel runs.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [None]:
# Load the movie catalogue and the ratings. nrows=10**6 is forwarded to
# Rating.load (presumably a cap on the number of rating rows read; confirm
# in data.py).
movie_database = Movie.load()
rating_database = Rating.load(nrows=10**6)

# Rating counts keyed by movieId and by userId, each ordered from the most
# to the fewest ratings. Later cells filter these by count to pick a random
# user / movie, so the names `movies` and `users` must keep these values.
movies = (
    rating_database.data
    .groupby('movieId')['userId']
    .count()
    .sort_values(ascending=False)
)
users = (
    rating_database.data
    .groupby('userId')['movieId']
    .count()
    .sort_values(ascending=False)
)

# User-based Recommendation

In [None]:
# Build the user-based model: users act as the proxies and movies as the
# targets, with the rating column supplying the values.
rating_frame = rating_database.data
user_model = NearestNeighbor(
    proxies=rating_frame['userId'],
    targets=rating_frame['movieId'],
    ratings=rating_frame['rating'],
)

# Draw one user at random among those whose rating count exceeds 10
# (`users` maps userId -> number of ratings).
active_users = users.loc[users > 10].index
user = np.random.choice(active_users)

## Already Watched

In [None]:
# Ratings given by the selected user; the result's index is used to look up
# the matching rows in the movie catalogue (presumably movie ids; confirm
# in data.py).
ratings = rating_database.find_by_user(user)

# Attach those ratings to the movie details and list the result by title.
watched = movie_database.find(ratings.index).join(ratings)
watched.sort_values(by='title')

## Should Watch

In [None]:
# Ask the user-based model for recommendations for `user`. The call returns
# two values, unpacked here as the recommended movie ids and the rating
# associated with each of them.
#
# Dedicated names are used on purpose: the notebook-level `movies` holds the
# per-movie rating counts that the item-based section filters with
# `movies[movies > 10]`, so it must not be rebound to this table.
recommended_ids, recommended_ratings = user_model.recommend_targets(user)

# Join the catalogue rows for those ids with their ratings (aligned on the
# id index) and display the table ordered by title.
recommended_movies = movie_database.find(recommended_ids).join(
    pd.DataFrame(data={'rating': recommended_ratings}, index=recommended_ids))

recommended_movies.sort_values(by='title')

# Item-based Recommendation

In [None]:
# Build the item-based model: same ratings table as the user-based model, with
# the roles swapped so that movies are the proxies and users the targets.
movie_model = NearestNeighbor(proxies=rating_database.data['movieId'],
                              targets=rating_database.data['userId'],
                              ratings=rating_database.data['rating'])

# Number of ratings per movie, most-rated first, derived directly from the
# ratings table and kept under its own name so this cell does not depend on
# whatever `movies` happens to be bound to when it runs.
movie_rating_counts = (
    rating_database.data
    .groupby('movieId')['userId']
    .agg('count')
    .sort_values(ascending=False)
)

# Draw one movie at random among those with more than 10 ratings.
movie = np.random.choice(movie_rating_counts[movie_rating_counts > 10].index)

## Already Watched

In [None]:
# Aggregate the selected movie's ratings to their mean and count, then
# transpose so the statistics become columns. The 'rating' row is relabelled
# with the movie id so it lines up with the catalogue row in the join below;
# the columns are relabelled 'rating' (mean) and 'users' (count).
movie_stats = (
    rating_database.find_by_movie(movie)
    .aggregate(['mean', 'count'])
    .T
    .rename(index={'rating': movie},
            columns={'mean': 'rating', 'count': 'users'})
)

# Show the movie's catalogue entry next to its aggregated rating figures.
movie_database.find([movie]).join(movie_stats)

## Should Watch

In [None]:
# Ask the item-based model for recommendations for `movie`. The call returns
# two values, unpacked here as movie ids and a similarity value for each.
#
# Dedicated names are used instead of rebinding `movies`: that name holds the
# per-movie rating counts the movie-selection cell filters on, and
# overwriting it here would break that cell when it is re-run.
similar_ids, similarities = movie_model.recommend_proxies(movie)

# Join the catalogue rows for those ids with their similarity values
# (aligned on the id index) and display the table ordered by title.
similar_movies = movie_database.find(similar_ids).join(
    pd.DataFrame(data={'similarity': similarities}, index=similar_ids))

similar_movies.sort_values(by='title')