In [None]:
import numpy as np

from data import Movie as MovieData
from data import Rating as RatingData
from recommender import NearestNeighbor
from recommender import Rating as RatingMatrix
from recommender import Similarity as SimilarityMatrix
from pipeline import Builder

# Seed NumPy's legacy global RNG up front so the np.random.choice draw made
# further down yields the same user on every Restart & Run All.
np.random.seed(seed=42)

In [None]:
# Load both datasets through the project's data wrappers. The ratings load is
# given nrows=1_000_000 (presumably a cap on rows read -- confirm in
# data.Rating.load).
movie_data = MovieData.load()
rating_data = RatingData.load(nrows=1_000_000)

In [None]:
# Assemble a pipeline with one encode step per identifier column.
pipeline = (
    Builder()
    .encode('movieId')
    .encode('userId')
    .build()
)

# Run the pipeline on a copy so rating_data.data itself is not handed over.
data = pipeline.fit_transform(rating_data.data.copy())

# Grab the encoders held by the two steps; later cells read their classes_
# and pass them on to the model.
user_encoder = pipeline.named_steps['encode_userId'].encoder
movie_encoder = pipeline.named_steps['encode_movieId'].encoder

In [None]:
# Rating matrix keyed by userId (proxy) and movieId (target); its shape is
# (number of user classes, number of movie classes) taken from the encoders.
rating_matrix = RatingMatrix(
    data,
    proxy='userId',
    target='movieId',
    shape=(len(user_encoder.classes_), len(movie_encoder.classes_)),
)

# Similarity matrix is derived from the rating matrix's underlying data.
similarity_matrix = SimilarityMatrix(rating_matrix.data)

# The model receives both matrices plus the encoders for the two id columns.
model = NearestNeighbor(
    rating_matrix.data,
    similarity_matrix.data,
    proxy_encoder=user_encoder,
    target_encoder=movie_encoder,
)

In [None]:
# Draw one user at random from the classes known to the user encoder.
user_id = np.random.choice(user_encoder.classes_)

# Display the movies recommended for that user, ordered by title.
(
    movie_data
    .find(model.recommend(user_id))
    .sort_values(by='title')
)

In [None]:
# Fetch the ratings recorded for the sampled user.
ratings = rating_data.find(user_id)

# Display the movies matching the ratings' index, joined with those ratings
# and ordered by title, for comparison with the recommendations.
(
    movie_data
    .find(ratings.index)
    .join(ratings)
    .sort_values(by='title')
)