# Recommendations

1. Classic Methods
2. Content-Based


In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before
# each cell runs, so edits to local source files are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
import sys
# Add the sibling ../src directory to the module search path.
sys.path.append("../src")

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy

## Classic Recommendations

1. 가장 많이 본 영화
2. 가장 평점이 높은 영화


In [34]:

class ClassicRecommender:
    """Non-personalised recommender that ranks movies by rating statistics.

    The wrapped frame must provide ``ratings_count`` and ``ratings_mean``
    columns; both rankings below sort on them.
    """

    def __init__(self, data):
        # Keep a reference to the movie table; the methods only read from it.
        self.data = data

    def get_most_rated(self, top_k=100):
        """Return the ``top_k`` rows with the largest ``ratings_count``.

        Rows with the same count are ordered by descending ``ratings_mean``.
        """
        ranked = self.data.sort_values(
            by=["ratings_count", "ratings_mean"],
            ascending=False,
        )
        return ranked.iloc[:top_k]

    def get_top_rated(self, top_k=100, min_ratings_count=50):
        """Return the ``top_k`` rows with the largest ``ratings_mean``.

        Only rows whose ``ratings_count`` is at least ``min_ratings_count``
        are eligible; rows with the same mean are ordered by descending
        ``ratings_count``.
        """
        ranked = self.data.sort_values(
            by=["ratings_mean", "ratings_count"],
            ascending=False,
        )
        has_enough_ratings = ranked["ratings_count"] >= min_ratings_count
        return ranked.loc[has_enough_ratings].iloc[:top_k]

In [35]:
# Load the movie table (first CSV column becomes the index) and wrap it in
# the rating-statistics recommender.
rec = ClassicRecommender(pd.read_csv("../data/movies.csv", index_col=0))

In [36]:
# Show the 10 movies with the most ratings (ties broken by higher mean rating).
rec.get_most_rated(10)
# Alternative view: uncomment to show the 10 best-rated movies instead.
# rec.get_top_rated(10)

Unnamed: 0_level_0,title,action,adventure,animation,children,comedy,crime,documentary,drama,fantasy,...,no,noir,romance,sci,thriller,war,western,ratings_mean,ratings_count,years
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
356,Forrest Gump (1994),0,0,0,0,1,0,0,1,0,...,0,0,1,0,0,1,0,4.164134,329.0,1994
318,"Shawshank Redemption, The (1994)",0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,4.429022,317.0,1994
296,Pulp Fiction (1994),0,0,0,0,1,1,0,1,0,...,0,0,0,0,1,0,0,4.197068,307.0,1994
593,"Silence of the Lambs, The (1991)",0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,4.16129,279.0,1991
2571,"Matrix, The (1999)",1,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,4.192446,278.0,1999
260,Star Wars: Episode IV - A New Hope (1977),1,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,4.231076,251.0,1977
480,Jurassic Park (1993),1,1,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,3.75,238.0,1993
110,Braveheart (1995),1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,4.031646,237.0,1995
589,Terminator 2: Judgment Day (1991),1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,3.970982,224.0,1991
527,Schindler's List (1993),0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,4.225,220.0,1993


## Content-Based

1. 유사한 Genre를 갖고 있는 영화

In [109]:
from sklearn.neighbors import NearestNeighbors

class ContentBasedRecommender:
    """Recommend movies whose genre vectors are closest to a given movie.

    A nearest-neighbour index is fitted once on the genre columns of the
    movie table; each query looks up the neighbours of one movie's genre
    vector and orders them by distance and rating statistics.
    """

    def __init__(self, data, max_neighbors, genre_columns=None):
        """Fit the nearest-neighbour index on the genre columns.

        Parameters
        ----------
        data : pandas.DataFrame
            Movie table indexed by movie id. Besides the genre columns it
            must contain ``ratings_mean`` and ``ratings_count``, which are
            used to order the recommendations.
        max_neighbors : int
            Maximum number of neighbours returned per query. Clamped to the
            number of rows in ``data``, because asking the index for more
            neighbours than fitted samples raises at query time.
        genre_columns : sequence of column labels, optional
            Columns that make up the genre vector. Defaults to
            ``data.columns[1:25]``, the slice this class has always used.
            NOTE(review): that default assumes column 0 is the title and
            the next 24 columns are the genre indicators - confirm against
            the CSV schema.
        """
        self.data = data
        if genre_columns is None:
            genre_columns = data.columns[1:25]
        self.genre_columns = pd.Index(genre_columns)

        # n_neighbors must be passed by keyword: current scikit-learn
        # releases reject positional constructor arguments.
        n_neighbors = min(max_neighbors, len(data))
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)
        self.nn.fit(self.data[self.genre_columns])

    def recommend_by_genre(self, movie_id, exclude_self=False):
        """Return the movies nearest to ``movie_id`` in genre space.

        Parameters
        ----------
        movie_id : index label
            Label of the query movie in ``data.index``.
        exclude_self : bool, default False
            When True, drop the query movie from the result. The default
            keeps it, which matches the previous behaviour.

        Returns
        -------
        pandas.DataFrame
            The neighbouring rows of ``data`` with an extra ``distance``
            column, sorted by ascending distance, then descending
            ``ratings_mean``, then descending ``ratings_count``.

        Raises
        ------
        KeyError
            If ``movie_id`` is not present in ``data.index``.
        """
        # Double brackets keep the query two-dimensional (one row).
        query = self.data.loc[[movie_id], self.genre_columns]
        distances, positions = self.nn.kneighbors(query)

        # kneighbors returns row positions, so select with iloc and attach
        # the distances positionally instead of relying on index alignment.
        neighbours = self.data.iloc[positions[0]].assign(distance=distances[0])
        if exclude_self:
            neighbours = neighbours.drop(index=movie_id, errors="ignore")

        return neighbours.sort_values(
            by=["distance", "ratings_mean", "ratings_count"],
            ascending=[True, False, False],
        )
    

In [110]:
# Re-load the movie table and fit the genre-based recommender on it;
# 100 is passed as max_neighbors, the neighbour count handed to the index.
rec = ContentBasedRecommender(pd.read_csv("../data/movies.csv", index_col=0), 100)

In [112]:
# Movies whose genre vector is closest to that of movieId 1.
rec.recommend_by_genre(1)

Unnamed: 0_level_0,title,action,adventure,animation,children,comedy,crime,documentary,drama,fantasy,...,noir,romance,sci,thriller,war,western,ratings_mean,ratings_count,years,distance
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
91355,Asterix and the Vikings (Astérix et les Viking...,0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,5.000000,1.0,2006,0.000000
1,Toy Story (1995),0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,3.920930,215.0,1995,0.000000
4886,"Monsters, Inc. (2001)",0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,3.871212,132.0,2001,0.000000
3114,Toy Story 2 (1999),0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,3.860825,97.0,1999,0.000000
4016,"Emperor's New Groove, The (2000)",0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,3.716216,37.0,2000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3945,Digimon: The Movie (2000),0,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,3.000000,1.0,2000,1.414214
8907,Shark Tale (2004),0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,2.346154,13.0,2004,1.414214
1806,Paulie (1998),0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,2.312500,8.0,1998,1.414214
5538,"Care Bears Movie, The (1985)",0,0,1,1,0,0,0,0,1,...,0,0,0,0,0,0,1.500000,2.0,1985,1.414214


In [42]:
from scipy.spatial.distance import squareform, pdist

# `genres` was never defined in this notebook, so the cell failed on a fresh
# kernel; derive it from the recommender fitted above.
genres = rec.data[rec.genre_columns]

# pdist returns the condensed pairwise distances (Euclidean by default);
# squareform expands them into a full symmetric movie-by-movie matrix.
# NOTE: the dense matrix has one cell per pair of movies, so memory grows
# quadratically with the number of rows.
pd.DataFrame(squareform(pdist(genres)), index=genres.index, columns=genres.index)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.000000,1.414214,2.236068,2.449490,2.000000,2.828427,2.236068,1.732051,2.449490,2.449490,...,2.449490,2.236068,2.236068,2.000000,2.449490,1.732051,1.414214,2.449490,2.236068,2.000000
2,1.414214,0.000000,2.236068,2.449490,2.000000,2.449490,2.236068,1.000000,2.000000,2.000000,...,2.828427,2.236068,2.236068,2.000000,2.000000,2.236068,2.000000,2.000000,2.236068,2.000000
3,2.236068,2.236068,0.000000,1.000000,1.000000,2.236068,0.000000,2.000000,1.732051,2.236068,...,2.236068,2.000000,1.414214,1.732051,1.732051,2.000000,1.732051,1.732051,2.000000,1.000000
4,2.449490,2.449490,1.000000,0.000000,1.414214,2.449490,1.000000,2.236068,2.000000,2.449490,...,2.449490,1.732051,1.000000,2.000000,2.000000,2.236068,2.000000,1.414214,2.236068,1.414214
5,2.000000,2.000000,1.000000,1.414214,0.000000,2.000000,1.000000,1.732051,1.414214,2.000000,...,2.000000,1.732051,1.000000,1.414214,1.414214,1.732051,1.414214,1.414214,1.732051,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,1.732051,2.236068,2.000000,2.236068,1.732051,2.236068,2.000000,2.449490,1.732051,2.236068,...,1.732051,2.000000,2.000000,1.732051,2.236068,0.000000,1.000000,2.236068,1.414214,1.732051
193583,1.414214,2.000000,1.732051,2.000000,1.414214,2.449490,1.732051,2.236068,2.000000,2.449490,...,2.000000,1.732051,1.732051,1.414214,2.000000,1.000000,0.000000,2.000000,1.732051,1.414214
193585,2.449490,2.000000,1.732051,1.414214,1.414214,2.000000,1.732051,1.732051,1.414214,2.000000,...,2.449490,1.000000,1.000000,1.414214,1.414214,2.236068,2.000000,0.000000,1.732051,1.414214
193587,2.236068,2.236068,2.000000,2.236068,1.732051,1.732051,2.000000,2.000000,1.000000,1.732051,...,1.732051,1.414214,2.000000,1.000000,1.732051,1.414214,1.732051,1.732051,0.000000,1.732051
