In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [25]:
movies = pd.read_csv('movies.csv', usecols = ['movieId', 'title'],
                    dtype = {'movieId': 'int32', 'title': 'str'})
rating = pd.read_csv('ratings.csv', usecols = ['userId', 'movieId', 'rating'],
                    dtype = {'userId': 'int32', 'movieId': 'int32', 'rating': 'float32'})

In [26]:
movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [27]:
rating.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [28]:
df = pd.merge(movies, rating, left_on = 'movieId', right_on = 'movieId', how = 'left')
df.dropna(inplace = True)
df.head()

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1.0,4.0
1,1,Toy Story (1995),5.0,4.0
2,1,Toy Story (1995),7.0,4.5
3,1,Toy Story (1995),15.0,2.5
4,1,Toy Story (1995),17.0,4.5


In [47]:
rating_count = pd.DataFrame(df.groupby('title')['rating'].count()).reset_index().rename(columns = {'rating': 'rating_count'})
rating_count.head()

Unnamed: 0,title,rating_count
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2
3,'Salem's Lot (2004),1
4,'Til There Was You (1997),2


In [48]:
df = df.merge(rating_count, on = 'title')
df.head()

Unnamed: 0,movieId,title,userId,rating,rating_count
0,1,Toy Story (1995),1.0,4.0,215
1,1,Toy Story (1995),5.0,4.0,215
2,1,Toy Story (1995),7.0,4.5,215
3,1,Toy Story (1995),15.0,2.5,215
4,1,Toy Story (1995),17.0,4.5,215


In [76]:
df = df[df['rating_count'] > 50]
df.head()

Unnamed: 0,movieId,title,userId,rating,rating_count
0,1,Toy Story (1995),1.0,4.0,215
1,1,Toy Story (1995),5.0,4.0,215
2,1,Toy Story (1995),7.0,4.5,215
3,1,Toy Story (1995),15.0,2.5,215
4,1,Toy Story (1995),17.0,4.5,215


In [77]:
moviemat = df.pivot_table(index = 'title', columns = 'userId', values = 'rating').fillna(0)
moviemat.head()

userId,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,601.0,602.0,603.0,604.0,605.0,606.0,607.0,608.0,609.0,610.0
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10 Things I Hate About You (1999),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
12 Angry Men (1957),0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001: A Space Odyssey (1968),0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,5.0,0.0,3.0,0.0,4.5
28 Days Later (2002),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,5.0
300 (2007),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,5.0,0.0,4.0


In [78]:
from sklearn.neighbors import NearestNeighbors

In [109]:
movie_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
movie_knn.fit(moviemat)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [110]:
distances, indices = movie_knn.kneighbors([moviemat.loc['Robin Hood: Men in Tights (1993)']], n_neighbors = 10)

In [111]:
distances = distances.ravel()
indices = indices.ravel()

In [116]:
result = pd.DataFrame(moviemat.iloc[indices].index)
result['relation'] = pd.DataFrame(distances)
result

Unnamed: 0,title,relation
0,Robin Hood: Men in Tights (1993),0.0
1,"Three Musketeers, The (1993)",0.55043
2,Hot Shots! Part Deux (1993),0.551067
3,Ace Ventura: Pet Detective (1994),0.554951
4,Last Action Hero (1993),0.57111
5,Aladdin (1992),0.574367
6,"Nightmare Before Christmas, The (1993)",0.577532
7,"Mask, The (1994)",0.578392
8,Addams Family Values (1993),0.588177
9,Ace Ventura: When Nature Calls (1995),0.590773
