# Recommendation System using Nearest Neighbor 

In [1]:
import numpy as np
import pandas as pd

In [2]:
movies_data = pd.read_csv('C:/Users/Chirag Chauhan/Desktop/movies_data/ml-latest-small/movies.csv',usecols=['movieId','title'])
ratings_data = pd.read_csv('C:/Users/Chirag Chauhan/Desktop/movies_data/ml-latest-small/ratings.csv',usecols=['userId','movieId','rating'])

In [3]:
movies_data.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [4]:
ratings_data.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [5]:
data = pd.merge(ratings_data,movies_data,on='movieId')
data.head()

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,5,1,4.0,Toy Story (1995)
2,7,1,4.5,Toy Story (1995)
3,15,1,2.5,Toy Story (1995)
4,17,1,4.5,Toy Story (1995)


In [6]:
rating_count = data.dropna(axis=0,subset=['title'])
movie_rating_count = (rating_count.
                      groupby(by=['title'])['rating'].
                      count().
                      reset_index().
                      rename(columns={'rating':'totalRatings'})
                      [['title','totalRatings']]
                     )
movie_rating_count.head()

Unnamed: 0,title,totalRatings
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2
3,'Salem's Lot (2004),1
4,'Til There Was You (1997),2


In [7]:
rating_with_totalRatings = pd.merge(rating_count,movie_rating_count,left_on='title',right_on='title',how='left')
rating_with_totalRatings.head()

Unnamed: 0,userId,movieId,rating,title,totalRatings
0,1,1,4.0,Toy Story (1995),215
1,5,1,4.0,Toy Story (1995),215
2,7,1,4.5,Toy Story (1995),215
3,15,1,2.5,Toy Story (1995),215
4,17,1,4.5,Toy Story (1995),215


In [8]:
#movie_rating_count['totalRatings'].describe()
movie_rating_count.describe()

Unnamed: 0,totalRatings
count,9719.0
mean,10.375141
std,22.40622
min,1.0
25%,1.0
50%,3.0
75%,9.0
max,329.0


In [9]:
movie_features = rating_with_totalRatings.pivot_table(index='title',columns='userId',values='rating').fillna(0)
movie_features.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
from scipy.sparse import csr_matrix
movie_matrix_features = csr_matrix(movie_features.values)


In [11]:
from sklearn.neighbors import NearestNeighbors
model_knn = NearestNeighbors(metric='cosine',algorithm='brute')
model_knn.fit(movie_matrix_features)

NearestNeighbors(algorithm='brute', metric='cosine')

In [12]:
movie_features.shape

(9719, 610)

In [13]:
query_index = np.random.choice(movie_features.shape[0])
print(query_index)
distances,indices = model_knn.kneighbors(movie_features.iloc[query_index,:].values.reshape(1,-1),n_neighbors=6)

5840


In [22]:
for i in range(0,len(distances.flatten())):
    if i == 0:
        print('Recommendation for {0}'.format(movie_features.index[query_index]))
    else:
        print('{0}:{1}, at distance of {2} from {3}'.format(i,movie_features.index[indices.flatten()[i]],distances.flatten()[i],movie_features.index[query_index]))

Recommendation for Mr. Deeds (2002)
1:Rundown, The (2003), at distance of 0.41750477730794644 from Mr. Deeds (2002)
2:Click (2006), at distance of 0.44382572498282513 from Mr. Deeds (2002)
3:Daredevil (2003), at distance of 0.4537740052859389 from Mr. Deeds (2002)
4:S.W.A.T. (2003), at distance of 0.46470438219439925 from Mr. Deeds (2002)
5:Tuxedo, The (2002), at distance of 0.4765532878489456 from Mr. Deeds (2002)
