# Project Title: Movie Recommendation System Using KNN
# Group Members: Mansoor Shakeel (cs172053) | Abdul Samad Vohra (cs162071)


In [16]:
#importing the required liabraries..

import pandas as pd #used to read csv files and display the dataframes.
from scipy.sparse import csr_matrix #for creating sparse Matrix.
from fuzzywuzzy import process #for efficient searching.
from sklearn.neighbors import NearestNeighbors #to get the nearest neighbors.


In [25]:
# reading csv files to create movies and ratings datasets.
# usecols is used to get only required columns from the dataset.
# dtypes is used to convert the datatypes of the required columns into the required datatype.

movies = pd.read_csv(r'D:\Movie_recommendation_system\movies.csv',
                     usecols = ['movieId', 'title'],
                    dtype={'movieId':'int32','title':'str'})

ratings = pd.read_csv(r'D:\Movie_recommendation_system\ratings.csv',
                      usecols = ['userId', 'movieId', 'rating'],
                     dtype={'userId':'int32','movieId':'int32','rating':'float32'})

In [26]:
print(movies.head()) #printing Dataset

   movieId                               title
0        1                    Toy Story (1995)
1        2                      Jumanji (1995)
2        3             Grumpier Old Men (1995)
3        4            Waiting to Exhale (1995)
4        5  Father of the Bride Part II (1995)


In [27]:
print(ratings.head()) #printing Dataset

   userId  movieId  rating
0       1        1     4.0
1       1        3     4.0
2       1        6     4.0
3       1       47     5.0
4       1       50     5.0


In [21]:
# using csr_matrix function to convert our pivot table into a sparse matrix, this will eleminate all the values which has zero ra

movies_users = ratings.pivot(index = 'movieId',
                             columns = 'userId',
                             values = 'rating').fillna(0)

print(movies_users.head()) #printing Dataset

movies_users_matrix = csr_matrix(movies_users.values)



userId   1    2    3    4    5    6    7    8    9    10   ...  601  602  603  \
movieId                                                    ...                  
1        4.0  0.0  0.0  0.0  4.0  0.0  4.5  0.0  0.0  0.0  ...  4.0  0.0  4.0   
2        0.0  0.0  0.0  0.0  0.0  4.0  0.0  4.0  0.0  0.0  ...  0.0  4.0  0.0   
3        4.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
4        0.0  0.0  0.0  0.0  0.0  3.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   
5        0.0  0.0  0.0  0.0  0.0  5.0  0.0  0.0  0.0  0.0  ...  0.0  0.0  0.0   

userId   604  605  606  607  608  609  610  
movieId                                     
1        3.0  4.0  2.5  4.0  2.5  3.0  5.0  
2        5.0  3.5  0.0  0.0  2.0  0.0  0.0  
3        0.0  0.0  0.0  0.0  2.0  0.0  0.0  
4        0.0  0.0  0.0  0.0  0.0  0.0  0.0  
5        3.0  0.0  0.0  0.0  0.0  0.0  0.0  

[5 rows x 610 columns]


In [28]:
# Applying KNN
# Cosine similarity find colinearity
# We have used brute force algorithm not auto because we want to iterate each cell in our algorithm.
    
model = NearestNeighbors(metric = 'cosine',
                         algorithm = 'brute',
                         n_neighbors = 20)

model.fit(movies_users_matrix) #this will fit our model into our sparse matrix 

NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=20)

In [29]:
#Creating function for searching.

def movie_recommender(movie_name):
    
    idx = process.extractOne(movie_name, movies['title'])[2]
    print('Selected Movie: ',movies['title'][idx], 'Index: ',idx)
    print('Searching for recommendation.....')
    distance, indices = model.kneighbors(movies_users_matrix[idx], n_neighbors = 10)
    print(distance, indices)
    
    for i in indices:
        print(movies['title'][i].where(i != idx))



In [30]:
#using input function to Enter the name of the movie

name = input('Enter a movie name: ')

movie_recommender(name) #calling function


Enter a movie name: batman
Selected Movie:  Batman Forever (1995) Index:  126
Searching for recommendation.....
[[5.3644180e-07 2.9436135e-01 3.5725552e-01 3.7956768e-01 3.9813352e-01
  4.1406554e-01 4.2324233e-01 4.2645985e-01 4.3361640e-01 4.3654251e-01]] [[126 509 337 302 378 508 138 275 506 307]]
126                                  NaN
509                        Batman (1989)
337                     True Lies (1994)
302    Ace Ventura: Pet Detective (1994)
378                   Cliffhanger (1993)
508            Dances with Wolves (1990)
138    Die Hard: With a Vengeance (1995)
275                      Stargate (1994)
506                       Aladdin (1992)
307      Clear and Present Danger (1994)
Name: title, dtype: object
