# Movie Recommendation using Collaborative Filtering

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [2]:
# Load the movie catalogue and the user ratings from CSV files located in the
# notebook's working directory.
movies = pd.read_csv('movie.csv')
ratings = pd.read_csv('rating.csv')

In [3]:
# Preview the movie catalogue read from 'movie.csv'.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [4]:
# Preview the ratings read from 'rating.csv'.
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40
...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00
20000259,138493,69526,4.5,2009-12-03 18:31:48
20000260,138493,69644,3.0,2009-12-07 18:10:57
20000261,138493,70286,5.0,2009-11-13 15:42:24


In [5]:
# Attach every rating to its movie row. merge() is called without `on`, so
# pandas joins on the column names the two frames share. The genre and
# timestamp columns are not used by the recommenders below and are dropped.
data = (
    movies
    .merge(ratings)
    .drop(columns=['genres', 'timestamp'])
)
data

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),3,4.0
1,1,Toy Story (1995),6,5.0
2,1,Toy Story (1995),8,4.0
3,1,Toy Story (1995),10,4.0
4,1,Toy Story (1995),11,4.5
...,...,...,...,...
20000258,131254,Kein Bund für's Leben (2007),79570,4.0
20000259,131256,"Feuer, Eis & Dosenbier (2002)",79570,4.0
20000260,131258,The Pirates (2014),28906,2.5
20000261,131260,Rentun Ruusu (2001),65409,3.0


In [6]:
# Number of ratings contributed by each user.
counts = data['userId'].value_counts()

# Users with fewer than 500 ratings are excluded; every row they contributed
# is removed from the working set.
low_activity_users = counts[counts < 500].index
data1 = data.loc[~data['userId'].isin(low_activity_users)]

data1

Unnamed: 0,movieId,title,userId,rating
4,1,Toy Story (1995),11,4.5
12,1,Toy Story (1995),24,4.0
18,1,Toy Story (1995),54,4.0
19,1,Toy Story (1995),58,5.0
27,1,Toy Story (1995),91,4.0
...,...,...,...,...
20000258,131254,Kein Bund für's Leben (2007),79570,4.0
20000259,131256,"Feuer, Eis & Dosenbier (2002)",79570,4.0
20000260,131258,The Pirates (2014),28906,2.5
20000261,131260,Rentun Ruusu (2001),65409,3.0


# Using correlation

In [7]:
# User x title rating matrix: one row per user, one column per movie title.
# Cells stay NaN where the user has not rated the title.
pivot_table = data1.pivot_table(
    values="rating",
    index=["userId"],
    columns=["title"],
)

# For each title, how many of the retained users have NOT rated it.
counts1 = pivot_table.isna().sum()
counts1

title
#chicagoGirl: The Social Network Takes on a Dictator (2013)    7489
$ (Dollars) (1971)                                             7484
$5 a Day (2008)                                                7481
$9.99 (2008)                                                   7474
$ellebrity (Sellebrity) (2012)                                 7490
                                                               ... 
À nos amours (1983)                                            7473
À nous la liberté (Freedom for Us) (1931)                      7372
À propos de Nice (1930)                                        7487
Åsa-Nisse - Wälkom to Knohult (2011)                           7489
貞子3D (2012)                                                    7490
Length: 26035, dtype: int64

In [8]:
# Replace missing ratings with 0 so every user/title cell holds a number.
# NOTE: this rebinds pivot_table, so re-running the NaN count cell after this
# one would report zeros.
pivot_table = pivot_table.fillna(value=0)
pivot_table



title,#chicagoGirl: The Social Network Takes on a Dictator (2013),$ (Dollars) (1971),$5 a Day (2008),$9.99 (2008),$ellebrity (Sellebrity) (2012),'71 (2014),'Hellboy': The Seeds of Creation (2004),"'Human' Factor, The (Human Factor, The) (1975)",'Neath the Arizona Skies (1934),'R Xmas (2001),...,xXx: State of the Union (2005),"¡Alambrista! (Illegal, The) (1977)",¡Qué hacer! (1970),¡Three Amigos! (1986),À l'aventure (2008),À nos amours (1983),À nous la liberté (Freedom for Us) (1931),À propos de Nice (1930),Åsa-Nisse - Wälkom to Knohult (2011),貞子3D (2012)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0
138406,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0
138411,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
138437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0


In [108]:
# Item-based recommendation by correlation: rank every title by the Pearson
# correlation between its rating column and the rating column of the movie the
# user names.
movie_name = input('movie name :')

# Fuzzy-match against the titles that are actually columns of pivot_table.
# Matching against movies['title'] can return a title that did not survive the
# earlier filtering of low-activity users, in which case pivot_table[movie_n]
# would raise KeyError. A plain list is passed so extractOne returns
# (title, score) and [0] is the matched title.
movie_n = process.extractOne(movie_name, pivot_table.columns.tolist())[0]
print(movie_n)

# Ratings given by every user to the chosen movie.
movie_watched = pivot_table[movie_n]

# Column-wise Pearson correlation of every title with the chosen one,
# highest first.
similarity_with_other_movies = pivot_table.corrwith(movie_watched, method='pearson')
similarity_with_other_movies = similarity_with_other_movies.sort_values(ascending=False)

# The chosen movie correlates perfectly with itself, so it normally appears
# at the top of this list.
similarity_with_other_movies.head(10)

movie name :fight club
Fight Club (1999)


title
Fight Club (1999)              1.000000
Snatch (2000)                  0.462716
American History X (1998)      0.443256
Kill Bill: Vol. 1 (2003)       0.433369
Seven (a.k.a. Se7en) (1995)    0.421463
Donnie Darko (2001)            0.411823
Kill Bill: Vol. 2 (2004)       0.409485
Sin City (2005)                0.408413
Memento (2000)                 0.401138
Requiem for a Dream (2000)     0.388656
dtype: float64

# Using KNN

In [83]:
# Title x user rating matrix (one row per movie title, one column per user),
# with missing ratings replaced by 0. Each row is the feature vector used by
# the nearest-neighbour model.
pivot_table1 = (
    data1
    .pivot_table(values="rating", index=['title'], columns=['userId'])
    .fillna(0)
)
pivot_table1
                                                              

userId,11,24,54,58,91,104,116,134,156,208,...,138270,138301,138307,138325,138382,138397,138406,138411,138437,138474
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
#chicagoGirl: The Social Network Takes on a Dictator (2013),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$ (Dollars) (1971),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$5 a Day (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$9.99 (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$ellebrity (Sellebrity) (2012),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
À nos amours (1983),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
À nous la liberté (Freedom for Us) (1931),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,4.0,0.0,4.5,0.0,0.0,3.5,0.0,0.0,0.0
À propos de Nice (1930),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Åsa-Nisse - Wälkom to Knohult (2011),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# Hold the title x user matrix in compressed sparse row form; it is the input
# the nearest-neighbour model is fitted on.
pivot_spr = csr_matrix(pivot_table1.values)

In [37]:
# Brute-force nearest-neighbour search over movie rating vectors using cosine
# distance; 20 is the default neighbour count when a query does not override it.
model_knn = NearestNeighbors(
    n_neighbors=20,
    algorithm='brute',
    metric='cosine',
)
model_knn.fit(pivot_spr)

NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=20)

In [96]:
# Lookup frame of titles in the same row order as pivot_table1, so a row
# label in k is also the positional index of that title in pivot_table1.
k = pd.DataFrame({'movie': pivot_table1.index.tolist()})
k

Unnamed: 0,movie
0,#chicagoGirl: The Social Network Takes on a Di...
1,$ (Dollars) (1971)
2,$5 a Day (2008)
3,$9.99 (2008)
4,$ellebrity (Sellebrity) (2012)
...,...
26030,À nos amours (1983)
26031,À nous la liberté (Freedom for Us) (1931)
26032,À propos de Nice (1930)
26033,Åsa-Nisse - Wälkom to Knohult (2011)


In [118]:
# Item-based KNN recommendation: find the movies whose rating vectors are
# closest (cosine distance) to the movie the user names.
N_RECOMMENDATIONS = 9  # same count the cell produced before (10 neighbours minus the query)

movie_name1 = input('movie name :')

# Match once, directly against the titles the model was fitted on. Because
# k['movie'] is a Series, extractOne returns (title, score, key); the key is
# the row label in k, which is also the row position in pivot_table1.
# Matching against the full catalogue first and then re-matching the result
# could print one title and compute recommendations for another.
movie_n1, match_score, movie_knn = process.extractOne(movie_name1, k['movie'])
print(movie_n1)

# Ask for one extra neighbour because the query movie is normally returned
# as its own nearest neighbour.
query_vector = pivot_table1.iloc[movie_knn, :].values.reshape(1, -1)
distance, indices = model_knn.kneighbors(query_vector, n_neighbors=N_RECOMMENDATIONS + 1)

print("Recommendations for {0}:\n".format(pivot_table1.index[movie_knn]))

# Remove the query movie by index instead of assuming it is the first result:
# with tied distances it may appear in another position or not at all.
neighbours = [
    (idx, dist)
    for idx, dist in zip(indices.flatten(), distance.flatten())
    if idx != movie_knn
][:N_RECOMMENDATIONS]

for rank, (idx, dist) in enumerate(neighbours, start=1):
    print("{0}: {1}, with distance of {2}:".format(rank, pivot_table1.index[idx], dist))

movie name :The Darjeeling Limited
Darjeeling Limited, The (2007)
Recommendations for Darjeeling Limited, The (2007):

1: Juno (2007), with distance of 0.4430445010651861:
2: No Country for Old Men (2007), with distance of 0.4445830425355165:
3: There Will Be Blood (2007), with distance of 0.4500528091370606:
4: Little Miss Sunshine (2006), with distance of 0.4561227294404505:
5: Burn After Reading (2008), with distance of 0.45662184381601845:
6: Life Aquatic with Steve Zissou, The (2004), with distance of 0.4607064681175784:
7: Dark Knight, The (2008), with distance of 0.48350013640478673:
8: In Bruges (2008), with distance of 0.4839117142640409:
9: Children of Men (2006), with distance of 0.4938632148765385:
