## Movie Recommendation Using Collaborative Filtering

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
import warnings
warnings.simplefilter('ignore')

In [3]:
movie_df=pd.read_csv('movies.dat',delimiter='::',encoding='latin1',names=['movie_id','movie_name','Genre'])

In [4]:
movie_df.head()

Unnamed: 0,movie_id,movie_name,Genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
ratings_data=pd.read_csv('ratings.dat',delimiter='::',encoding='Latin1',names=['user_id','movie_id','ratings','timestamp'])

In [6]:
ratings_data.head(2)

Unnamed: 0,user_id,movie_id,ratings,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109


In [7]:
users_data=pd.read_csv('users.dat',encoding='latin1',delimiter='::',names=['user_id','gender','age','occupation','zipcode'])

In [8]:
users_data.head(2)

Unnamed: 0,user_id,gender,age,occupation,zipcode
0,1,F,1,10,48067
1,2,M,56,16,70072


In [9]:
users_data.nunique()

user_id       6040
gender           2
age              7
occupation      21
zipcode       3439
dtype: int64

In [10]:
users_data.age.value_counts()

25    2096
35    1193
18    1103
45     550
50     496
56     380
1      222
Name: age, dtype: int64

In [11]:
users_data.shape

(6040, 5)

In [12]:
ratings_data.shape

(1000209, 4)

In [13]:
movie_df.shape

(3883, 3)

In [14]:
## Merging the Movie_df and Ratings data

In [15]:
movie_rating_df=pd.merge(movie_df,ratings_data,on='movie_id')

In [16]:
movie_rating_df.head()

Unnamed: 0,movie_id,movie_name,Genre,user_id,ratings,timestamp
0,1,Toy Story (1995),Animation|Children's|Comedy,1,5,978824268
1,1,Toy Story (1995),Animation|Children's|Comedy,6,4,978237008
2,1,Toy Story (1995),Animation|Children's|Comedy,8,4,978233496
3,1,Toy Story (1995),Animation|Children's|Comedy,9,5,978225952
4,1,Toy Story (1995),Animation|Children's|Comedy,10,5,978226474


In [17]:
movie_rating_df.shape

(1000209, 6)

In [18]:
movie_rating_df.drop('timestamp',axis=1,inplace=True)

In [19]:
movie_rating_df.head()

Unnamed: 0,movie_id,movie_name,Genre,user_id,ratings
0,1,Toy Story (1995),Animation|Children's|Comedy,1,5
1,1,Toy Story (1995),Animation|Children's|Comedy,6,4
2,1,Toy Story (1995),Animation|Children's|Comedy,8,4
3,1,Toy Story (1995),Animation|Children's|Comedy,9,5
4,1,Toy Story (1995),Animation|Children's|Comedy,10,5


In [20]:
movie_rating_df.sample(5)

Unnamed: 0,movie_id,movie_name,Genre,user_id,ratings
797160,2968,Time Bandits (1981),Adventure|Fantasy|Sci-Fi,5624,2
470101,1673,Boogie Nights (1997),Drama,2926,5
680829,2511,"Long Goodbye, The (1973)",Crime,3841,3
557994,2034,"Black Hole, The (1979)",Sci-Fi,1298,3
873275,3271,Of Mice and Men (1992),Drama,4235,4


In [21]:
movie_rating_df=movie_rating_df.drop('Genre',axis=1)

In [22]:
movie_rating_df.shape

(1000209, 4)

In [23]:
movie_rating_df.describe()

Unnamed: 0,movie_id,user_id,ratings
count,1000209.0,1000209.0,1000209.0
mean,1865.54,3024.512,3.581564
std,1096.041,1728.413,1.117102
min,1.0,1.0,1.0
25%,1030.0,1506.0,3.0
50%,1835.0,3070.0,4.0
75%,2770.0,4476.0,4.0
max,3952.0,6040.0,5.0


In [24]:
## Creating Dataframe with average ratings of movies

In [25]:
rate_movie=movie_rating_df.groupby('movie_name')['ratings'].mean().to_frame()

In [26]:
rate_movie.head(15)

Unnamed: 0_level_0,ratings
movie_name,Unnamed: 1_level_1
"$1,000,000 Duck (1971)",3.027027
'Night Mother (1986),3.371429
'Til There Was You (1997),2.692308
"'burbs, The (1989)",2.910891
...And Justice for All (1979),3.713568
1-900 (1994),2.5
10 Things I Hate About You (1999),3.422857
101 Dalmatians (1961),3.59646
101 Dalmatians (1996),3.046703
12 Angry Men (1957),4.295455


In [27]:
rate_movie['no_of_ratings']=movie_rating_df.groupby('movie_name')['ratings'].count().to_frame()

In [28]:
rate_movie.head()

Unnamed: 0_level_0,ratings,no_of_ratings
movie_name,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",3.027027,37
'Night Mother (1986),3.371429,70
'Til There Was You (1997),2.692308,52
"'burbs, The (1989)",2.910891,303
...And Justice for All (1979),3.713568,199


In [29]:
rate_movie.shape

(3706, 2)

In [30]:
## Filtering only those movies where at least 50 number of times it has been rated to avoid misleading data

In [31]:
rate_movie=rate_movie[rate_movie['no_of_ratings']>50]

In [32]:
rate_movie.shape

(2499, 2)

In [33]:
rate_movie.sort_values('no_of_ratings',ascending=False)

Unnamed: 0_level_0,ratings,no_of_ratings
movie_name,Unnamed: 1_level_1,Unnamed: 2_level_1
American Beauty (1999),4.317386,3428
Star Wars: Episode IV - A New Hope (1977),4.453694,2991
Star Wars: Episode V - The Empire Strikes Back (1980),4.292977,2990
Star Wars: Episode VI - Return of the Jedi (1983),4.022893,2883
Jurassic Park (1993),3.763847,2672
...,...,...
Last Night (1998),3.725490,51
Wild Reeds (1994),3.431373,51
Four Days in September (1997),3.980392,51
Angel and the Badman (1947),3.392157,51


In [34]:
## Creating User Id interaction Matrix

In [35]:
movie_rating_df.head()

Unnamed: 0,movie_id,movie_name,user_id,ratings
0,1,Toy Story (1995),1,5
1,1,Toy Story (1995),6,4
2,1,Toy Story (1995),8,4
3,1,Toy Story (1995),9,5
4,1,Toy Story (1995),10,5


In [36]:
movie_matrix_UII=movie_rating_df.pivot_table(index='user_id',columns='movie_name',values='ratings')

In [37]:
movie_matrix_UII.head()

movie_name,"$1,000,000 Duck (1971)",'Night Mother (1986),'Til There Was You (1997),"'burbs, The (1989)",...And Justice for All (1979),1-900 (1994),10 Things I Hate About You (1999),101 Dalmatians (1961),101 Dalmatians (1996),12 Angry Men (1957),...,"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),Young and Innocent (1937),Your Friends and Neighbors (1998),Zachariah (1971),"Zed & Two Noughts, A (1985)",Zero Effect (1998),Zero Kelvin (Kjærlighetens kjøtere) (1995),Zeus and Roxanne (1997),eXistenZ (1999)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [38]:
## Fetch recommendation

In [39]:
American_Beauty_rating=movie_matrix_UII["American Beauty (1999)"]

In [40]:
## Finding Similar with American Beauty

In [41]:
similar_to_american_beauty=movie_matrix_UII.corrwith(American_Beauty_rating)

In [42]:
similar_to_american_beauty=pd.DataFrame(similar_to_american_beauty,columns=['correlation'])

In [43]:
similar_to_american_beauty.head()

Unnamed: 0_level_0,correlation
movie_name,Unnamed: 1_level_1
"$1,000,000 Duck (1971)",-0.19308
'Night Mother (1986),0.318625
'Til There Was You (1997),0.064574
"'burbs, The (1989)",0.14207
...And Justice for All (1979),0.182361


In [44]:
similar_to_american_beauty.dropna(inplace=True)

In [45]:
similar_to_american_beauty.reset_index(inplace=True)


In [46]:
similar_to_american_beauty.head()

Unnamed: 0,movie_name,correlation
0,"$1,000,000 Duck (1971)",-0.19308
1,'Night Mother (1986),0.318625
2,'Til There Was You (1997),0.064574
3,"'burbs, The (1989)",0.14207
4,...And Justice for All (1979),0.182361


In [47]:
similar_to_american_beauty.dtypes

movie_name      object
correlation    float64
dtype: object

In [48]:
similar_to_american_beauty.isnull().sum()

movie_name     0
correlation    0
dtype: int64

In [49]:
similar_to_american_beauty['correlation']=pd.to_numeric(similar_to_american_beauty['correlation'])

In [50]:
similar_to_american_beauty.dtypes

movie_name      object
correlation    float64
dtype: object

In [51]:
similar_to_american_beauty.sort_values(by='correlation',ascending=False)

Unnamed: 0,movie_name,correlation
1648,"James Dean Story, The (1957)",1.0
2839,Six of a Kind (1934),1.0
703,Conceiving Ada (1997),1.0
1479,Home Page (1999),1.0
548,"Cabinet of Dr. Ramirez, The (1991)",1.0
...,...,...
562,Captives (1994),-1.0
3237,Turn It Up (2000),-1.0
3178,Tokyo Fist (1995),-1.0
521,Brothers in Trouble (1995),-1.0


In [52]:
similar_to_american_beauty.sort_values(by='correlation',ascending=False).head(10)

Unnamed: 0,movie_name,correlation
1648,"James Dean Story, The (1957)",1.0
2839,Six of a Kind (1934),1.0
703,Conceiving Ada (1997),1.0
1479,Home Page (1999),1.0
548,"Cabinet of Dr. Ramirez, The (1991)",1.0
2368,Paralyzing Fear: The Story of Polio in America...,1.0
1612,Intimate Relations (1996),1.0
1574,In God's Hands (1998),1.0
3304,"Very Natural Thing, A (1974)",1.0
849,Defying Gravity (1997),1.0


### From Above we can find movies which are very similar to 'American Beauty (1999)' and recommend these to the user