# Movie Recommender System using Collaborative Filtering

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the movie catalogue and the user ratings from CSV files located in the
# notebook's working directory.
movies, ratings = (pd.read_csv(csv_path) for csv_path in ('movies.csv', 'ratings.csv'))

In [3]:
# Preview the first rows of the movie catalogue (movieId, title, genres).
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# Preview the first rows of the raw ratings (userId, movieId, rating, timestamp).
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


# Data Manipulation

In [5]:
# Attach title/genres to every rating. No `on=` is given, so pandas joins on the
# column(s) the two frames have in common (movieId, per the previews above) and
# keeps only rows present in both frames (inner join is the default).
# NOTE: this rebinds `ratings`, so the raw ratings frame is no longer available
# under that name after this cell has run.
ratings = movies.merge(ratings)

In [6]:
# Check the merged frame: one row per (movie, user) rating, with movie metadata.
ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [7]:
# Keep only the columns the recommender needs. Selecting them by name (instead
# of dropping movieId/timestamp/genres) yields the same frame on the first run
# and keeps the cell safe to re-run: a second `drop` would raise KeyError
# because the dropped columns no longer exist.
ratings = ratings[['title', 'userId', 'rating']]
ratings.head()

Unnamed: 0,title,userId,rating
0,Toy Story (1995),1,4.0
1,Toy Story (1995),5,4.0
2,Toy Story (1995),7,4.5
3,Toy Story (1995),15,2.5
4,Toy Story (1995),17,4.5


# Reshape the DataFrame into a user × movie matrix using the pivot_table(index, columns, values) function

In [8]:
# Build the user x movie matrix: one row per userId, one column per title,
# cells hold the rating (NaN where the user has not rated the movie).
# If a (userId, title) pair occurs more than once, pivot_table averages the
# ratings ("mean" is its default aggregation, spelled out here).
user_ratings = ratings.pivot_table(
    values='rating',
    index='userId',
    columns='title',
    aggfunc='mean',
)

In [9]:
# Preview the sparse user x movie matrix (mostly NaN at this point).
user_ratings.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# 1. Drop movies (columns) that have fewer than 10 non-missing ratings.
# 2. Replace the remaining missing ratings with 0.
# NOTE(review): after the zero-fill, "not rated" is indistinguishable from a
# rating of 0 in the correlation computed from this matrix later on -- confirm
# this is the intended treatment of missing ratings.
user_ratings = (
    user_ratings
    .dropna(axis='columns', thresh=10)
    .fillna(0)
)

In [11]:
# Preview the filtered, zero-filled user x movie matrix.
user_ratings.head()

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# (rows, columns) = (distinct userIds, movie titles kept after the filter).
user_ratings.shape

(610, 2269)

# Find the correlation between movies using the Pearson method

In [13]:
# Movie-to-movie similarity: Pearson correlation between every pair of movie
# columns of the user x movie matrix. The result is a square DataFrame indexed
# and labelled by title, with 1.0 on the diagonal.
item_similarity = user_ratings.corr(method="pearson")
item_similarity

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),...,Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"'burbs, The (1989)",1.000000,0.063117,-0.023768,0.143482,0.011998,0.087931,0.224052,0.034223,0.009277,0.008331,...,0.017477,0.032470,0.134701,0.153158,0.101301,0.049897,0.003233,0.187953,0.062174,0.353194
(500) Days of Summer (2009),0.063117,1.000000,0.142471,0.273989,0.193960,0.148903,0.142141,0.159756,0.135486,0.200135,...,0.374515,0.178655,0.068407,0.414585,0.355723,0.252226,0.216007,0.053614,0.241092,0.125905
10 Cloverfield Lane (2016),-0.023768,0.142471,1.000000,-0.005799,0.112396,0.006139,-0.016835,0.031704,-0.024275,0.272943,...,0.242663,0.099059,-0.023477,0.272347,0.241751,0.195054,0.319371,0.177846,0.096638,0.002733
10 Things I Hate About You (1999),0.143482,0.273989,-0.005799,1.000000,0.244670,0.223481,0.211473,0.011784,0.091964,0.043383,...,0.243118,0.104858,0.132460,0.091853,0.158637,0.281934,0.050031,0.121029,0.130813,0.110612
"10,000 BC (2008)",0.011998,0.193960,0.112396,0.244670,1.000000,0.234459,0.119132,0.059187,-0.025882,0.089328,...,0.260261,0.087592,0.094913,0.184521,0.242299,0.240231,0.094773,0.088045,0.203002,0.083518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zoolander (2001),0.049897,0.252226,0.195054,0.281934,0.240231,0.184324,0.274260,0.122107,0.017351,0.091416,...,0.304364,0.243820,-0.006269,0.242033,0.299522,1.000000,0.108147,0.097147,0.338034,0.109455
Zootopia (2016),0.003233,0.216007,0.319371,0.050031,0.094773,0.054024,0.077594,0.056742,0.063325,0.225747,...,0.286213,0.156603,0.011418,0.214385,0.298504,0.108147,1.000000,0.046885,0.200762,0.020595
eXistenZ (1999),0.187953,0.053614,0.177846,0.121029,0.088045,0.047804,0.085606,-0.001708,0.002528,0.128638,...,0.088202,0.028566,0.167541,0.145741,0.068763,0.097147,0.046885,1.000000,0.163022,0.138611
xXx (2002),0.062174,0.241092,0.096638,0.130813,0.203002,0.156932,0.248820,0.074306,0.037469,0.153335,...,0.271180,0.193624,0.080585,0.209840,0.203285,0.338034,0.200762,0.163022,1.000000,0.065673


# Getting similar movies according to user ratings

In [14]:
def get_similar_movie(movie_name,user_rating):
    """Scale one movie's similarity column by the rating a user gave it.

    Reads the module-level ``item_similarity`` DataFrame.

    Args:
        movie_name: Column label (movie title) to look up in ``item_similarity``.
        user_rating: Rating the user gave ``movie_name``; used as a plain
            multiplier.

    Returns:
        A Series indexed by title holding ``similarity * user_rating``, in the
        same order as ``item_similarity``'s index.

    Raises:
        KeyError: If ``movie_name`` is not a column of ``item_similarity``.
    """
    correlations = item_similarity[movie_name]
    return correlations * user_rating

# Score every movie against a 5-star rating of "2 Fast 2 Furious" (unsorted).
print(get_similar_movie('2 Fast 2 Furious (Fast and the Furious 2, The) (2003)',5))

title
'burbs, The (1989)                   0.420328
(500) Days of Summer (2009)          1.209530
10 Cloverfield Lane (2016)           0.486818
10 Things I Hate About You (1999)    1.207843
10,000 BC (2008)                     1.919786
                                       ...   
Zoolander (2001)                     1.962639
Zootopia (2016)                      1.143314
eXistenZ (1999)                      0.331265
xXx (2002)                           2.684179
¡Three Amigos! (1986)                0.336691
Name: 2 Fast 2 Furious (Fast and the Furious 2, The) (2003), Length: 2269, dtype: float64


In [15]:
def get_similar_movie(movie_name, user_rating, neutral_rating=2.5):
    """Rank all movies by how strongly a single rating recommends them.

    The rating is first centred on ``neutral_rating`` so that ratings above it
    give positive scores to similar movies, while ratings below it give
    negative scores to similar movies.

    Reads the module-level ``item_similarity`` DataFrame.

    Args:
        movie_name: Column label (movie title) to look up in ``item_similarity``.
        user_rating: Rating the user gave ``movie_name``.
        neutral_rating: Rating treated as "neither liked nor disliked".
            Defaults to 2.5, the value this notebook has used throughout.

    Returns:
        A Series indexed by title holding
        ``similarity * (user_rating - neutral_rating)``, sorted from the
        highest score to the lowest.

    Raises:
        KeyError: If ``movie_name`` is not a column of ``item_similarity``
            (for example because it was removed by the minimum-ratings filter).
    """
    if movie_name not in item_similarity.columns:
        # Same exception type as the bare column lookup, but with a message
        # that tells the reader which matrix the title is missing from.
        raise KeyError(f"{movie_name!r} is not a column of item_similarity")

    centred_rating = user_rating - neutral_rating
    weighted = item_similarity[movie_name] * centred_rating
    return weighted.sort_values(ascending=False)

# A 5-star rating: the most similar movies come out on top.
print(get_similar_movie('2 Fast 2 Furious (Fast and the Furious 2, The) (2003)',5))

title
2 Fast 2 Furious (Fast and the Furious 2, The) (2003)    2.500000
xXx (2002)                                               1.342090
30 Days of Night (2007)                                  1.289608
Snakes on a Plane (2006)                                 1.284288
Die Another Day (2002)                                   1.280320
                                                           ...   
English Patient, The (1996)                             -0.099425
Postman, The (Postino, Il) (1994)                       -0.105862
Piano, The (1993)                                       -0.123113
Crimson Tide (1995)                                     -0.131209
Clear and Present Danger (1994)                         -0.149659
Name: 2 Fast 2 Furious (Fast and the Furious 2, The) (2003), Length: 2269, dtype: float64


In [16]:
# get_similar_movie is already defined in the preceding cell; an identical
# second definition here would only risk the two copies drifting apart.
# A 1-star rating flips the ranking: the most similar movies end up at the bottom.
print(get_similar_movie('X-Men (2000)',1))

title
Madness of King George, The (1994)                            0.141331
Disclosure (1994)                                             0.138857
Like Water for Chocolate (Como agua para chocolate) (1992)    0.098761
Sudden Death (1995)                                           0.092656
Remains of the Day, The (1993)                                0.092591
                                                                ...   
Spider-Man 2 (2004)                                          -0.782193
X-Men: The Last Stand (2006)                                 -0.803705
Spider-Man (2002)                                            -0.809467
X2: X-Men United (2003)                                      -0.952688
X-Men (2000)                                                 -1.500000
Name: X-Men (2000), Length: 2269, dtype: float64


# Final Output

In [17]:
action_lover = [("2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",5),
                ("12 Years a Slave (2013)",4),("2012 (2009)",3),
                ("(500) Days of Summer (2009)",2)]

# One row per rated movie (labelled by its title), one column per candidate.
# The frame is built in a single step from a list of Series:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
similar_movies = pd.DataFrame(
    [get_similar_movie(movie, rating) for movie, rating in action_lover]
)

similar_movies.head()

Unnamed: 0,"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",xXx (2002),30 Days of Night (2007),Snakes on a Plane (2006),Die Another Day (2002),"Fast and the Furious, The (2001)",Kung Fu Hustle (Gong fu) (2004),S.W.A.T. (2003),Wanted (2008),Rat Race (2001),...,Arsenic and Old Lace (1944),"Philadelphia Story, The (1940)",Hoop Dreams (1994),"Madness of King George, The (1994)",Like Water for Chocolate (Como agua para chocolate) (1992),"English Patient, The (1996)","Postman, The (Postino, Il) (1994)","Piano, The (1993)",Crimson Tide (1995),Clear and Present Danger (1994)
"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",2.5,1.34209,1.289608,1.284288,1.28032,1.249163,1.229736,1.214651,1.21128,1.209962,...,-0.09293,-0.093994,-0.094067,-0.09479,-0.09865,-0.099425,-0.105862,-0.123113,-0.131209,-0.149659
12 Years a Slave (2013),0.064218,0.056204,0.052268,-0.014553,0.162271,0.275179,0.014409,0.078675,0.179954,0.120895,...,-0.052629,0.091208,0.00073,-0.053682,-0.055868,0.023385,-0.059952,-0.077455,-0.027745,-0.079736
2012 (2009),0.140705,0.092694,0.097791,0.13102,0.11682,0.137507,0.10695,0.167014,0.171313,0.170316,...,-0.01886,0.016468,-0.003785,-0.019237,-0.020021,-0.007951,-0.021484,-0.027756,-0.005901,-0.019664
(500) Days of Summer (2009),-0.120953,-0.120546,-0.124918,-0.103815,-0.094167,-0.069616,-0.049342,-0.12325,-0.133691,-0.123743,...,-0.00758,-0.016974,-0.011696,0.028899,0.017704,0.027126,0.032275,0.016846,0.02287,0.041962


In [18]:
action_lover = [("2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",5),
                ("12 Years a Slave (2013)",4),("2012 (2009)",3),
                ("(500) Days of Summer (2009)",2)]

# One row of scores per rated movie, built in a single step (DataFrame.append
# was removed in pandas 2.0). reset_index(drop=True) reproduces the 0..n-1 row
# labels that append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar_movie(movie, rating) for movie, rating in action_lover]
).reset_index(drop=True)

# Add up the per-movie scores and rank the candidates, best first.
# (The former `similar_movies.head()` line was removed: it was not the last
# expression of the cell, so its value was silently discarded.)
similar_movies.sum().sort_values(ascending=False)

2 Fast 2 Furious (Fast and the Furious 2, The) (2003)         2.583970
12 Years a Slave (2013)                                       1.613721
Fast and the Furious, The (2001)                              1.592233
Fast & Furious (Fast and the Furious 4, The) (2009)           1.523609
Mission: Impossible III (2006)                                1.488944
                                                                ...   
Postman, The (Postino, Il) (1994)                            -0.155024
Like Water for Chocolate (Como agua para chocolate) (1992)   -0.156833
Arsenic and Old Lace (1944)                                  -0.171999
Clear and Present Danger (1994)                              -0.207097
Piano, The (1993)                                            -0.211478
Length: 2269, dtype: float64

In [19]:
action_lover = [("X-Men: First Class (2011)",5)]

# One row of scores per rated movie, built in a single step (DataFrame.append
# was removed in pandas 2.0). reset_index(drop=True) reproduces the 0..n-1 row
# labels that append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar_movie(movie, rating) for movie, rating in action_lover]
).reset_index(drop=True)

# Add up the per-movie scores and rank the candidates, best first.
# (The former `similar_movies.head()` line was removed: it was not the last
# expression of the cell, so its value was silently discarded.)
similar_movies.sum().sort_values(ascending=False)

X-Men: First Class (2011)                                     2.500000
Iron Man 2 (2010)                                             1.692136
X-Men: Days of Future Past (2014)                             1.653927
Star Trek Into Darkness (2013)                                1.602668
Captain America: The First Avenger (2011)                     1.573299
                                                                ...   
Arsenic and Old Lace (1944)                                  -0.145993
Madness of King George, The (1994)                           -0.148914
Like Water for Chocolate (Como agua para chocolate) (1992)   -0.154978
Postman, The (Postino, Il) (1994)                            -0.166308
Disclosure (1994)                                            -0.171452
Length: 2269, dtype: float64

In [20]:
action_lover = [("Iron Man 3 (2013)",1)]

# One row of scores per rated movie, built in a single step (DataFrame.append
# was removed in pandas 2.0). reset_index(drop=True) reproduces the 0..n-1 row
# labels that append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar_movie(movie, rating) for movie, rating in action_lover]
).reset_index(drop=True)

# Add up the per-movie scores and rank the candidates, best first.
# (The former `similar_movies.head()` line was removed: it was not the last
# expression of the cell, so its value was silently discarded.)
similar_movies.sum().sort_values(ascending=False)

Outbreak (1995)                               0.125298
Dances with Wolves (1990)                     0.103169
Disclosure (1994)                             0.101155
American President, The (1995)                0.100001
Little Women (1994)                           0.090631
                                                ...   
Ant-Man (2015)                               -0.920368
Captain America: The First Avenger (2011)    -0.930243
Captain America: The Winter Soldier (2014)   -0.936815
Iron Man 2 (2010)                            -0.986935
Iron Man 3 (2013)                            -1.500000
Length: 2269, dtype: float64

In [23]:
action_lover = [("Iron Man 3 (2013)",1)]

# One row of scores per rated movie, built in a single step (DataFrame.append
# was removed in pandas 2.0). reset_index(drop=True) reproduces the 0..n-1 row
# labels that append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar_movie(movie, rating) for movie, rating in action_lover]
).reset_index(drop=True)

# Add up the per-movie scores, rank the candidates and keep the 20 best.
# .head(20) states the positional intent that `[0:20]` only implied on a
# title-indexed Series. (The former `similar_movies.head()` line was removed:
# it was not the last expression of the cell, so its value was discarded.)
similar_movies.sum().sort_values(ascending=False).head(20)

Outbreak (1995)                              0.125298
Dances with Wolves (1990)                    0.103169
Disclosure (1994)                            0.101155
American President, The (1995)               0.100001
Little Women (1994)                          0.090631
Sense and Sensibility (1995)                 0.088507
Don Juan DeMarco (1995)                      0.088080
Piano, The (1993)                            0.084479
Postman, The (Postino, Il) (1994)            0.084150
Specialist, The (1994)                       0.083331
Leaving Las Vegas (1995)                     0.082839
Copycat (1995)                               0.082412
Butch Cassidy and the Sundance Kid (1969)    0.081879
African Queen, The (1951)                    0.079963
Legends of the Fall (1994)                   0.078530
Blair Witch Project, The (1999)              0.077963
Sabrina (1995)                               0.077404
Firm, The (1993)                             0.076378
Apollo 13 (1995)            