In [58]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [59]:
# Load the movie catalogue and the per-user ratings from CSV files; both paths
# are relative to the current working directory.
movie_data = pd.read_csv(filepath_or_buffer="movies.csv")
ratings_data = pd.read_csv(filepath_or_buffer="ratings.csv")

In [60]:
# Preview the first five catalogue rows to check which columns were loaded.
movie_data.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [61]:
# Preview the first five rating rows to check which columns were loaded.
ratings_data.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [16]:
def standardize(row):
    """Mean-centre ``row`` and scale it by its value range.

    Despite the name, this is mean normalization, ``(x - mean) / (max - min)``,
    not a z-score: the divisor is the range, not the standard deviation.

    Parameters
    ----------
    row : pandas.Series
        Values to rescale. When used through ``DataFrame.apply`` with the
        default axis, each column is passed in as one Series.

    Returns
    -------
    pandas.Series
        Rescaled values on the same index as ``row``. A constant input
        (``max == min``) yields zeros instead of the NaN values that a
        division by a zero range would produce.
    """
    centered = row - row.mean()
    value_range = row.max() - row.min()
    # Only the scalar case is guarded; for any non-scalar range (e.g. a whole
    # DataFrame passed in directly) the plain element-wise division is kept.
    if np.ndim(value_range) == 0 and value_range == 0:
        # Multiplying by 0.0 keeps the index and any NaN entries of the input
        # while removing floating-point residue left over from the centring.
        return centered * 0.0
    return centered / value_range

# Rescale the rating column. Selecting with a list keeps a one-column
# DataFrame, so apply() passes the whole column to standardize() at once.
ratings_std = ratings_data.loc[:, ['rating']].apply(standardize, axis=0)

In [20]:
# Overwrite the raw ratings in place with the rescaled values from ratings_std.
# NOTE(review): the previews shown further down still list raw ratings
# (4.0, 1.5, ...) and this cell's execution count is out of sequence, so it
# looks like it was not run in the final session. Confirm whether the
# rescaling is meant to be part of the pipeline.
ratings_data['rating'] = ratings_std

In [21]:
# Preview the ratings after the rating column was overwritten.
ratings_data.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,0.107367,1217897793
1,1,24,-0.448189,1217895807
2,1,32,0.107367,1217896246
3,1,47,0.107367,1217896556
4,1,50,0.107367,1217896523


In [62]:
# Discard the rating timestamp column.
# Rebinding the name (instead of dropping with inplace=True) together with
# errors="ignore" makes this cell safe to re-run: a second execution no longer
# raises KeyError because the column is already gone.
ratings_data = ratings_data.drop(columns='timestamp', errors='ignore')

In [63]:
# Preview the ratings after the timestamp column was removed.
ratings_data.head(n=5)

Unnamed: 0,userId,movieId,rating
0,1,16,4.0
1,1,24,1.5
2,1,32,4.0
3,1,47,4.0
4,1,50,4.0


In [64]:
# Attach each rating to its movie's catalogue row. The join key is spelled out
# instead of letting merge() infer it from whatever columns the two frames
# happen to share, so an extra shared column cannot silently change the join.
# The inner join keeps only movies that have at least one rating.
movies_combo = pd.merge(movie_data, ratings_data, on='movieId', how='inner')

In [65]:
# Preview the merged movie/rating rows.
movies_combo.head(n=5)

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2,5.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,5.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,14,4.0


In [66]:
# Look for four specific movie ids among the merged (rated) rows.
movies_combo[movies_combo['movieId'].isin((7243, 8785, 54290, 128991))]

Unnamed: 0,movieId,title,genres,userId,rating


In [67]:
# Check for missing movies (movies with no ratings data): look up the same
# four ids in the full catalogue.
movie_data[movie_data['movieId'].isin((7243, 8785, 54290, 128991))]

Unnamed: 0,movieId,title,genres
5262,7243,Intolerance: Love's Struggle Throughout the Ag...,Drama
5795,8785,Early Summer (Bakushû) (1951),Drama
7439,54290,Bratz: The Movie (2007),Comedy
10195,128991,Johnny Express (2014),Animation|Comedy|Sci-Fi


In [68]:
# Drop the genres column, leaving ids, titles and ratings.
# Rebinding the name with errors='ignore' (instead of inplace=True) keeps the
# cell re-runnable: running it twice no longer raises KeyError.
movies_combo = movies_combo.drop(columns='genres', errors='ignore')
movies_combo.head()

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),2,5.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),8,5.0
3,1,Toy Story (1995),11,4.0
4,1,Toy Story (1995),14,4.0


In [69]:
# Reshape into a user x movie matrix: one row per userId, one column per title,
# each cell holding the rating (NaN where no rating exists for that pair).
# pivot_table's default aggregation (mean) is written out explicitly; it only
# matters if a (userId, title) pair occurs more than once.
ratings_h2h = movies_combo.pivot_table(
    values='rating',
    index='userId',
    columns='title',
    aggfunc='mean',
)
ratings_h2h.head(n=5)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),a/k/a Tommy Chong (2005),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [70]:
# Drop movies rated by fewer than 5 users (thresh counts the non-missing
# values per column), then encode the remaining unrated cells as 0.
ratings_h2h = ratings_h2h.dropna(axis='columns', thresh=5).fillna(value=0)

In [71]:
# Preview the filtered, zero-filled user x movie matrix.
ratings_h2h.head(n=5)

title,"'burbs, The (1989)",(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),102 Dalmatians (2000),11:14 (2003),...,Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zulu (1964),[REC] (2007),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [73]:
# Build the item-item similarity matrix: the Pearson correlation between every
# pair of movie columns in the user x movie matrix.
item_similarity_df = ratings_h2h.corr(method='pearson', min_periods=1)

In [92]:
# Inspect rows 1150-1199 (by position) of the similarity matrix.
item_similarity_df.iloc[1150:1200]

title,"'burbs, The (1989)",(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),102 Dalmatians (2000),11:14 (2003),...,Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zulu (1964),[REC] (2007),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Fallen (1998),0.244365,0.065689,0.099229,0.109327,0.16422,0.073871,0.099272,0.071209,0.028168,0.169314,...,0.131657,0.21927,0.196356,0.115532,0.050176,0.080793,0.210718,0.28615,0.195513,0.075988
Falling Down (1993),0.16564,0.092874,0.116805,0.13595,0.194774,0.101555,0.134047,0.194689,0.049468,0.136562,...,0.155397,0.253669,0.155343,0.236236,0.120614,0.060196,0.253574,0.111871,0.0777,0.214814
Fame (1980),-0.014387,0.083827,-0.01068,0.224053,0.051155,-0.010178,0.087786,0.021045,0.115758,0.170148,...,0.176906,0.141617,0.021504,0.085768,0.175623,-0.008563,0.02912,0.174183,-0.007921,0.071507
Family Guy Presents Stewie Griffin: The Untold Story (2005),0.109483,0.191084,0.063072,0.099342,0.1267,-0.015707,-0.031094,-0.000542,0.078706,0.143896,...,0.074685,0.237797,0.324369,0.103094,-0.013256,-0.013214,0.134743,0.276247,0.136302,0.130381
"Family Man, The (2000)",0.069217,0.106067,-0.020297,0.055274,0.118167,0.006096,0.058264,0.030849,-0.01578,0.127887,...,0.088219,0.053395,0.04723,0.151625,0.101384,-0.016272,-0.003042,0.032018,-0.015054,0.046112
Family Plot (1976),0.063414,0.098334,-0.01275,0.257047,0.029558,-0.012151,0.061009,0.114637,0.147221,0.11616,...,0.202798,0.158282,0.083484,0.162879,0.266274,-0.010222,0.012079,0.24218,0.118049,0.09202
"Family Stone, The (2005)",-0.013659,0.131084,0.212546,0.079828,0.287946,-0.009663,0.040305,0.087651,0.011093,0.020232,...,0.253346,0.181701,-0.014699,0.018898,0.104552,-0.008129,-0.009215,0.054793,-0.00752,0.127932
"Family Thing, A (1996)",-0.014481,0.018637,-0.01075,0.109637,0.048488,-0.010245,0.023938,0.021906,0.118706,0.174394,...,0.084801,0.057845,0.022423,0.040152,-0.008646,-0.008619,0.030128,0.089259,-0.007973,0.097002
"Family, The (2013)",0.152406,0.00899,-0.010313,0.194225,0.10247,-0.009828,0.012784,0.01135,0.17727,0.125467,...,0.153018,0.111138,0.267929,0.112261,-0.008294,-0.008268,0.017989,0.289765,-0.007649,0.049036
"Fan, The (1996)",0.037607,-0.035373,-0.019038,0.040783,0.048983,-0.018143,0.160288,-0.035554,0.032182,-0.012967,...,0.133888,0.03895,0.033207,0.059129,-0.015311,-0.015263,-0.030593,0.067019,-0.01412,0.051043


In [97]:
def recommend_movies(movie_name, user_rating):
    """Score every movie by its similarity to ``movie_name``, weighted by the rating.

    Parameters
    ----------
    movie_name : str
        Column label to read from ``item_similarity_df``.
    user_rating : float
        The user's rating for ``movie_name``.

    Returns
    -------
    pandas.Series
        One score per movie, sorted from highest to lowest.
    """
    # Ratings above 2.5 give a positive weight and ratings below it a negative
    # one, so similar movies are pushed down when the user disliked the input
    # (2.5 is presumably the midpoint of the rating scale; confirm).
    rating_offset = user_rating - 2.5
    similarities = item_similarity_df[movie_name]
    return (similarities * rating_offset).sort_values(ascending=False)

In [133]:
def predict_movie(movie_list, top_n=10, exclude_rated=False):
    """Return the titles with the highest combined recommendation score.

    Each ``(title, rating)`` pair in ``movie_list`` is scored with
    ``recommend_movies``. The per-title scores are summed over all pairs
    (missing scores count as 0) and the best ``top_n`` titles are returned.

    Parameters
    ----------
    movie_list : iterable of (str, float) pairs
        Movies the user has rated, as ``[title, rating]`` entries.
    top_n : int, default 10
        Maximum number of titles to return.
    exclude_rated : bool, default False
        When True, titles that appear in ``movie_list`` are removed from the
        result. The default keeps them, so an input title can be returned as
        a recommendation.

    Returns
    -------
    numpy.ndarray
        Titles ordered from highest to lowest total score; empty when
        ``movie_list`` is empty.
    """
    movie_list = list(movie_list)
    if not movie_list:
        # pd.concat rejects an empty list, so return the empty result directly.
        return np.array([])

    # Collect all per-movie score Series first and combine them once.
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside
    # the loop re-copied it on every iteration.
    per_movie_scores = [recommend_movies(movie, rating) for movie, rating in movie_list]
    totals = pd.concat(per_movie_scores, axis=1).sum(axis=1)

    if exclude_rated:
        rated_titles = [movie for movie, _ in movie_list]
        totals = totals.drop(labels=rated_titles, errors="ignore")

    best = totals.sort_values(ascending=False).iloc[:top_n]
    return np.array(list(best.index))

In [136]:
# Sample profile: [title, rating] entries passed to predict_movie.
action_movie_lover = [
    ['13th Warrior, The (1999)', 3],
    ['Fast and the Furious: Tokyo Drift, The (Fast and the Furious 3, The) (2006)', 5],
    ['Family Man, The (2000)', 2],
]

print(predict_movie(action_movie_lover))

['Fast and the Furious: Tokyo Drift, The (Fast and the Furious 3, The) (2006)'
 '2 Fast 2 Furious (Fast and the Furious 2, The) (2003)'
 'Fast & Furious (Fast and the Furious 4, The) (2009)' 'Legion (2010)'
 '8 Mile (2002)' 'Prince of Persia: The Sands of Time (2010)'
 'Gangster Squad (2013)' 'Louis C.K.: Oh My God (2013)'
 'I Love You Phillip Morris (2009)' 'World Trade Center (2006)']


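### This model recommends movies whose ratings across users correlate with those of the movies you input. For each input movie, its Pearson similarity to every other movie is weighted by how far your rating is from 2.5 (positive if you liked it, negative if you did not); the weighted similarities are summed over all input movies, and the ten highest-scoring titles are returned.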