# Recommendation

In [3]:
import numpy as np
import pandas as pd
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel

##  CONTENT BASED RECOMMENDATION SYSTEM

#### Content-based filtering based on user past genre preference. 

In [10]:
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),Animation|Children|Comedy
10325,146878,Le Grand Restaurant (1966),Comedy
10326,148238,A Very Murray Christmas (2015),Comedy
10327,148626,The Big Short (2015),Drama


In [33]:
movies.columns=['MoiveId', 'Title', 'Genres']


In [35]:
count2 = movies.loc[movies['Genres'] == 'Adventure'].count()[0]
count1 = movies.loc[movies['Genres'] == 'Comedy'].count()[0]


In [36]:
movies.fillna(value='0')
movies.columns = ['MovieId', 'Title', 'Genres']
movies

Unnamed: 0,MovieId,Title,Genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),Animation|Children|Comedy
10325,146878,Le Grand Restaurant (1966),Comedy
10326,148238,A Very Murray Christmas (2015),Comedy
10327,148626,The Big Short (2015),Drama


In [37]:
movies['Genres']

0        Adventure|Animation|Children|Comedy|Fantasy
1                         Adventure|Children|Fantasy
2                                     Comedy|Romance
3                               Comedy|Drama|Romance
4                                             Comedy
                            ...                     
10324                      Animation|Children|Comedy
10325                                         Comedy
10326                                         Comedy
10327                                          Drama
10328                             (no genres listed)
Name: Genres, Length: 10329, dtype: object

In [38]:
tfidf = TfidfVectorizer(stop_words='english')
movies['Genres'] = movies['Genres'].fillna("")
tfidf_matrix = tfidf.fit_transform(movies['Genres'],  )

In [39]:
 cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [40]:
indices = pd.Series(movies.index, index= movies['Title']).drop_duplicates()
indices

Title
Toy Story (1995)                           0
Jumanji (1995)                             1
Grumpier Old Men (1995)                    2
Waiting to Exhale (1995)                   3
Father of the Bride Part II (1995)         4
                                       ...  
Cosmic Scrat-tastrophe (2015)          10324
Le Grand Restaurant (1966)             10325
A Very Murray Christmas (2015)         10326
The Big Short (2015)                   10327
Marco Polo: One Hundred Eyes (2015)    10328
Length: 10329, dtype: int64

In [41]:
indices['Jumanji (1995)']

1

In [42]:
def get_recommendations(title, cosine_sim = cosine_sim):
    idx = indices[title]
    sim_scores = enumerate(cosine_sim[idx])
    sim_scores = sorted(sim_scores, key = lambda x: x[1], reverse=True)
    sim_scores= sim_scores[1:11]
  
  
    sim_index = [i[0] for i in sim_scores]
    print(movies["Title"].iloc[sim_index])

In [43]:
get_recommendations('Jumanji (1995)')

55                     Indian in the Cupboard, The (1995)
112                     NeverEnding Story III, The (1994)
817                       Escape to Witch Mountain (1975)
1604            Darby O'Gill and the Little People (1959)
1652                                  Return to Oz (1985)
1716                        NeverEnding Story, The (1984)
1717    NeverEnding Story II: The Next Chapter, The (1...
1908                        Santa Claus: The Movie (1985)
3817    Harry Potter and the Sorcerer's Stone (a.k.a. ...
6471                            Magic in the Water (1995)
Name: Title, dtype: object


## COLLABORATIVE FITERING 


#### Collaborative filtering using cosine similarities between users movie interest and rating they awarded each movie of various to provide recommendations. 

In [44]:
ratings.columns = ['userId', 'MovieId', 'rating', 'timestamp']
ratings

Unnamed: 0,userId,MovieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523
...,...,...,...,...
105334,668,142488,4.0,1451535844
105335,668,142507,3.5,1451535889
105336,668,143385,4.0,1446388585
105337,668,144976,2.5,1448656898


In [45]:
movies[['MovieId', 'Title', 'Genres']]
ratings[['userId', 'MovieId', 'rating', 'timestamp']]

Unnamed: 0,userId,MovieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523
...,...,...,...,...
105334,668,142488,4.0,1451535844
105335,668,142507,3.5,1451535889
105336,668,143385,4.0,1446388585
105337,668,144976,2.5,1448656898


In [59]:
rating_jn = pd.merge(ratings, movies, how='left', on='MovieId')
rating_jn.head(50)

Unnamed: 0,userId,MovieId,rating,timestamp,Title,Genres
0,1,16,4.0,1217897793,Casino (1995),Crime|Drama
1,1,24,1.5,1217895807,Powder (1995),Drama|Sci-Fi
2,1,32,4.0,1217896246,Twelve Monkeys (a.k.a. 12 Monkeys) (1995),Mystery|Sci-Fi|Thriller
3,1,47,4.0,1217896556,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,4.0,1217896523,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
5,1,110,4.0,1217896150,Braveheart (1995),Action|Drama|War
6,1,150,3.0,1217895940,Apollo 13 (1995),Adventure|Drama|IMAX
7,1,161,4.0,1217897864,Crimson Tide (1995),Drama|Thriller|War
8,1,165,3.0,1217897135,Die Hard: With a Vengeance (1995),Action|Crime|Thriller
9,1,204,0.5,1217895786,Under Siege 2: Dark Territory (1995),Action


In [60]:
user_ratings = rating_jn.pivot_table(index=['userId'], columns=['Title'], values='rating')
user_ratings.head()

Title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),a/k/a Tommy Chong (2005),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [61]:
user_ratings = rating_jn.pivot_table(index=['userId'], columns=['Title'], values='rating')
user_ratings.head()

Title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),a/k/a Tommy Chong (2005),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,


In [62]:
## drop all moives who have less than 100 users who have rated it and fill remaining NaN with 0
user_ratings = user_ratings.dropna(thresh = 100, axis = 0).fillna (0)
user_ratings

Title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),a/k/a Tommy Chong (2005),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
662,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
666,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0


In [63]:
#Creating a similarity matrix to understand how closely related this movies are 
user_item_similarity_df = user_ratings.corr(method = 'pearson')
user_item_similarity_df

Title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 (1979),...,[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),a/k/a Tommy Chong (2005),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),1.000000,,-0.003891,-0.005458,-0.015478,-0.003891,-0.020032,-0.011043,0.482123,-0.006375,...,-0.009444,-0.007112,-0.004405,-0.003891,-0.019020,-0.003891,0.162172,-0.009159,0.093784,-0.003891
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),-0.003891,,1.000000,-0.005458,-0.015478,-0.003891,0.082819,-0.011043,0.359210,0.678912,...,-0.009444,0.222236,0.137644,-0.003891,0.095100,-0.003891,0.233817,-0.009159,0.093784,1.000000
'Til There Was You (1997),-0.005458,,-0.005458,1.000000,-0.021710,-0.005458,-0.028097,-0.015489,-0.013364,-0.008941,...,-0.013247,-0.009975,-0.006178,-0.005458,-0.026678,-0.005458,-0.023759,-0.012846,-0.031621,-0.005458
"'burbs, The (1989)",-0.015478,,-0.015478,-0.021710,1.000000,-0.015478,-0.033193,0.215687,0.050994,-0.025358,...,0.036924,-0.028288,-0.017521,-0.015478,0.205195,-0.015478,0.069356,-0.036431,0.132341,-0.015478
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
loudQUIETloud: A Film About the Pixies (2006),-0.003891,,-0.003891,-0.005458,-0.015478,-0.003891,0.237096,-0.011043,-0.009528,-0.006375,...,-0.009444,-0.007112,-0.004405,-0.003891,-0.019020,1.000000,-0.016939,-0.009159,-0.022544,-0.003891
xXx (2002),0.162172,,0.233817,-0.023759,0.069356,0.197995,0.245853,-0.048074,0.305011,0.290143,...,0.093085,0.026328,0.016307,0.233817,0.121827,-0.016939,1.000000,0.345581,0.082425,0.233817
xXx: State of the Union (2005),-0.009159,,-0.009159,-0.012846,-0.036431,-0.009159,0.242491,-0.025992,0.142892,-0.015005,...,0.138071,-0.016739,-0.010367,0.497184,0.151895,-0.009159,0.345581,1.000000,0.005610,-0.009159
¡Three Amigos! (1986),0.093784,,0.093784,-0.031621,0.132341,0.093784,0.018016,0.121724,0.172680,0.121884,...,-0.054718,-0.014627,-0.009060,-0.022544,0.126179,-0.022544,0.082425,0.005610,1.000000,0.093784


In [64]:
def get_similar_movies(movie_name, user_ratings):
    similar_score = user_item_similarity_df[movie_name] * user_ratings
    similar_score = similar_score.sort_values(ascending=False)
    
    return similar_score
print(get_similar_movies("Casino (1995)", 4.0 ))

Title
Casino (1995)                                                                                             4.000000
Goodfellas (1990)                                                                                         1.882004
Donnie Brasco (1997)                                                                                      1.714333
Carlito's Way (1993)                                                                                      1.615818
Clerks (1994)                                                                                             1.603785
                                                                                                            ...   
Whatever (1998)                                                                                                NaN
Wonderland (1999)                                                                                              NaN
Young and Prodigious T.S. Spivet, The (L'extravagant voyage du jeune et pr

In [65]:
action_lover = [("Casablanca (1942)", 5.0),("Rosencrantz and Guildenstern Are Dead (1990)", 3.5)]

similar_movies = pd.DataFrame()

for movie,rating in action_lover:
    similar_movies = similar_movies.append(get_similar_movies(movie,rating))
similar_movies.head()

  similar_movies = similar_movies.append(get_similar_movies(movie,rating))
  similar_movies = similar_movies.append(get_similar_movies(movie,rating))


Title,Casablanca (1942),"Maltese Falcon, The (1941)","African Queen, The (1951)",Psycho (1960),Gone with the Wind (1939),Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),"Wizard of Oz, The (1939)",Bonnie and Clyde (1967),"Treasure of the Sierra Madre, The (1948)",Vertigo (1958),...,Two Much (1995),Uncommon Valor (1983),Unhook the Stars (1996),"Walk in the Sun, A (1945)","Watcher in the Woods, The (1980)",Whatever (1998),Wonderland (1999),"Young and Prodigious T.S. Spivet, The (L'extravagant voyage du jeune et prodigieux T.S. Spivet) (2013)","Yours, Mine and Ours (1968)","Yours, Mine and Ours (2005)"
Casablanca (1942),5.0,2.673825,2.510604,2.44087,2.382207,2.349948,2.245682,2.175066,2.157038,2.083164,...,,,,,,,,,,
Rosencrantz and Guildenstern Are Dead (1990),0.207575,0.448007,0.568378,0.129721,0.18948,-0.014485,0.202576,0.555998,0.509556,0.365724,...,,,,,,,,,,
