In [11]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise import Reader, Dataset, SVD, evaluate

import warnings; warnings.simplefilter('ignore')

## Reading the movies details form dataset (using pandas)

In [12]:
md = pd.read_csv('input_small/movies.csv')

In [13]:
md.head() #try md.head(20) for first 20 movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(md['genres'])

In [15]:
from sklearn.metrics.pairwise import linear_kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [16]:
# Build a 1-dimensional array with movie titles
titles = md['title']
indices = pd.Series(md.index, index=md['title'])

# Function that get movie recommendations based on the cosine similarity score of movie genres
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    md_indices = [i[0] for i in sim_scores]
    return titles.iloc[md_indices]

In [17]:
genre_recommendations('Good Will Hunting (1997)').head(20)

24                            Leaving Las Vegas (1995)
27                                   Persuasion (1995)
42                How to Make an American Quilt (1995)
45                        When Night Is Falling (1995)
66                                 Bed of Roses (1996)
75     Once Upon a Time... When We Were Colored (1995)
76                           Angels and Insects (1995)
93               Bridges of Madison County, The (1995)
115                       Up Close and Personal (1996)
151                                    Mad Love (1995)
162                         Scarlet Letter, The (1995)
170                               Total Eclipse (1995)
175                       Walk in the Clouds, A (1995)
183                              Before Sunrise (1995)
189                           Circle of Friends (1995)
213                            Immortal Beloved (1994)
232                                 Love Affair (1994)
261                  Picture Bride (Bijo photo) (1994)
262       

In [18]:
genre_recommendations('Toy Story 3 (2010)').head(20)

7355                                   Toy Story 3 (2010)
7360    Shrek Forever After (a.k.a. Shrek: The Final C...
5376                            Polar Express, The (2004)
7302                      How to Train Your Dragon (2010)
8039                         Rise of the Guardians (2012)
0                                        Toy Story (1995)
1706                                          Antz (1998)
2355                                   Toy Story 2 (1999)
2809       Adventures of Rocky and Bullwinkle, The (2000)
3000                     Emperor's New Groove, The (2000)
3568                                Monsters, Inc. (2001)
6194                                     Wild, The (2006)
6486                               Shrek the Third (2007)
6948                       Tale of Despereaux, The (2008)
7760    Asterix and the Vikings (Astérix et les Viking...
8219                                         Turbo (2013)
8927                             The Good Dinosaur (2015)
9430          

In [27]:
genre_recommendations('Iron Man (2008)').head(30)

224             Star Wars: Episode IV - A New Hope (1977)
275                                       Stargate (1994)
385                                 Demolition Man (1993)
898     Star Wars: Episode V - The Empire Strikes Back...
911     Star Wars: Episode VI - Return of the Jedi (1983)
1058           Star Trek III: The Search for Spock (1984)
1346                                 Lost in Space (1998)
1557                                Rocketeer, The (1991)
1567                                          Tron (1982)
1692                            Six-String Samurai (1998)
1904                                   Logan's Run (1976)
1979     Star Wars: Episode I - The Phantom Menace (1999)
1986                                      Superman (1978)
1988                                  Superman III (1983)
1989              Superman IV: The Quest for Peace (1987)
2764                                       Mad Max (1979)
2766                    Mad Max Beyond Thunderdome (1985)
2836          

In [26]:
genre_recommendations('Iron Man 3 (2013)').head(30)

6521                          Transformers (2007)
7545                      I Am Number Four (2011)
7866                            Battleship (2012)
8151                            Iron Man 3 (2013)
8238                               Riddick (2013)
7687                             Contagion (2011)
6918        Day the Earth Stood Still, The (2008)
4351                  Matrix Reloaded, The (2003)
4639               Matrix Revolutions, The (2003)
6470                          Spider-Man 3 (2007)
7324                            Iron Man 2 (2010)
8120                 G.I. Joe: Retaliation (2013)
59      Lawnmower Man 2: Beyond Cyberspace (1996)
68                               Screamers (1995)
144                        Johnny Mnemonic (1995)
296                             Virtuosity (1995)
336                                Timecop (1994)
474                           Blade Runner (1982)
567                                   Solo (1996)
601                           Arrival, The (1996)
