## Content Based Recommender

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
movies = pd.read_csv('data/MoviesMetadata.csv')
movies.head()

Unnamed: 0,budget,id,imdb_id,original_language,original_title,overview,popularity,poster_path,release_date,revenue,...,title,vote_average,vote_count,name_genres,id_genres,name_production_countries,iso_3166_1_production_countries,name_production_companies,id_production_companies,year
0,30.0,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,1995-10-30,373.554033,...,Toy Story,7.7,5415.0,"Animation, Comedy, Family","16, 35, 10751",United States of America,US,Pixar Animation Studios,3,1995
1,65.0,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,1995-12-15,262.797249,...,Jumanji,6.9,2413.0,"Adventure, Fantasy, Family","12, 14, 10751",United States of America,US,"TriStar Pictures, Teitler Film, Interscope Com...","559, 2550, 10201",1995
2,0.0,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,1995-12-22,0.0,...,Grumpier Old Men,6.5,92.0,"Romance, Comedy","10749, 35",United States of America,US,"Warner Bros., Lancaster Gate","6194, 19464",1995
3,16.0,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,1995-12-22,81.452156,...,Waiting to Exhale,6.1,34.0,"Comedy, Drama, Romance","35, 18, 10749",United States of America,US,Twentieth Century Fox Film Corporation,306,1995
4,0.0,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,1995-02-10,76.578911,...,Father of the Bride Part II,5.7,173.0,Comedy,35,United States of America,US,"Sandollar Productions, Touchstone Pictures","5842, 9195",1995


In [3]:
movies.columns

Index(['budget', 'id', 'imdb_id', 'original_language', 'original_title',
       'overview', 'popularity', 'poster_path', 'release_date', 'revenue',
       'runtime', 'status', 'tagline', 'title', 'vote_average', 'vote_count',
       'name_genres', 'id_genres', 'name_production_countries',
       'iso_3166_1_production_countries', 'name_production_companies',
       'id_production_companies', 'year'],
      dtype='object')

In [4]:
movies.drop(columns=['iso_3166_1_production_countries', 'id_production_companies', 'id_genres'], inplace=True)
movies.shape

(45443, 20)

In [5]:
links = pd.read_csv('data/links.csv')
links = links[links['tmdbId'].notnull()]['tmdbId'].astype('int')

In [6]:
links.head()

0      862
1     8844
2    15602
3    31357
4    11862
Name: tmdbId, dtype: int64

In [7]:
movies_meta = movies[movies['id'].isin(links)]
movies_meta.shape

(45443, 20)

In [8]:
movies_meta['description'] = movies_meta['overview'] + movies_meta['tagline']
movies_meta['description'] = movies_meta['description'].fillna('')

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(movies_meta['description'])
tfidf_matrix.shape

(45443, 573636)

In [10]:
from sklearn.metrics.pairwise import linear_kernel
cosine_similarity = linear_kernel(tfidf_matrix, tfidf_matrix)

In [11]:
movies_meta = movies_meta.reset_index()
titles = movies_meta['title']
indices = pd.Series(movies_meta.index, index=movies_meta['title'])

In [12]:
indices.shape

(45443,)

In [13]:
def find_idx(title):
    idx = pd.Series(indices[title])
    return idx[0]

In [14]:
def recommender(title):
    idx = find_idx(title=title)
    similarity_scores = list(enumerate(cosine_similarity[idx]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similarity_scores = similarity_scores[1:21]
    movie_indices = [i[0] for i in similarity_scores]
    return titles.iloc[movie_indices]

In [15]:
recommender('The Dark Knight').head(20)

18247                      The Dark Knight Rises
150                               Batman Forever
1328                              Batman Returns
585                                       Batman
15507                 Batman: Under the Red Hood
20225    Batman: The Dark Knight Returns, Part 2
41954                      The Lego Batman Movie
18030                           Batman: Year One
25255                          Batman vs Dracula
9228          Batman Beyond: Return of the Joker
22012                             The Super Cops
11750                                  Slow Burn
3094                Batman: Mask of the Phantasm
3266                                         JFK
32103           Batman Unlimited: Monster Mayhem
19785    Batman: The Dark Knight Returns, Part 1
39606                   Batman: The Killing Joke
13928                                 Judas Kiss
18253         Sherlock Holmes: A Game of Shadows
7672                                 Masterminds
Name: title, dtype: 

In [16]:
recommender('Harry Potter and the Philosopher\'s Stone').head(20)

23495                                             Luv
5677          Harry Potter and the Chamber of Secrets
22006                              The Starving Games
4369                                    The Dead Pool
7724         Harry Potter and the Prisoner of Azkaban
10551             Harry Potter and the Goblet of Fire
18616                           A Very Potter Musical
39045                               Bullet to Beijing
3950                         Harry, He's Here To Help
7114                                    Love at Large
11924       Harry Potter and the Order of the Phoenix
30551                                            1920
39672                               Bridge and Tunnel
3583                                      Angel Heart
16124    Harry Potter and the Deathly Hallows: Part 1
18820                                Wild About Harry
17432    Harry Potter and the Deathly Hallows: Part 2
1596                                        Incognito
4979                        

In [17]:
recommender('Carrington').head(20)

21374              Alan Partridge: Alpha Papa
6733                                   Sylvia
40394                           Hacksaw Ridge
493                                 Mr. Jones
21750           Muhammad Ali's Greatest Fight
12266                           Close My Eyes
43873                            Giving It Up
32811                     The Lady in the Van
22201                          Geography Club
22820    The Cutting Edge: Going for the Gold
14014               Just Sex and Nothing Else
5970                       One from the Heart
21508         Family Band: The Cowsills Story
13684                     The Merry Gentleman
41997                                 Rangoon
34184                              Blind Date
6388                            Swimming Pool
10782                        Imagine Me & You
41062                        The Constitution
1852                       Driving Miss Daisy
Name: title, dtype: object