# Cosine Similarity Approach for Movie Recommendation.

Assigned to Jaini Patel (jp1891)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
import pickle
# from surprise import Reader, Dataset, SVD, evaluate

import warnings; warnings.simplefilter('ignore')

### Simple Recommendation

In [2]:
# Reading the file
movies_metadata = pd. read_csv('/Users/jainipatel/Spring2021/MDM/archive/movies_metadata.csv')
movies_metadata.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [3]:
# cleaning the genres column and converting to a list  
movies_metadata['genres'] = movies_metadata['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

In [4]:
# converting the vote_count and vote_average to integer and calculating the mean value
vote_counts = movies_metadata[movies_metadata['vote_count'].notnull()]['vote_count'].astype('int')
vote_averages = movies_metadata[movies_metadata['vote_average'].notnull()]['vote_average'].astype('int')
C = vote_averages.mean()
C

5.244896612406511

In [5]:
#top 5% rank. Minimum number of votes required to qualify.
m = vote_counts.quantile(0.95)
m

434.0

In [6]:
# fetching the year from the date column
movies_metadata['year'] = pd.to_datetime(movies_metadata['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)

In [7]:
# This is a subset data for the first recommendation system (naive one)
qualified = movies_metadata[(movies_metadata['vote_count'] >= m) & (movies_metadata['vote_count'].notnull()) & (movies_metadata['vote_average'].notnull())][['title', 'year', 'vote_count', 'vote_average', 'popularity', 'genres']]
qualified['vote_count'] = qualified['vote_count'].astype('int')
qualified['vote_average'] = qualified['vote_average'].astype('int')
qualified.shape

(2274, 6)

In [8]:
# calculating weighted rating for the naive system.
def weighted_rating(x):
    v = x['vote_count']
    R = x['vote_average']
    return (v/(v+m) * R) + (m/(m+v) * C)

In [9]:
# adding the weighted coulmn to the qualified system
qualified['wr'] = qualified.apply(weighted_rating, axis=1)

In [10]:
# sorting the weights
qualified = qualified.sort_values('wr', ascending=False).head(250)

#### Top Movies

In [11]:
#top 15 movies on the basis of the vote counts
qualified.head(15)

Unnamed: 0,title,year,vote_count,vote_average,popularity,genres,wr
15480,Inception,2010,14075,8,29.1081,"[Action, Thriller, Science Fiction, Mystery, A...",7.917588
12481,The Dark Knight,2008,12269,8,123.167,"[Drama, Action, Crime, Thriller]",7.905871
22879,Interstellar,2014,11187,8,32.2135,"[Adventure, Drama, Science Fiction]",7.897107
2843,Fight Club,1999,9678,8,63.8696,[Drama],7.881753
4863,The Lord of the Rings: The Fellowship of the Ring,2001,8892,8,32.0707,"[Adventure, Fantasy, Action]",7.871787
292,Pulp Fiction,1994,8670,8,140.95,"[Thriller, Crime]",7.86866
314,The Shawshank Redemption,1994,8358,8,51.6454,"[Drama, Crime]",7.864
7000,The Lord of the Rings: The Return of the King,2003,8226,8,29.3244,"[Adventure, Fantasy, Action]",7.861927
351,Forrest Gump,1994,8147,8,48.3072,"[Comedy, Drama, Romance]",7.860656
5814,The Lord of the Rings: The Two Towers,2002,7641,8,29.4235,"[Adventure, Fantasy, Action]",7.851924


In [12]:
# genre based recommender filter
s = movies_metadata.apply(lambda x: pd.Series(x['genres']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'genre'
gen_movies_metadata = movies_metadata.drop('genres', axis=1).join(s)

In [13]:
def build_chart(genre, percentile=0.85):
    df = gen_movies_metadata[gen_movies_metadata['genre'] == genre]
    vote_counts = df[df['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = df[df['vote_average'].notnull()]['vote_average'].astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(percentile)
    
    qualified = df[(df['vote_count'] >= m) & (df['vote_count'].notnull()) & (df['vote_average'].notnull())][['title', 'year', 'vote_count', 'vote_average', 'popularity']]
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    qualified['vote_average'] = qualified['vote_average'].astype('int')
    
    qualified['wr'] = qualified.apply(lambda x: (x['vote_count']/(x['vote_count']+m) * x['vote_average']) + (m/(m+x['vote_count']) * C), axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(250)
    
    return qualified

#### Genre Based Movie Recommender

In [14]:
# genre based recommender
build_chart('Romance').head(15)

Unnamed: 0,title,year,vote_count,vote_average,popularity,wr
10309,Dilwale Dulhania Le Jayenge,1995,661,9,34.457,8.565285
351,Forrest Gump,1994,8147,8,48.3072,7.971357
876,Vertigo,1958,1162,8,18.2082,7.811667
40251,Your Name.,2016,1030,8,34.461252,7.789489
883,Some Like It Hot,1959,835,8,11.8451,7.745154
1132,Cinema Paradiso,1988,834,8,14.177,7.744878
19901,Paperman,2012,734,8,7.19863,7.713951
37863,Sing Street,2016,669,8,10.672862,7.689483
882,The Apartment,1960,498,8,11.9943,7.599317
38718,The Handmaiden,2016,453,8,16.727405,7.566166


### Content Based Filtering:

In [15]:
# reading the files
links = pd.read_csv('/Users/jainipatel/Spring2021/MDM/archive/links.csv')
links = links[links['tmdbId'].notnull()]['tmdbId'].astype('int')

In [16]:
movies_metadata = movies_metadata.drop([19730, 29503, 35587]) #dropping the ids with NaN values

In [17]:
movies_metadata['id'] = movies_metadata['id'].astype('int')

In [18]:
s_movies_metadata = movies_metadata[movies_metadata['id'].isin(links)]
s_movies_metadata.shape


(45463, 25)

#### Movie Based Recommender

In [19]:
# creating the description column by combining overview and tagline attributes.
s_movies_metadata['tagline'] = s_movies_metadata['tagline'].fillna('')
s_movies_metadata['description'] = s_movies_metadata['overview'] + s_movies_metadata['tagline']
s_movies_metadata['description'] = s_movies_metadata['description'].fillna('')

In [20]:
s_movies_metadata.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,description
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,"Led by Woody, Andy's toys live happily in his ..."
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,When siblings Judy and Peter discover an encha...
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,1995,A family wedding reignites the ancient feud be...
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,1995,"Cheated on, mistreated and stepped on, the wom..."
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,1995,Just when George Banks has recovered from his ...


In [21]:
# forming the word vector using TfidfVectorizer.
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(s_movies_metadata['description'])

In [22]:
tfidf_matrix.shape

(45463, 1104495)

#### Cosine Similarity.

In [23]:
# calculating the cosine similarity using linear kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# cosine_sim.shape

In [24]:
# reseting the index
s_movies_metadata = s_movies_metadata.reset_index()
titles = s_movies_metadata['title']
indices = pd.Series(s_movies_metadata.index, index=s_movies_metadata['title'])

In [25]:
indices

title
Toy Story                          0
Jumanji                            1
Grumpier Old Men                   2
Waiting to Exhale                  3
Father of the Bride Part II        4
                               ...  
Subdue                         45458
Century of Birthing            45459
Betrayal                       45460
Satan Triumphant               45461
Queerama                       45462
Length: 45463, dtype: int64

In [26]:
# recommendations based on cosine similarity 
def get_recommendations(title):
    idx = indices[title]
    if idx.size > 1:
        idx = idx[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [27]:
get_recommendations('The Godfather').head(10)

44027    The Godfather Trilogy: 1972-1990
1178               The Godfather: Part II
31971                    Honor Thy Father
21613                          The Family
23125                          Blood Ties
38027            A Mother Should Be Loved
18322                     The Outside Man
11297                    Household Saints
10821                            Election
4324                                 Made
Name: title, dtype: object

In [28]:
get_recommendations('The Dark Knight').head(10)

18252                                The Dark Knight Rises
150                                         Batman Forever
1328                                        Batman Returns
21193    Batman Unmasked: The Psychology of the Dark Kn...
15511                           Batman: Under the Red Hood
20231              Batman: The Dark Knight Returns, Part 2
41973                                The Lego Batman Movie
585                                                 Batman
25266                                    Batman vs Dracula
18035                                     Batman: Year One
Name: title, dtype: object

In [29]:
get_recommendations('Mean Girls').head(10)

40096      Trapped in a Purple Haze
40080                     Black Wax
39680    Revenge of the Bridesmaids
5619          Historien om Kim Skov
4188                    Ice Castles
22305                    Demon Wind
1296                      Cape Fear
1295                      Cape Fear
41838    The Witching of Ben Wagner
19260                        Bag It
Name: title, dtype: object

#### Metadata Based Recommender

In [30]:
# reading the credits and keywords file 
credits = pd.read_csv('/Users/jainipatel/Spring2021/MDM/archive/credits.csv')
keywords = pd.read_csv('/Users/jainipatel/Spring2021/MDM/archive/keywords.csv')

In [31]:
# converting the id to integer
keywords['id'] = keywords['id'].astype('int')
credits['id'] = credits['id'].astype('int')
movies_metadata['id'] = movies_metadata['id'].astype('int')

In [32]:
movies_metadata.shape

(45463, 25)

In [33]:
# merging credits and keywords data
movies_metadata = movies_metadata.merge(credits, on='id')
movies_metadata = movies_metadata.merge(keywords, on='id')

In [34]:
s_movies_metadata = movies_metadata[movies_metadata['id'].isin(links)]
s_movies_metadata.shape

(46628, 28)

In [35]:
# cleaning the attributes cast, crew, keywords, cast_size, crew_size 
s_movies_metadata['cast'] = s_movies_metadata['cast'].apply(literal_eval)
s_movies_metadata['crew'] = s_movies_metadata['crew'].apply(literal_eval)
s_movies_metadata['keywords'] = s_movies_metadata['keywords'].apply(literal_eval)
s_movies_metadata['cast_size'] = s_movies_metadata['cast'].apply(lambda x: len(x))
s_movies_metadata['crew_size'] = s_movies_metadata['crew'].apply(lambda x: len(x))

In [36]:
def get_director(x):
    for i in x:
        if i['job'] == 'Director':
            return i['name']
    return np.nan

In [38]:
# cleaning the director column
s_movies_metadata['director'] = s_movies_metadata['crew'].apply(get_director)

In [39]:
# cleaning the cast column
s_movies_metadata['cast'] = s_movies_metadata['cast'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
s_movies_metadata['cast'] = s_movies_metadata['cast'].apply(lambda x: x[:3] if len(x) >=3 else x)

In [40]:
# cleaning the keywords column
s_movies_metadata['keywords'] = s_movies_metadata['keywords'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

In [41]:
s_movies_metadata['cast'] = s_movies_metadata['cast'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])

In [42]:
s_movies_metadata['director'] = s_movies_metadata['director'].astype('str').apply(lambda x: str.lower(x.replace(" ", "")))
s_movies_metadata['director'] = s_movies_metadata['director'].apply(lambda x: [x,x, x])

#### Keywords

In [43]:
#pre-processing keywords attribute.
s = s_movies_metadata.apply(lambda x: pd.Series(x['keywords']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'keyword'

In [44]:
s = s.value_counts()
s[:5]

woman director      3128
independent film    1942
murder              1314
based on novel       841
musical              734
Name: keyword, dtype: int64

In [45]:
# considering the keywords greater than 1
s = s[s > 1]

In [46]:
# stemmer helps us to distinguish between similar words ex. cats and cat are same words, so stemmer makes it cat 
stemmer = SnowballStemmer('english')
stemmer.stem('cats')

'cat'

In [47]:
def filter_keywords(x):
    words = []
    for i in x:
        if i in s:
            words.append(i)
    return words

In [48]:
# cleaning keywords column with the stemmer
s_movies_metadata['keywords'] = s_movies_metadata['keywords'].apply(filter_keywords)
s_movies_metadata['keywords'] = s_movies_metadata['keywords'].apply(lambda x: [stemmer.stem(i) for i in x])
s_movies_metadata['keywords'] = s_movies_metadata['keywords'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])

In [49]:
s_movies_metadata['soup'] = s_movies_metadata['keywords'] + s_movies_metadata['cast'] + s_movies_metadata['director'] + s_movies_metadata['genres']
s_movies_metadata['soup'] = s_movies_metadata['soup'].apply(lambda x: ' '.join(x))

In [50]:
# forming the word vector using CountVectorizer
count = CountVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
count_matrix = count.fit_transform(s_movies_metadata['soup'])

In [51]:
# calculating the cosine similarity
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [52]:
# reseting the column index for title
s_movies_metadata = s_movies_metadata.reset_index()
titles = s_movies_metadata['title']
indices = pd.Series(s_movies_metadata.index, index=s_movies_metadata['title'])

In [53]:
#Get Recommendation based on the keywords, cast, crew and genre. Uses the same function as description recommendation.
get_recommendations('The Dark Knight').head(10)

18442    The Dark Knight Rises
10210            Batman Begins
11463             The Prestige
26110                Doodlebug
26111                Doodlebug
2486                 Following
45843                  Dunkirk
5302                  Insomnia
15651                Inception
4126                   Memento
Name: title, dtype: object

In [54]:
get_recommendations('The Godfather').head(10)

15609                  The Rain People
4470     Tucker: The Man and His Dream
1199            The Godfather: Part II
1614                     The Rainmaker
4602                   The Cotton Club
6040                One from the Heart
4012                  Gardens of Stone
23143                       Captain EO
1934           The Godfather: Part III
3635                  The Conversation
Name: title, dtype: object

In [55]:
get_recommendations('Mean Girls').head(10)

4042                Head Over Heels
6513                  Freaky Friday
1590               The House of Yes
10422              Just Like Heaven
13994    Ghosts of Girlfriends Past
17472         Mr. Popper's Penguins
42333                   Bad Santa 2
23453               Vampire Academy
12524     The Spiderwick Chronicles
27879                      The DUFF
Name: title, dtype: object

#### Weighted Ratings

In [56]:
# this is an improved version combining the cosine similarity and the naive model qualified. 
# It further sorts the similar movies on the basis of weighted ratings and recommends the top 10 movies.
def improved_recommendations(title):
    idx = indices[title]
    if idx.size > 1:
        idx = idx[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:26]
    movie_indices = [i[0] for i in sim_scores]
    
    movies = s_movies_metadata.iloc[movie_indices][['title', 'vote_count', 'vote_average', 'year']]
    vote_counts = movies[movies['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = movies[movies['vote_average'].notnull()]['vote_average'].astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(0.60)
    qualified = movies[(movies['vote_count'] >= m) & (movies['vote_count'].notnull()) & (movies['vote_average'].notnull())]
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    qualified['vote_average'] = qualified['vote_average'].astype('int')
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(10)
    return qualified

In [57]:
improved_recommendations('The Dark Knight')

Unnamed: 0,title,vote_count,vote_average,year,wr
15651,Inception,14075,8,2010,7.917588
23076,Interstellar,11187,8,2014,7.897107
11463,The Prestige,4510,8,2006,7.758148
4126,Memento,4168,8,2000,7.740175
18442,The Dark Knight Rises,9263,7,2012,6.921448
10210,Batman Begins,7511,7,2005,6.904127
45843,Dunkirk,2712,7,2017,6.757878
1349,Batman Returns,1706,6,1992,5.846862
31282,Batman v Superman: Dawn of Justice,7189,5,2016,5.013943
1511,Batman & Robin,1447,4,1997,4.287233


In [58]:
improved_recommendations('The Godfather')

Unnamed: 0,title,vote_count,vote_average,year,wr
1199,The Godfather: Part II,3418,8,1974,7.689586
1186,Apocalypse Now,2112,8,1979,7.530356
1934,The Godfather: Part III,1589,7,1990,6.623473
1312,Dracula,1087,7,1992,6.499201
3635,The Conversation,377,7,1974,6.060771
2025,The Outsiders,293,6,1983,5.549223
1614,The Rainmaker,239,6,1997,5.513054
8911,Rumble Fish,141,6,1983,5.430061
2376,Peggy Sue Got Married,138,5,1986,5.185813
754,Jack,340,5,1996,5.137319


In [59]:
improved_recommendations('Mean Girls')

Unnamed: 0,title,vote_count,vote_average,year,wr
1879,The Breakfast Club,2189,7,1985,6.709602
40941,The Edge of Seventeen,952,7,2016,6.450422
27879,The DUFF,1372,6,2015,5.818541
4612,The Princess Diaries,1063,6,2001,5.781086
6513,Freaky Friday,919,6,2003,5.757786
10422,Just Like Heaven,595,6,2005,5.681521
12524,The Spiderwick Chronicles,593,6,2008,5.680901
23453,Vampire Academy,603,5,2014,5.102493
13994,Ghosts of Girlfriends Past,716,5,2009,5.092422
17472,Mr. Popper's Penguins,775,5,2011,5.087912


In [60]:
improved_recommendations('Avatar')

Unnamed: 0,title,vote_count,vote_average,year,wr
1659,Titanic,7770,7,1997,6.907153
21123,Star Trek Into Darkness,4479,7,2013,6.844959
582,Terminator 2: Judgment Day,4274,7,1991,6.838208
1216,The Terminator,4208,7,1984,6.835908
1179,Aliens,3282,7,1986,6.795018
1113,The Abyss,822,7,1989,6.393539
375,True Lies,1138,6,1994,5.79153
1831,Small Soldiers,522,6,1998,5.657202
24091,Jupiter Ascending,2816,5,2015,5.032703
13710,Dragonball Evolution,475,2,2009,3.549269


In [61]:
get_recommendations('Avatar').head(15) #cast and keywords

26774                          Avatar 2
9741                 Aliens of the Deep
45149        T2 3-D: Battle Across Time
1179                             Aliens
6220                Ghosts of the Abyss
1216                     The Terminator
582          Terminator 2: Judgment Day
1113                          The Abyss
5696     Piranha Part Two: The Spawning
375                           True Lies
1659                            Titanic
21123           Star Trek Into Darkness
17686                     Almighty Thor
26776                    Justice League
24091                 Jupiter Ascending
Name: title, dtype: object