# Project Title: Movie Recommended System

Aim of the project: Content Based Recommender System recommends movies similar to the movie user likes and analyses the sentiments on the reviews given by the user for that movie.

# Pre-Processing

Import essentila libraries

In [2]:
# Importing libraries
import pandas as pd
import numpy as np


In [4]:
# Loading the data from local database
df = pd.read_csv(r'C:\Users\polep\OneDrive - Högskolan Dalarna\Desktop\Interview\portfolio_projects\Movies-Recommendation-System-App-main\movie_metadata.csv')
df.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,...,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,...,,,,,,,12.0,7.1,,0


In [5]:
# Dataframe columns
df.columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [6]:
# Dataframe info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5043 entries, 0 to 5042
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   color                      5024 non-null   object 
 1   director_name              4939 non-null   object 
 2   num_critic_for_reviews     4993 non-null   float64
 3   duration                   5028 non-null   float64
 4   director_facebook_likes    4939 non-null   float64
 5   actor_3_facebook_likes     5020 non-null   float64
 6   actor_2_name               5030 non-null   object 
 7   actor_1_facebook_likes     5036 non-null   float64
 8   gross                      4159 non-null   float64
 9   genres                     5043 non-null   object 
 10  actor_1_name               5036 non-null   object 
 11  movie_title                5043 non-null   object 
 12  num_voted_users            5043 non-null   int64  
 13  cast_total_facebook_likes  5043 non-null   int64

In [7]:
# iloc needs integers

df = df.loc[:,['movie_title', 'director_name','actor_1_name','actor_2_name','actor_3_name','genres']]
df.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres
0,Avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action|Adventure|Fantasy|Sci-Fi
1,Pirates of the Caribbean: At World's End,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action|Adventure|Fantasy
2,Spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action|Adventure|Thriller
3,The Dark Knight Rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action|Thriller
4,Star Wars: Episode VII - The Force Awakens ...,Doug Walker,Doug Walker,Rob Walker,,Documentary


# Handling Missing Values

In [8]:
# Extracting null values
df.isnull().sum()

movie_title        0
director_name    104
actor_1_name       7
actor_2_name      13
actor_3_name      23
genres             0
dtype: int64

In [9]:
# Droping null values
df.dropna( inplace = True)

In [10]:
df.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
dtype: int64

In [11]:
# Replacing null values with the unknown text  
df['director_name'] = df['director_name'].replace(np.nan, 'Uknown')
df['actor_1_name'] = df['actor_1_name'].replace(np.nan, 'Uknown')
df['actor_2_name'] = df['actor_2_name'].replace(np.nan, 'Uknown')
df['actor_3_name'] = df['actor_3_name'].replace(np.nan, 'Uknown')

In [12]:
df.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres
0,Avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action|Adventure|Fantasy|Sci-Fi
1,Pirates of the Caribbean: At World's End,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action|Adventure|Fantasy
2,Spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action|Adventure|Thriller
3,The Dark Knight Rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action|Thriller
5,John Carter,Andrew Stanton,Daryl Sabara,Samantha Morton,Polly Walker,Action|Adventure|Sci-Fi


In [13]:
# Converting movie title upper to lower text
df['movie_title'] = df['movie_title'].str.lower()
df['movie_title'][1]
df['movie_title'] = df['movie_title'].apply(lambda x : x[:-1])

In [14]:
df['genres'] = df['genres'].str.replace('|', ' ')

  df['genres'] = df['genres'].str.replace('|', ' ')


In [13]:
#df.to_csv('data_1.csv', index = False)

In [16]:
df.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres
0,avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi
1,pirates of the caribbean: at world's end,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy
2,spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller
3,the dark knight rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller
5,john carter,Andrew Stanton,Daryl Sabara,Samantha Morton,Polly Walker,Action Adventure Sci-Fi


Importing Dataset 2

In [24]:
# Importing libraries and data
import pandas as pd
import numpy as np
df_1 = pd.read_csv(r'C:\Users\polep\OneDrive - Högskolan Dalarna\Desktop\Interview\portfolio_projects\Movies-Recommendation-System-App-main\movies_metadata.csv')
df_1.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [25]:
# Dataframe columns
df_1.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [26]:
# Selecting specific columns
df_1 = df_1.loc[:,['title', 'genres','id','release_date']]
df_1.head()

Unnamed: 0,title,genres,id,release_date
0,Toy Story,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,1995-10-30
1,Jumanji,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,1995-12-15
2,Grumpier Old Men,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",15602,1995-12-22
3,Waiting to Exhale,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",31357,1995-12-22
4,Father of the Bride Part II,"[{'id': 35, 'name': 'Comedy'}]",11862,1995-02-10


In [27]:
## release date column converts into pandas date time

df_1['release_date'] = pd.to_datetime(df_1['release_date'], errors = 'coerce')
df_1['year'] = df_1['release_date'].dt.year


In [28]:
# abstract syntax trees
import ast 
df_1['genres'] = df_1['genres'].map(lambda x: ast.literal_eval(x))

In [29]:
df_1['genres']

0        [{'id': 16, 'name': 'Animation'}, {'id': 35, '...
1        [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...
2        [{'id': 10749, 'name': 'Romance'}, {'id': 35, ...
3        [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...
4                           [{'id': 35, 'name': 'Comedy'}]
                               ...                        
45461    [{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...
45462                        [{'id': 18, 'name': 'Drama'}]
45463    [{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...
45464                                                   []
45465                                                   []
Name: genres, Length: 45466, dtype: object

In [30]:
#from ast import literal_eval
#df_1['genres'] = df_1['genres'].apply(literal_eval)


In [31]:
df_1.head()

Unnamed: 0,title,genres,id,release_date,year
0,Toy Story,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,1995-10-30,1995.0
1,Jumanji,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,1995-12-15,1995.0
2,Grumpier Old Men,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",15602,1995-12-22,1995.0
3,Waiting to Exhale,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",31357,1995-12-22,1995.0
4,Father of the Bride Part II,"[{'id': 35, 'name': 'Comedy'}]",11862,1995-02-10,1995.0


In [32]:
## Replacing text 
def make_genreslist(x):
    gen = []
    st = ''
    for i in x:
        if i.get('name') == 'Science Fiction':
            scifi = 'Sci-Fi'
            gen.append(scifi)
        else:
            gen.append(i.get('name'))
    if gen == []:
        return np.NaN
    else:
        return (st.join(gen))      
    

In [33]:
df_1['genres_list'] = df_1['genres'].map(lambda x: make_genreslist(x))

In [34]:
# Extracting 2017 year of movies data

new_meta = df_1.loc[df_1.year == 2017,['genres','id','title','year', 'genres_list']]
new_meta['id'] = new_meta['id'].astype(int)

In [36]:
# New dataframe
new_meta.head()

Unnamed: 0,genres,id,title,year,genres_list
26560,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,AdventureActionFantasyComedy
26561,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",141052,Justice League,2017.0,ActionAdventureFantasySci-Fi
26565,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",284053,Thor: Ragnarok,2017.0,ActionAdventureFantasySci-Fi
26566,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",283995,Guardians of the Galaxy Vol. 2,2017.0,ActionAdventureComedySci-Fi
30536,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...",245842,The King's Daughter,2017.0,FantasyActionAdventure


Importing 3rd dataset 

In [41]:
## Importing dataset
df_credit = pd.read_csv(r'C:\Users\polep\OneDrive - Högskolan Dalarna\Desktop\Interview\portfolio_projects\Movies-Recommendation-System-App-main\credits.csv')
df_credit.head()

Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862


In [42]:

# Apply literal evaluation to dataframe columns
df_credit['cast'] = df_credit['cast'].map(lambda x: ast.literal_eval(x))
df_credit['crew'] = df_credit['crew'].map(lambda x: ast.literal_eval(x))

# Extracts actor names
def actor1(x):
    casts = []
    for i in x:
        casts.append(i.get('name'))
    if casts == []:
        return np.NaN
    else:
        return (casts[0])

In [43]:
df_credit['actor_1'] = df_credit['cast'].map(lambda x: actor1(x))

In [44]:
# Extracts actor names
def actor2(x):
    casts = []
    for i in x:
        casts.append(i.get('name'))
    if casts == [] or len(casts)<=1:
        return np.NaN
    else:
        return (casts[1])

In [45]:
df_credit['actor_2'] = df_credit['crew'].map(lambda x: actor2(x))

In [46]:
# Extracts actor names
def actor3(x):
    casts = []
    for i in x:
        casts.append(i.get('name'))
    if casts == [] or len(casts)<=2:
        return np.NaN
    else:
        return (casts[2])

In [47]:
df_credit['actor_3'] = df_credit['crew'].map(lambda x: actor3(x))

In [48]:
# Extracts director names
def get_directors(x):
    dt = []
    st = " "
    for i in x:
        if i.get('job') == 'Director':
            dt.append(i.get('name'))
    if dt == []:
        return np.NaN
    else:
        return (st.join(dt))

In [49]:
df_credit['director_name'] = df_credit['crew'].map(lambda x: get_directors(x))

Combining Multiple datasets

In [50]:
# New dataframes
df_credit.head()


Unnamed: 0,cast,crew,id,actor_1,actor_2,actor_3,director_name
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862,Tom Hanks,Joss Whedon,Andrew Stanton,John Lasseter
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844,Robin Williams,Jonathan Hensleigh,James Horner,Joe Johnston
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602,Walter Matthau,Mark Steven Johnson,Mark Steven Johnson,Howard Deutch
3,"[{'cast_id': 1, 'character': 'Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357,Whitney Houston,Ronald Bass,Ronald Bass,Forest Whitaker
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862,Steve Martin,Elliot Davis,Nancy Meyers,Charles Shyer


In [51]:
new_meta.head()

Unnamed: 0,genres,id,title,year,genres_list
26560,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,AdventureActionFantasyComedy
26561,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",141052,Justice League,2017.0,ActionAdventureFantasySci-Fi
26565,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",284053,Thor: Ragnarok,2017.0,ActionAdventureFantasySci-Fi
26566,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",283995,Guardians of the Galaxy Vol. 2,2017.0,ActionAdventureComedySci-Fi
30536,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...",245842,The King's Daughter,2017.0,FantasyActionAdventure


In [52]:
# Concatenate dataframes
data = pd.merge(df_credit, new_meta, on = 'id')
data.head()

Unnamed: 0,cast,crew,id,actor_1,actor_2,actor_3,director_name,genres,title,year,genres_list
0,"[{'cast_id': 1, 'character': 'Captain Jack Spa...","[{'credit_id': '52fe4c9cc3a36847f8236a65', 'de...",166426,Johnny Depp,Ted Elliott,Terry Rossio,Joachim Rønning Espen Sandberg,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,AdventureActionFantasyComedy
1,"[{'cast_id': 2, 'character': 'Bruce Wayne / Ba...","[{'credit_id': '55ef66dbc3a3686f1700a52d', 'de...",141052,Ben Affleck,Danny Elfman,Ben Affleck,Zack Snyder,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",Justice League,2017.0,ActionAdventureFantasySci-Fi
2,"[{'cast_id': 0, 'character': 'Thor Odinson', '...","[{'credit_id': '56a93fa4c3a36872db001e7a', 'de...",284053,Chris Hemsworth,Craig Kyle,Christopher Yost,Taika Waititi,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",Thor: Ragnarok,2017.0,ActionAdventureFantasySci-Fi
3,"[{'cast_id': 3, 'character': 'Peter Quill / St...","[{'credit_id': '59171547925141583c0315a6', 'de...",283995,Chris Pratt,Jason C. Lewis,Lora Hirschberg,James Gunn,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",Guardians of the Galaxy Vol. 2,2017.0,ActionAdventureComedySci-Fi
4,"[{'cast_id': 0, 'character': 'King Louis XIV',...","[{'credit_id': '5431de49c3a36825d300007e', 'de...",245842,Pierce Brosnan,Ronald Bass,Barry Berman,Sean McNamara,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...",The King's Daughter,2017.0,FantasyActionAdventure


In [53]:
# Extracting specific columns
main_data = data.loc[:, ['title', 'director_name', 'actor_1', 'actor_2', 'actor_3', 'genres_list']]
main_data['title'] = main_data['title'].str.lower()
main_data.head()

Unnamed: 0,title,director_name,actor_1,actor_2,actor_3,genres_list
0,pirates of the caribbean: dead men tell no tales,Joachim Rønning Espen Sandberg,Johnny Depp,Ted Elliott,Terry Rossio,AdventureActionFantasyComedy
1,justice league,Zack Snyder,Ben Affleck,Danny Elfman,Ben Affleck,ActionAdventureFantasySci-Fi
2,thor: ragnarok,Taika Waititi,Chris Hemsworth,Craig Kyle,Christopher Yost,ActionAdventureFantasySci-Fi
3,guardians of the galaxy vol. 2,James Gunn,Chris Pratt,Jason C. Lewis,Lora Hirschberg,ActionAdventureComedySci-Fi
4,the king's daughter,Sean McNamara,Pierce Brosnan,Ronald Bass,Barry Berman,FantasyActionAdventure


In [54]:
main_data['combine_data'] = main_data['director_name'] + ' '+ main_data['actor_1'] + ' '+ main_data['actor_2'] + ' '+ main_data['actor_3'] + ' '+ main_data['genres_list']

main_data.head()

Unnamed: 0,title,director_name,actor_1,actor_2,actor_3,genres_list,combine_data
0,pirates of the caribbean: dead men tell no tales,Joachim Rønning Espen Sandberg,Johnny Depp,Ted Elliott,Terry Rossio,AdventureActionFantasyComedy,Joachim Rønning Espen Sandberg Johnny Depp Ted...
1,justice league,Zack Snyder,Ben Affleck,Danny Elfman,Ben Affleck,ActionAdventureFantasySci-Fi,Zack Snyder Ben Affleck Danny Elfman Ben Affle...
2,thor: ragnarok,Taika Waititi,Chris Hemsworth,Craig Kyle,Christopher Yost,ActionAdventureFantasySci-Fi,Taika Waititi Chris Hemsworth Craig Kyle Chris...
3,guardians of the galaxy vol. 2,James Gunn,Chris Pratt,Jason C. Lewis,Lora Hirschberg,ActionAdventureComedySci-Fi,James Gunn Chris Pratt Jason C. Lewis Lora Hir...
4,the king's daughter,Sean McNamara,Pierce Brosnan,Ronald Bass,Barry Berman,FantasyActionAdventure,Sean McNamara Pierce Brosnan Ronald Bass Barry...


In [55]:
# Replace columns names
main_data = main_data.rename(columns = {'title': 'movie_title'})
main_data = main_data.rename(columns = {'genres_list': 'genres'})
main_data = main_data.rename(columns = {'actor_1': 'actor_1_name'})
main_data = main_data.rename(columns = {'actor_2': 'actor_2_name'})
main_data = main_data.rename(columns = {'actor_3': 'actor_3_name'})

main_data.dropna(inplace = True)
main_data.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
combine_data     0
dtype: int64

In [56]:
main_data.head()



Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres,combine_data
0,pirates of the caribbean: dead men tell no tales,Joachim Rønning Espen Sandberg,Johnny Depp,Ted Elliott,Terry Rossio,AdventureActionFantasyComedy,Joachim Rønning Espen Sandberg Johnny Depp Ted...
1,justice league,Zack Snyder,Ben Affleck,Danny Elfman,Ben Affleck,ActionAdventureFantasySci-Fi,Zack Snyder Ben Affleck Danny Elfman Ben Affle...
2,thor: ragnarok,Taika Waititi,Chris Hemsworth,Craig Kyle,Christopher Yost,ActionAdventureFantasySci-Fi,Taika Waititi Chris Hemsworth Craig Kyle Chris...
3,guardians of the galaxy vol. 2,James Gunn,Chris Pratt,Jason C. Lewis,Lora Hirschberg,ActionAdventureComedySci-Fi,James Gunn Chris Pratt Jason C. Lewis Lora Hir...
4,the king's daughter,Sean McNamara,Pierce Brosnan,Ronald Bass,Barry Berman,FantasyActionAdventure,Sean McNamara Pierce Brosnan Ronald Bass Barry...


In [57]:
df['combine_data']  = df['director_name'] + ' '+ df['actor_1_name'] + ' '+ df['actor_2_name'] + ' '+ df['actor_3_name'] + ' '+ df['genres']

df.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres,combine_data
0,avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,James Cameron CCH Pounder Joel David Moore Wes...
1,pirates of the caribbean: at world's end,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,Gore Verbinski Johnny Depp Orlando Bloom Jack ...
2,spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,Sam Mendes Christoph Waltz Rory Kinnear Stepha...
3,the dark knight rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,Christopher Nolan Tom Hardy Christian Bale Jos...
5,john carter,Andrew Stanton,Daryl Sabara,Samantha Morton,Polly Walker,Action Adventure Sci-Fi,Andrew Stanton Daryl Sabara Samantha Morton Po...


In [58]:
df.isnull().sum()
main_data.isnull().sum()


movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
combine_data     0
dtype: int64

In [59]:
new_data = df.append(main_data)
new_data.drop_duplicates(subset = 'movie_title', keep = 'last', inplace = True)

In [60]:
new_data

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres,combine_data
0,avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,James Cameron CCH Pounder Joel David Moore Wes...
1,pirates of the caribbean: at world's end,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,Gore Verbinski Johnny Depp Orlando Bloom Jack ...
2,spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,Sam Mendes Christoph Waltz Rory Kinnear Stepha...
3,the dark knight rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,Christopher Nolan Tom Hardy Christian Bale Jos...
5,john carter,Andrew Stanton,Daryl Sabara,Samantha Morton,Polly Walker,Action Adventure Sci-Fi,Andrew Stanton Daryl Sabara Samantha Morton Po...
...,...,...,...,...,...,...,...
522,take me,Pat Healy,Taylor Schilling,Mike Makowsky,Mel Eslyn,ComedyCrime,Pat Healy Taylor Schilling Mike Makowsky Mel E...
523,phillauri,Anshai Lal,Anushka Sharma,Anvita Dutt,Anushka Sharma,FantasyComedyRomanceDrama,Anshai Lal Anushka Sharma Anvita Dutt Anushka ...
524,the incredible jessica james,Jim Strouse,Jessica Williams,Jim Strouse,Amanda Ford,RomanceComedy,Jim Strouse Jessica Williams Jim Strouse Amand...
527,cop and a half: new recruit,Jonathan A. Rosenbaum,Lou Diamond Phillips,Jonathan A. Rosenbaum,Adrian Vina,CrimeComedyActionFamily,Jonathan A. Rosenbaum Lou Diamond Phillips Jon...


In [116]:
new_data.isnull().sum()
new_data.to_csv('data.csv',index = False)

# Model Building for movie Recommended System

In [61]:
# prediction

new_data['movie_title'].head()

0                                      avatar
1    pirates of the caribbean: at world's end
2                                     spectre
3                       the dark knight rises
5                                 john carter
Name: movie_title, dtype: object

In [62]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [65]:
cv = CountVectorizer()
count_matrix = cv.fit_transform(new_data['combine_data'])


# Cosine similarity is a metric used to determine how similar two entities are irrespective of their size. 
# Mathematically, it measures the cosine of the angle between two vectors projected in a multi-dimensional space.
#similarity score matrix
sim = cosine_similarity(count_matrix)
sim

array([[1.        , 0.24174689, 0.16116459, ..., 0.        , 0.        ,
        0.        ],
       [0.24174689, 1.        , 0.18181818, ..., 0.        , 0.        ,
        0.        ],
       [0.16116459, 0.18181818, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [63]:
# Checking users input in database
movie = input('enter the name of the movie name ')
movie = movie.lower()
movie in new_data['movie_title'].unique()

enter the name of the movie name avatar


True

In [66]:
i = new_data.loc[new_data['movie_title'] == movie].index[0]
lst = enumerate(sim[i])
lst = sorted(lst, key = lambda x:x[1], reverse = True)
lst[1:11]

[(2333, 0.4629100498862758),
 (585, 0.44474958999666075),
 (1051, 0.44474958999666075),
 (1594, 0.41403933560541256),
 (256, 0.4029114820126901),
 (3375, 0.4029114820126901),
 (621, 0.40089186286863654),
 (32, 0.38575837490522985),
 (51, 0.38575837490522985),
 (83, 0.38575837490522985)]

In [67]:
def movie_recommend(movie):
    movie = movie.lower()
    if movie not in new_data['movie_title'].unique():
        return('This movie is not in our database.\nPlease check if you spelled it correct.')
    else:
        # getting the index of the movie in the dataframe
        i = new_data.loc[new_data['movie_title']== movie].index[0]

        # fetching the row containing similarity scores of the movie
        # from similarity matrix and enumerate it
        lst = list(enumerate(sim[i]))

        # sorting this list in decreasing order based on the similarity score
        lst = sorted(lst, key = lambda x:x[1] ,reverse=True)

        # taking top 1- movie scores
        # not taking the first index since it is the same movie
        lst = lst[1:11]

        # making an empty list that will containg all 10 movie recommendations
        l = []
        for i in range(len(lst)):
            a = lst[i][0]
            l.append(new_data['movie_title'][a])
            
        for i in range(len(l)):
            print(lst[i])
    

In [68]:
# Model recommended to users inputs
movie_recommend('avatar')

(2333, 0.4629100498862758)
(585, 0.44474958999666075)
(1051, 0.44474958999666075)
(1594, 0.41403933560541256)
(256, 0.4029114820126901)
(3375, 0.4029114820126901)
(621, 0.40089186286863654)
(32, 0.38575837490522985)
(51, 0.38575837490522985)
(83, 0.38575837490522985)


In [70]:
new_data['movie_title'][2333]

'by the sea'

In [71]:
new_data.index

def find_index_from_title(title):
    return new_data[new_data['movie_title'] == title].index.values[0]

def find_title_from_index(index):
    return new_data[new_data.index == index]["movie_title"].values[0]

In [73]:
# the movies recommended list.

#find_index_from_title('avatar')
find_title_from_index(0)

i=0
for element in lst:
    print(find_title_from_index(element[0]))
    i=i+1
    if i>10:
        break


avatar
by the sea
xxx
the cotton club
lee daniels' the butler
insurgent
on her majesty's secret service
ballistic: ecks vs. sever
iron man 3
prince of persia: the sands of time
dawn of the planet of the apes
