# Movie Recommendation System

## Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Notebook-wide pandas options.
# Show every column when a wide frame is displayed (None = no column limit).
# Uses the public `pd.set_option` entry point rather than the incidental
# `pd.pandas` self-reference attribute.
pd.set_option("display.max_columns", None)
# Silence SettingWithCopyWarning for the column assignments made in later cells.
pd.set_option('mode.chained_assignment', None)

## Load Dataset

In [4]:
# Read the raw movie metadata from a CSV file located in the working directory.
movie_metadata=pd.read_csv('movie_metadata.csv')
# Preview the first rows to check that the file parsed as expected.
movie_metadata.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,director_facebook_likes,actor_3_facebook_likes,actor_2_name,actor_1_facebook_likes,gross,genres,actor_1_name,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,content_rating,budget,title_year,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,0.0,855.0,Joel David Moore,1000.0,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,Avatar,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,3054.0,English,USA,PG-13,237000000.0,2009.0,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,563.0,1000.0,Orlando Bloom,40000.0,309404152.0,Action|Adventure|Fantasy,Johnny Depp,Pirates of the Caribbean: At World's End,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pi...,http://www.imdb.com/title/tt0449088/?ref_=fn_t...,1238.0,English,USA,PG-13,300000000.0,2007.0,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,0.0,161.0,Rory Kinnear,11000.0,200074175.0,Action|Adventure|Thriller,Christoph Waltz,Spectre,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,994.0,English,UK,PG-13,245000000.0,2015.0,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,22000.0,23000.0,Christian Bale,27000.0,448130642.0,Action|Thriller,Tom Hardy,The Dark Knight Rises,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police offi...,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,English,USA,PG-13,250000000.0,2012.0,23000.0,8.5,2.35,164000
4,,Doug Walker,,,131.0,,Rob Walker,131.0,,Documentary,Doug Walker,Star Wars: Episode VII - The Force Awakens ...,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_t...,,,,,,,12.0,7.1,,0


In [5]:
# (rows, columns) of the raw table.
movie_metadata.shape

(5043, 28)

## Exploratory Data Analysis

In [6]:
# List the available column names before choosing which ones to keep.
movie_metadata.columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [7]:
# Keep only the title plus the descriptive text columns that later cells
# clean and concatenate. `.copy()` gives `data` its own storage, so the column
# assignments in later cells modify this frame explicitly instead of writing
# into a slice of `movie_metadata` (the source of SettingWithCopyWarning).
FEATURE_COLUMNS = [
    'movie_title',
    'director_name',
    'actor_1_name',
    'actor_2_name',
    'actor_3_name',
    'genres',
    'plot_keywords',
    'language',
    'country',
]
data = movie_metadata[FEATURE_COLUMNS].copy()

In [8]:
# Preview the selected subset of columns.
data.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres,plot_keywords,language,country
0,Avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action|Adventure|Fantasy|Sci-Fi,avatar|future|marine|native|paraplegic,English,USA
1,Pirates of the Caribbean: At World's End,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action|Adventure|Fantasy,goddess|marriage ceremony|marriage proposal|pi...,English,USA
2,Spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action|Adventure|Thriller,bomb|espionage|sequel|spy|terrorist,English,UK
3,The Dark Knight Rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action|Thriller,deception|imprisonment|lawlessness|police offi...,English,USA
4,Star Wars: Episode VII - The Force Awakens ...,Doug Walker,Doug Walker,Rob Walker,,Documentary,,,


In [9]:
# (rows, columns) of the selected subset.
data.shape

(5043, 9)

In [10]:
# Count missing values per column.
data.isnull().sum()

movie_title        0
director_name    104
actor_1_name       7
actor_2_name      13
actor_3_name      23
genres             0
plot_keywords    153
language          12
country            5
dtype: int64

## Feature Engineering

In [11]:
# Turn the pipe-delimited genre list into a space-separated string.
# `regex=False` makes '|' a literal character on every pandas version; as a
# regular expression '|' means alternation, and the default value of `regex`
# differs between pandas releases.
data['genres'] = data['genres'].str.replace('|', ' ', regex=False)

In [12]:
# Turn the pipe-delimited keyword list into a space-separated string.
# `regex=False` makes '|' a literal character on every pandas version; as a
# regular expression '|' means alternation, and the default value of `regex`
# differs between pandas releases. Missing values stay missing.
data['plot_keywords'] = data['plot_keywords'].str.replace('|', ' ', regex=False)

In [13]:
# Check that the '|' separators in genres and plot_keywords are now spaces.
data.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres,plot_keywords,language,country
0,Avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar future marine native paraplegic,English,USA
1,Pirates of the Caribbean: At World's End,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,goddess marriage ceremony marriage proposal pi...,English,USA
2,Spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,bomb espionage sequel spy terrorist,English,UK
3,The Dark Knight Rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,deception imprisonment lawlessness police offi...,English,USA
4,Star Wars: Episode VII - The Force Awakens ...,Doug Walker,Doug Walker,Rob Walker,,Documentary,,,


In [14]:
# Names of the columns holding at least one null, in their original column order.
missing_features = data.columns[data.isna().any(axis=0)].tolist()
missing_features

['director_name',
 'actor_1_name',
 'actor_2_name',
 'actor_3_name',
 'plot_keywords',
 'language',
 'country']

In [15]:
# Replace nulls with the placeholder string 'Missing', restricted to the
# columns listed in `missing_features` (one fill value per column).
data = data.fillna({feature: 'Missing' for feature in missing_features})

In [16]:
# Re-check the null counts after filling; every column should now report 0.
data.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
plot_keywords    0
language         0
country          0
dtype: int64

In [17]:
# Build one comma-separated text field per movie: director, the three actors,
# genres, plot keywords, language and country, in that order.
data['combined'] = data['director_name'].str.cat(
    data[[
        'actor_1_name',
        'actor_2_name',
        'actor_3_name',
        'genres',
        'plot_keywords',
        'language',
        'country',
    ]],
    sep=',',
)

In [18]:
# Preview the frame including the new 'combined' column.
data.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,genres,plot_keywords,language,country,combined
0,Avatar,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar future marine native paraplegic,English,USA,"James Cameron,CCH Pounder,Joel David Moore,Wes..."
1,Pirates of the Caribbean: At World's End,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,goddess marriage ceremony marriage proposal pi...,English,USA,"Gore Verbinski,Johnny Depp,Orlando Bloom,Jack ..."
2,Spectre,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,bomb espionage sequel spy terrorist,English,UK,"Sam Mendes,Christoph Waltz,Rory Kinnear,Stepha..."
3,The Dark Knight Rises,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,deception imprisonment lawlessness police offi...,English,USA,"Christopher Nolan,Tom Hardy,Christian Bale,Jos..."
4,Star Wars: Episode VII - The Force Awakens ...,Doug Walker,Doug Walker,Rob Walker,Missing,Documentary,Missing,Missing,Missing,"Doug Walker,Doug Walker,Rob Walker,Missing,Doc..."


In [19]:
# Keep only the lookup key (title) and the combined text feature.
# `.copy()` detaches the result from `data`, so the later clean-up of
# `data_processed` operates on an independent frame rather than on a slice.
data_processed = data[['movie_title', 'combined']].copy()

In [20]:
# Preview the two-column frame.
data_processed.head()

Unnamed: 0,movie_title,combined
0,Avatar,"James Cameron,CCH Pounder,Joel David Moore,Wes..."
1,Pirates of the Caribbean: At World's End,"Gore Verbinski,Johnny Depp,Orlando Bloom,Jack ..."
2,Spectre,"Sam Mendes,Christoph Waltz,Rory Kinnear,Stepha..."
3,The Dark Knight Rises,"Christopher Nolan,Tom Hardy,Christian Bale,Jos..."
4,Star Wars: Episode VII - The Force Awakens ...,"Doug Walker,Doug Walker,Rob Walker,Missing,Doc..."


In [21]:
# Keep one row per title; when a title occurs more than once, the last
# occurrence wins. Reassigning (instead of `inplace=True`) avoids mutating a
# derived frame in place and keeps the step chainable.
# NOTE(review): this compares the raw titles, before any whitespace or case
# normalisation — titles that differ only in those respects are not merged.
data_processed = data_processed.drop_duplicates(subset='movie_title', keep='last')

In [22]:
# (rows, columns) after removing duplicate titles.
data_processed.shape

(4917, 2)

In [38]:
# Remove stray whitespace around each title.
# The previous version dropped the last character unconditionally, so running
# the cell a second time cut off a real character from every title.
# `.str.strip()` removes only whitespace and is safe to re-run.
# NOTE(review): presumably the character being dropped was a trailing
# whitespace character in the raw data (e.g. a non-breaking space) — confirm
# against the CSV.
data_processed['movie_title'] = data_processed['movie_title'].str.strip()

In [96]:
# Lower-case every title in the 'movie_title' column.
data_processed['movie_title']=data_processed['movie_title'].str.lower()

In [97]:
# Display the cleaned frame (pandas abbreviates long frames in the output).
data_processed

Unnamed: 0,movie_title,combined
0,avatar,"James Cameron,CCH Pounder,Joel David Moore,Wes..."
1,pirates of the caribbean: at world's end,"Gore Verbinski,Johnny Depp,Orlando Bloom,Jack ..."
2,spectre,"Sam Mendes,Christoph Waltz,Rory Kinnear,Stepha..."
3,the dark knight rises,"Christopher Nolan,Tom Hardy,Christian Bale,Jos..."
4,star wars: episode vii - the force awakens ...,"Doug Walker,Doug Walker,Rob Walker,Missing,Doc..."
...,...,...
5038,signed sealed delivered,"Scott Smith,Eric Mabius,Daphne Zuniga,Crystal ..."
5039,the following,"Missing,Natalie Zea,Valorie Curry,Sam Underwoo..."
5040,a plague so pleasant,"Benjamin Roberds,Eva Boehnke,Maxwell Moody,Dav..."
5041,shanghai calling,"Daniel Hsia,Alan Ruck,Daniel Henney,Eliza Coup..."


In [98]:
# Write the title/combined table to 'data.csv' in the working directory.
# index=False leaves the DataFrame index out of the file.
data_processed.to_csv('data.csv',index=False)