# Data Cleaning and Preprocessing
Link for the dataset: https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import ast


In [2]:
# Load the two raw tables of the Kaggle "The Movies Dataset".
# NOTE(review): DATASET_DIR is an absolute, machine-specific path — point it at your local copy.
DATASET_DIR = "/Users/user/Downloads/AshokaUniversity/monsoon23-courses/IML/final_project/dataset/movielens_kaggle"

# low_memory=False makes pandas infer each column's dtype from the whole file at once
# instead of chunk by chunk, which avoids the mixed-type DtypeWarning on this read.
metadata = pd.read_csv(DATASET_DIR + "/movies_metadata.csv", low_memory=False)
credits = pd.read_csv(DATASET_DIR + "/credits.csv")

  metadata = pd.read_csv("/Users/user/Downloads/AshokaUniversity/monsoon23-courses/IML/final_project/dataset/movielens_kaggle/movies_metadata.csv")


In [3]:
# Report how many values are missing in every column before pruning.
null_counts = metadata.isnull().sum()
print(null_counts)

# Features with lots of null values, plus some that are irrelevant for this analysis.
columns_to_drop = [
    'belongs_to_collection', 'homepage', 'tagline', 'overview',
    'status', 'video', 'poster_path',
]
metadata = metadata.drop(columns=columns_to_drop)


adult                        0
belongs_to_collection    40972
budget                       0
genres                       0
homepage                 37684
id                           0
imdb_id                     17
original_language           11
original_title               0
overview                   954
popularity                   5
poster_path                386
production_companies         3
production_countries         3
release_date                87
revenue                      6
runtime                    263
spoken_languages             6
status                      87
tagline                  25054
title                        6
video                        6
vote_average                 6
vote_count                   6
dtype: int64


In [4]:
# Inspect the dtype pandas inferred for each remaining column.
metadata.dtypes

adult                    object
budget                   object
genres                   object
id                       object
imdb_id                  object
original_language        object
original_title           object
popularity               object
production_companies     object
production_countries     object
release_date             object
revenue                 float64
runtime                 float64
spoken_languages         object
title                    object
vote_average            float64
vote_count              float64
dtype: object

In [5]:
# Peek at the first two rows to see what the raw values look like.
metadata.head(2)

Unnamed: 0,adult,budget,genres,id,imdb_id,original_language,original_title,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,title,vote_average,vote_count
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,tt0114709,en,Toy Story,21.946943,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Toy Story,7.7,5415.0
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,tt0113497,en,Jumanji,17.015539,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Jumanji,6.9,2413.0


In [6]:
# Turn the numeric-looking columns into real numbers; entries that cannot be parsed become NaN.
for numeric_col in ('budget', 'popularity', 'runtime'):
    metadata[numeric_col] = pd.to_numeric(metadata[numeric_col], errors='coerce')

# Weed out extreme / low-quality entries: revenue below 10000, budget below 10000
# or runtime below 30. Rows holding NaN in these columns fail every comparison,
# so they are not removed by this step.
too_small = (
    (metadata['revenue'] < 10000)
    | (metadata['budget'] < 10000)
    | (metadata['runtime'] < 30)
)
metadata = metadata.drop(index=metadata.index[too_small])


In [7]:
# Summary statistics of the numeric columns after the threshold filtering.
metadata.describe()

Unnamed: 0,budget,popularity,revenue,runtime,vote_average,vote_count
count,5269.0,5269.0,5269.0,5268.0,5269.0,5269.0
mean,31702230.0,9.912472,92187750.0,110.267654,6.283963,746.356614
std,40362420.0,13.923883,167389600.0,21.458295,0.910063,1260.85817
min,10000.0,0.000657,10000.0,57.0,0.0,0.0
25%,6000000.0,5.600277,7938872.0,95.0,5.7,83.0
50%,17500000.0,8.471302,31083600.0,106.0,6.3,287.0
75%,40000000.0,11.807601,101564900.0,120.0,6.9,827.0
max,380000000.0,547.488298,2787965000.0,338.0,9.1,14075.0


In [8]:
# Discard every row that still contains a missing value, then renumber the rows from 0.
metadata = metadata.dropna().reset_index(drop=True)


In [9]:
# (rows, columns) left after removing rows with missing values.
metadata.shape

(5268, 17)

In [10]:
# Derive release_year and release_month (both int64) from the release_date strings.
# The dates are parsed once and reused for both columns.
release_dates = pd.DatetimeIndex(metadata['release_date'])
metadata['release_year'] = release_dates.year.astype('int64')
metadata['release_month'] = release_dates.month.astype('int64')
metadata.dtypes

adult                    object
budget                  float64
genres                   object
id                       object
imdb_id                  object
original_language        object
original_title           object
popularity              float64
production_companies     object
production_countries     object
release_date             object
revenue                 float64
runtime                 float64
spoken_languages         object
title                    object
vote_average            float64
vote_count              float64
release_year              int64
release_month             int64
dtype: object

In [11]:
# Summary statistics again, now including release_year and release_month.
metadata.describe()

Unnamed: 0,budget,popularity,revenue,runtime,vote_average,vote_count,release_year,release_month
count,5268.0,5268.0,5268.0,5268.0,5268.0,5268.0,5268.0,5268.0
mean,31707110.0,9.914298,92205240.0,110.267654,6.284966,746.498102,1999.76139,6.941344
std,40364690.0,13.924574,167400700.0,21.458295,0.907232,1260.936026,15.834433,3.381981
min,10000.0,0.000657,10000.0,57.0,0.0,0.0,1915.0,1.0
25%,6000000.0,5.601664,7954186.0,95.0,5.7,83.0,1994.0,4.0
50%,17500000.0,8.472335,31115960.0,106.0,6.3,287.0,2004.0,7.0
75%,40000000.0,11.808118,101572500.0,120.0,6.9,827.5,2011.0,10.0
max,380000000.0,547.488298,2787965000.0,338.0,9.1,14075.0,2017.0,12.0


In [12]:
# Movies have changed over time, so only releases from 1950 onwards are considered.
from_1950_on = metadata['release_year'] >= 1950
metadata = metadata[from_1950_on].reset_index(drop=True)
metadata.describe()

Unnamed: 0,budget,popularity,revenue,runtime,vote_average,vote_count,release_year,release_month
count,5170.0,5170.0,5170.0,5170.0,5170.0,5170.0,5170.0,5170.0
mean,32265380.0,9.994889,93653130.0,110.32205,6.273598,756.152611,2000.949903,6.929207
std,40519420.0,14.028914,168495900.0,21.320976,0.902638,1269.59176,13.349289,3.375686
min,10000.0,0.000657,10000.0,62.0,0.0,0.0,1950.0,1.0
25%,6000000.0,5.703605,8350294.0,96.0,5.7,86.0,1995.0,4.0
50%,18000000.0,8.546266,32483050.0,106.0,6.3,290.5,2005.0,7.0
75%,40000000.0,11.840878,103025700.0,120.0,6.9,841.0,2011.0,10.0
max,380000000.0,547.488298,2787965000.0,338.0,9.1,14075.0,2017.0,12.0


In [13]:
# First two rows after the 1950 cut-off, including the new release_year / release_month columns.
metadata.head(2)

Unnamed: 0,adult,budget,genres,id,imdb_id,original_language,original_title,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,title,vote_average,vote_count,release_year,release_month
0,False,30000000.0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,tt0114709,en,Toy Story,21.946943,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Toy Story,7.7,5415.0,1995,10
1,False,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,tt0113497,en,Jumanji,17.015539,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Jumanji,6.9,2413.0,1995,12


In [14]:
# The nested columns have a JSON-like format but without JSON's double quotes, so the json
# module could not be used; following the dataset's Kaggle page they are parsed with
# ast.literal_eval instead.
#
# For every nested column: null values are first replaced with the string '[]' (an empty list),
# the string is parsed into a list of dicts, and only each entry's 'name' is kept.
nested_to_list_column = {
    'genres': 'genre_list',
    'production_companies': 'production_companies_list',
    'production_countries': 'production_countries_list',
    'spoken_languages': 'spoken_languages_list',
}
for raw_column, list_column in nested_to_list_column.items():
    parsed_entries = metadata[raw_column].fillna('[]').apply(ast.literal_eval)
    metadata[list_column] = parsed_entries.apply(
        lambda entries: [entry['name'] for entry in entries]
    )

metadata.head(2)

Unnamed: 0,adult,budget,genres,id,imdb_id,original_language,original_title,popularity,production_companies,production_countries,...,spoken_languages,title,vote_average,vote_count,release_year,release_month,genre_list,production_companies_list,production_countries_list,spoken_languages_list
0,False,30000000.0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,tt0114709,en,Toy Story,21.946943,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Toy Story,7.7,5415.0,1995,10,"[Animation, Comedy, Family]",[Pixar Animation Studios],[United States of America],[English]
1,False,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,tt0113497,en,Jumanji,17.015539,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Jumanji,6.9,2413.0,1995,12,"[Adventure, Fantasy, Family]","[TriStar Pictures, Teitler Film, Interscope Co...",[United States of America],"[English, Français]"


In [15]:
# To keep the analysis simple for now, each movie is represented by the first value of every list.
single_from_list_column = {
    'genre': 'genre_list',
    'production_company': 'production_companies_list',
    'production_country': 'production_countries_list',
    'spoken_language': 'spoken_languages_list',
}
for single_column, list_column in single_from_list_column.items():
    metadata[single_column] = metadata[list_column].str[0]
metadata.shape


(5170, 27)

In [16]:
# The raw nested columns, their parsed list versions and release_date are no longer needed:
# the single-value columns and release_year / release_month replace them.
obsolete_columns = [
    'genres', 'production_companies', 'production_countries', 'spoken_languages',
    'genre_list', 'production_companies_list', 'production_countries_list',
    'spoken_languages_list', 'release_date',
]
metadata = metadata.drop(columns=obsolete_columns)
metadata.head(2)

Unnamed: 0,adult,budget,id,imdb_id,original_language,original_title,popularity,revenue,runtime,title,vote_average,vote_count,release_year,release_month,genre,production_company,production_country,spoken_language
0,False,30000000.0,862,tt0114709,en,Toy Story,21.946943,373554033.0,81.0,Toy Story,7.7,5415.0,1995,10,Animation,Pixar Animation Studios,United States of America,English
1,False,65000000.0,8844,tt0113497,en,Jumanji,17.015539,262797249.0,104.0,Jumanji,6.9,2413.0,1995,12,Adventure,TriStar Pictures,United States of America,English


In [17]:
# Summary statistics of the numeric columns after removing the helper columns.
metadata.describe()


Unnamed: 0,budget,popularity,revenue,runtime,vote_average,vote_count,release_year,release_month
count,5170.0,5170.0,5170.0,5170.0,5170.0,5170.0,5170.0,5170.0
mean,32265380.0,9.994889,93653130.0,110.32205,6.273598,756.152611,2000.949903,6.929207
std,40519420.0,14.028914,168495900.0,21.320976,0.902638,1269.59176,13.349289,3.375686
min,10000.0,0.000657,10000.0,62.0,0.0,0.0,1950.0,1.0
25%,6000000.0,5.703605,8350294.0,96.0,5.7,86.0,1995.0,4.0
50%,18000000.0,8.546266,32483050.0,106.0,6.3,290.5,2005.0,7.0
75%,40000000.0,11.840878,103025700.0,120.0,6.9,841.0,2011.0,10.0
max,380000000.0,547.488298,2787965000.0,338.0,9.1,14075.0,2017.0,12.0


In [18]:
# Order the movies from highest to lowest revenue and show the top five.
# The original row labels are kept (no reset_index here), as the output's index shows.
metadata = metadata.sort_values(by=['revenue'], ascending=False)
metadata.head(5)

Unnamed: 0,adult,budget,id,imdb_id,original_language,original_title,popularity,revenue,runtime,title,vote_average,vote_count,release_year,release_month,genre,production_company,production_country,spoken_language
3357,False,237000000.0,19995,tt0499549,en,Avatar,185.070892,2787965000.0,162.0,Avatar,7.2,12114.0,2009,12,Action,Ingenious Film Partners,United States of America,English
4466,False,245000000.0,140607,tt2488496,en,Star Wars: The Force Awakens,31.626013,2068224000.0,136.0,Star Wars: The Force Awakens,7.5,7993.0,2015,12,Action,Lucasfilm,United States of America,English
561,False,200000000.0,597,tt0120338,en,Titanic,26.88907,1845034000.0,194.0,Titanic,7.5,7770.0,1997,11,Drama,Paramount Pictures,United States of America,English
3731,False,220000000.0,24428,tt0848228,en,The Avengers,89.887648,1519558000.0,143.0,The Avengers,7.4,12000.0,2012,4,Science Fiction,Paramount Pictures,United States of America,English
4411,False,150000000.0,135397,tt0369610,en,Jurassic World,32.790475,1513529000.0,124.0,Jurassic World,6.5,8842.0,2015,6,Action,Universal Studios,United States of America,English


In [19]:
# How many movies each production country has, most frequent first.
metadata['production_country'].value_counts()

production_country
United States of America    3302
United Kingdom               376
France                       214
Canada                       185
Germany                      177
                            ... 
Malta                          1
Uruguay                        1
Bahamas                        1
Malaysia                       1
Mali                           1
Name: count, Length: 61, dtype: int64

Some production countries appear only a handful of times in the dataset. To improve data quality, we drop every entry whose production country appears fewer than 5 times.

In [20]:
# Collect the production countries that occur fewer than 5 times, together with their counts.
#
# value_counts() is computed once and filtered with a boolean mask. The previous loop
# recomputed it on every pass and read it with integer `[]` keys, which on a label-indexed
# Series relies on a positional fallback that pandas has deprecated.
country_frequencies = metadata['production_country'].value_counts()
rare_countries = country_frequencies[country_frequencies < 5]

country_list = rare_countries.index.tolist()  # names of the rare countries
country_count = rare_countries.tolist()       # number of movies for each of those countries

# Number of rare countries and total number of movies they account for
# (previously a bare tuple expression in mid-cell, so it was never displayed).
print(len(country_list), sum(country_count))
metadata.shape

(5170, 18)

In [21]:
# Show the rare countries and how often each one occurs.
# Two separate statements: joining the print() calls with a comma built the tuple
# (None, None), which the notebook then echoed as the cell's result.
print(country_list)
print(country_count)

['Bulgaria', 'Greece', 'Chile', 'Indonesia', 'Poland', 'Serbia', 'Luxembourg', 'Ukraine', 'Iceland', 'Singapore', 'Taiwan', 'Namibia', 'Burkina Faso', 'Ecuador', 'Iran', 'Pakistan', 'Qatar', 'Cambodia', 'Algeria', 'Peru', 'Malta', 'Uruguay', 'Bahamas', 'Malaysia', 'Mali']
[4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


(None, None)

In [22]:
# Remove every movie whose production country is one of the rare ones, then renumber the rows.
is_rare_country = metadata['production_country'].isin(country_list)
metadata = metadata[~is_rare_country].reset_index(drop=True)
metadata.describe()

Unnamed: 0,budget,popularity,revenue,runtime,vote_average,vote_count,release_year,release_month
count,5127.0,5127.0,5127.0,5127.0,5127.0,5127.0,5127.0,5127.0
mean,32389350.0,10.019311,94127270.0,110.331188,6.273279,759.215331,2000.921006,6.929783
std,40596520.0,14.077739,169034600.0,21.350089,0.901414,1273.680569,13.343455,3.378304
min,10000.0,0.000657,10000.0,62.0,0.0,0.0,1950.0,1.0
25%,6000000.0,5.719902,8496945.0,96.0,5.7,86.0,1995.0,4.0
50%,18000000.0,8.559189,32980880.0,106.0,6.3,292.0,2005.0,7.0
75%,40000000.0,11.844568,103402700.0,120.0,6.9,842.5,2011.0,10.0
max,380000000.0,547.488298,2787965000.0,338.0,9.1,14075.0,2017.0,12.0


In [23]:
# First two rows after the rare-country filter and index reset.
metadata.head(2)

Unnamed: 0,adult,budget,id,imdb_id,original_language,original_title,popularity,revenue,runtime,title,vote_average,vote_count,release_year,release_month,genre,production_company,production_country,spoken_language
0,False,237000000.0,19995,tt0499549,en,Avatar,185.070892,2787965000.0,162.0,Avatar,7.2,12114.0,2009,12,Action,Ingenious Film Partners,United States of America,English
1,False,245000000.0,140607,tt2488496,en,Star Wars: The Force Awakens,31.626013,2068224000.0,136.0,Star Wars: The Force Awakens,7.5,7993.0,2015,12,Action,Lucasfilm,United States of America,English


In [24]:
# Production-country frequencies after dropping the rare countries.
metadata['production_country'].value_counts()

production_country
United States of America    3302
United Kingdom               376
France                       214
Canada                       185
Germany                      177
India                        165
Australia                     97
Russia                        69
Japan                         61
Italy                         51
China                         49
Spain                         42
Ireland                       28
Hong Kong                     25
Belgium                       22
South Korea                   21
Mexico                        20
New Zealand                   20
Denmark                       17
Czech Republic                17
Netherlands                   14
Brazil                        10
Switzerland                    9
Finland                        9
Austria                        9
Argentina                      8
Sweden                         8
South Africa                   7
Norway                         7
United Arab Emirates    

### Joining metadata with credits and then picking the lead actor/actress and the director

In [25]:
# Cast id to int64 before joining on it
# (presumably to match the dtype of credits['id'] — TODO confirm).
metadata = metadata.astype({'id': 'int64'})

# Inner join on id: only movies present in both tables are kept.
# Rows that are exact duplicates across all columns are then removed.
main_data_merged = metadata.merge(credits, how='inner', on='id')
main_data_merged.drop_duplicates(inplace=True)
main_data_merged.head(2)

Unnamed: 0,adult,budget,id,imdb_id,original_language,original_title,popularity,revenue,runtime,title,vote_average,vote_count,release_year,release_month,genre,production_company,production_country,spoken_language,cast,crew
0,False,237000000.0,19995,tt0499549,en,Avatar,185.070892,2787965000.0,162.0,Avatar,7.2,12114.0,2009,12,Action,Ingenious Film Partners,United States of America,English,"[{'cast_id': 242, 'character': 'Jake Sully', '...","[{'credit_id': '52fe48009251416c750aca23', 'de..."
1,False,245000000.0,140607,tt2488496,en,Star Wars: The Force Awakens,31.626013,2068224000.0,136.0,Star Wars: The Force Awakens,7.5,7993.0,2015,12,Action,Lucasfilm,United States of America,English,"[{'cast_id': 73, 'character': 'Rey', 'credit_i...","[{'credit_id': '52fe4a959251416c750e7123', 'de..."


In [26]:
# Lead actor/actress: parse the cast string (nulls are treated as an empty list),
# collect every member's name and keep the first one. An empty cast yields NaN.
cast_members = main_data_merged['cast'].fillna('[]').apply(ast.literal_eval)
cast_names = cast_members.apply(lambda members: [member['name'] for member in members])
main_data_merged['main_cast'] = cast_names.str[0]

In [27]:
# Id of the lead actor/actress: same parsing as for the name, but keeping each member's 'id'
# and then the first one. An empty cast yields NaN.
cast_members = main_data_merged['cast'].fillna('[]').apply(ast.literal_eval)
cast_ids = cast_members.apply(lambda members: [member['id'] for member in members])
main_data_merged['main_cast_id'] = cast_ids.str[0]

In [28]:
# Director: parse the crew string (nulls are treated as an empty list), keep the names of the
# crew members whose job is 'Director', and take the first one. No director yields NaN.
crew_members = main_data_merged['crew'].fillna('[]').apply(ast.literal_eval)
director_names = crew_members.apply(
    lambda members: [member['name'] for member in members if member['job'] == 'Director']
)
main_data_merged['director'] = director_names.str[0]

In [29]:
# Drop the raw cast/crew strings and the adult flag, keep one row per movie id,
# and renumber the rows from 0.
main_data_merged = (
    main_data_merged
    .drop(columns=['cast', 'crew', 'adult'])
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)
main_data_merged.describe()

Unnamed: 0,budget,id,popularity,revenue,runtime,vote_average,vote_count,release_year,release_month,main_cast_id
count,5122.0,5122.0,5122.0,5122.0,5122.0,5122.0,5122.0,5122.0,5122.0,5114.0
mean,32398410.0,60221.091761,10.023999,94190690.0,110.336392,6.273467,759.837173,2000.916439,6.929324,62051.07
std,40613430.0,95789.210756,14.083566,169103100.0,21.355041,0.901558,1274.143581,13.348134,3.377574,206965.5
min,10000.0,5.0,0.000657,10000.0,62.0,0.0,0.0,1950.0,1.0,2.0
25%,6000000.0,8647.0,5.7214,8500000.0,96.0,5.7,86.0,1995.0,4.0,2397.5
50%,18000000.0,13972.5,8.56071,32977410.0,106.0,6.3,292.0,2005.0,7.0,10665.5
75%,40000000.0,59342.75,11.845715,103701600.0,120.0,6.9,843.75,2011.0,10.0,38420.0
max,380000000.0,443319.0,547.488298,2787965000.0,338.0,9.1,14075.0,2017.0,12.0,1783718.0


In [30]:
# Number of missing values per column in the merged table.
main_data_merged.isnull().sum()

budget                  0
id                      0
imdb_id                 0
original_language       0
original_title          0
popularity              0
revenue                 0
runtime                 0
title                   0
vote_average            0
vote_count              0
release_year            0
release_month           0
genre                  10
production_company    152
production_country     50
spoken_language        12
main_cast               8
main_cast_id            8
director                3
dtype: int64

In [31]:
# Remove every entry that has a null value in any column, then renumber the rows from 0.
main_data_merged = main_data_merged.dropna().reset_index(drop=True)
main_data_merged.shape


(4950, 20)

In [32]:
# First five rows of the cleaned, merged table.
main_data_merged.head(5)

Unnamed: 0,budget,id,imdb_id,original_language,original_title,popularity,revenue,runtime,title,vote_average,vote_count,release_year,release_month,genre,production_company,production_country,spoken_language,main_cast,main_cast_id,director
0,237000000.0,19995,tt0499549,en,Avatar,185.070892,2787965000.0,162.0,Avatar,7.2,12114.0,2009,12,Action,Ingenious Film Partners,United States of America,English,Sam Worthington,65731.0,James Cameron
1,245000000.0,140607,tt2488496,en,Star Wars: The Force Awakens,31.626013,2068224000.0,136.0,Star Wars: The Force Awakens,7.5,7993.0,2015,12,Action,Lucasfilm,United States of America,English,Daisy Ridley,1315036.0,J.J. Abrams
2,200000000.0,597,tt0120338,en,Titanic,26.88907,1845034000.0,194.0,Titanic,7.5,7770.0,1997,11,Drama,Paramount Pictures,United States of America,English,Kate Winslet,204.0,James Cameron
3,220000000.0,24428,tt0848228,en,The Avengers,89.887648,1519558000.0,143.0,The Avengers,7.4,12000.0,2012,4,Science Fiction,Paramount Pictures,United States of America,English,Robert Downey Jr.,3223.0,Joss Whedon
4,150000000.0,135397,tt0369610,en,Jurassic World,32.790475,1513529000.0,124.0,Jurassic World,6.5,8842.0,2015,6,Action,Universal Studios,United States of America,English,Chris Pratt,73457.0,Colin Trevorrow


In [33]:
# Order the movies chronologically (oldest release year first) and renumber the rows from 0.
main_data_merged = main_data_merged.sort_values('release_year').reset_index(drop=True)

In [34]:
# (rows, columns) of the final table; sorting does not change it.
main_data_merged.shape

(4950, 20)

In [35]:
# binary_success is a copy of main_data_merged with one extra column, success_degree:
# 1 when revenue is at least 1.5 times the budget, 0 otherwise.
# The result is saved as binary_success.csv (without the index column).
binary_success = main_data_merged.copy()
revenue_beats_budget = binary_success['revenue'] >= 1.5 * binary_success['budget']
binary_success['success_degree'] = revenue_beats_budget.astype(int)
binary_success.to_csv('/Users/user/Downloads/AshokaUniversity/monsoon23-courses/IML/final_project/dataset/binary_success.csv', index=False)


In [36]:
# Save the cleaned, merged table as main_data_merged.csv (the row index is not written).
# NOTE(review): the target is an absolute, machine-specific path.
main_data_merged.to_csv('/Users/user/Downloads/AshokaUniversity/monsoon23-courses/IML/final_project/dataset/main_data_merged.csv', index=False)