# Exploratory Notebook

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import sqlite3
import re
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

## Reading Data

bom_movie_gross

In [2]:
bom_movie_gross=pd.read_csv('../Data/bom.movie_gross.csv.gz' , compression='gzip')

In [3]:
#pd.options.display.max_columns=100

The data base


In [4]:
conn1=sqlite3.connect('../Data/im.db/im.db')

In [5]:
query=("""
SELECT *
FROM movie_basics

""")
movie_basics=pd.read_sql(query,conn1)

movie_info

In [6]:
movie_info=pd.read_csv('../Data/rt.movie_info.tsv.gz' , delimiter='\t' ,compression='gzip')

reviews

In [7]:
reviews=pd.read_csv('../Data/rt.reviews.tsv.gz' , delimiter='\t' ,compression='gzip' , encoding='windows-1252')

The movie data base

In [8]:
tmdb_df=pd.read_csv('../Data/tmdb.movies.csv.gz',compression='gzip' , index_col=0)

movie_budget

In [9]:
movie_budget=pd.read_csv('../Data/tn.movie_budgets.csv.gz' , compression='gzip')

## Exploring and Cleaning of Data

###### bom_movie_gross cleaning and checking data

In [10]:
bom_movie_gross.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3387 entries, 0 to 3386
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   title           3387 non-null   object 
 1   studio          3382 non-null   object 
 2   domestic_gross  3359 non-null   float64
 3   foreign_gross   2037 non-null   object 
 4   year            3387 non-null   int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 132.4+ KB


In [11]:
bom_movie_gross.sort_values('domestic_gross' , ascending=False)

Unnamed: 0,title,studio,domestic_gross,foreign_gross,year
1872,Star Wars: The Force Awakens,BV,936700000.0,1131.6,2015
3080,Black Panther,BV,700100000.0,646900000,2018
3079,Avengers: Infinity War,BV,678800000.0,1369.5,2018
1873,Jurassic World,Uni.,652300000.0,1019.4,2015
727,Marvel's The Avengers,BV,623400000.0,895500000,2012
...,...,...,...,...,...
1975,Surprise - Journey To The West,AR,,49600000,2015
2392,Finding Mr. Right 2,CL,,114700000,2016
2468,Solace,LGP,,22400000,2016
2595,Viral,W/Dim.,,552000,2016


In [12]:
#bom_movie_gross.loc[bom_movie_gross['foreign_gross'].str.contains('\d' , regex=True , na=False)]
for index , val in bom_movie_gross.foreign_gross.items():
    #if isinstance(val, str) and re.search((r'\d*\.\d+') , str(val)):
    if re.search((r'\d*\.\d+') , str(val)):
        spl_val=val.split('.')
        val=spl_val[0]+('0'*int(spl_val[1]))
        bom_movie_gross.at[index,'foreign_gross']=str(val)

In [13]:
#changing foreign gross column in to float data type
bom_movie_gross['foreign_gross'] = bom_movie_gross['foreign_gross'].str.replace(',' , '')

In [14]:
#filled null values with zero instead of droping , because we can have it from other data
bom_movie_gross=bom_movie_gross.fillna(0)

In [15]:
cols=['domestic_gross','foreign_gross']
bom_movie_gross[cols]=bom_movie_gross[cols].astype(int)

In [16]:
# created new column world wide profit
bom_movie_gross['world_wide_profit']=bom_movie_gross.foreign_gross+bom_movie_gross.domestic_gross

In [17]:
bom_movie_gross.sort_values('domestic_gross' , ascending=False)

Unnamed: 0,title,studio,domestic_gross,foreign_gross,year,world_wide_profit
1872,Star Wars: The Force Awakens,BV,936700000,1131000000,2015,2067700000
3080,Black Panther,BV,700100000,646900000,2018,1347000000
3079,Avengers: Infinity War,BV,678800000,136900000,2018,815700000
1873,Jurassic World,Uni.,652300000,10190000,2015,662490000
727,Marvel's The Avengers,BV,623400000,895500000,2012,1518900000
...,...,...,...,...,...,...
936,"Lula, Son of Brazil",NYer,0,3800000,2012,3800000
1079,The Green Wave,RF,0,70100,2012,70100
1975,Surprise - Journey To The West,AR,0,49600000,2015,49600000
966,The Cup (2012),Myr.,0,1800000,2012,1800000


In [18]:
bom_movie_gross.head()

Unnamed: 0,title,studio,domestic_gross,foreign_gross,year,world_wide_profit
0,Toy Story 3,BV,415000000,652000000,2010,1067000000
1,Alice in Wonderland (2010),BV,334200000,691300000,2010,1025500000
2,Harry Potter and the Deathly Hallows Part 1,WB,296000000,664300000,2010,960300000
3,Inception,WB,292600000,535700000,2010,828300000
4,Shrek Forever After,P/DW,238700000,513900000,2010,752600000


In [19]:
bom_movie_gross.isna().sum()

title                0
studio               0
domestic_gross       0
foreign_gross        0
year                 0
world_wide_profit    0
dtype: int64

In [20]:
bom_movie_gross.shape

(3387, 6)

###### Movie_info data cleaning

In [21]:
movie_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1560 entries, 0 to 1559
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            1560 non-null   int64 
 1   synopsis      1498 non-null   object
 2   rating        1557 non-null   object
 3   genre         1552 non-null   object
 4   director      1361 non-null   object
 5   writer        1111 non-null   object
 6   theater_date  1201 non-null   object
 7   dvd_date      1201 non-null   object
 8   currency      340 non-null    object
 9   box_office    340 non-null    object
 10  runtime       1530 non-null   object
 11  studio        494 non-null    object
dtypes: int64(1), object(11)
memory usage: 146.4+ KB


In [22]:
#droped currency and box_office columns from movie_info because they have a lot of null values
movie_info.drop(["currency" , 'box_office'], axis='columns' , inplace=True)

In [23]:
movie_info.sample(5)

Unnamed: 0,id,synopsis,rating,genre,director,writer,theater_date,dvd_date,runtime,studio
152,197,"Adapted from the Frederick Forsythe novel, Dog...",R,Action and Adventure|Art House and Internation...,John Irvin,,"Jun 1, 1980","Nov 20, 2001",104 minutes,
311,410,"""Cadillac Records"" is about the rise and fall ...",R,Drama|Musical and Performing Arts,Darnell Martin,Darnell Martin,"Dec 5, 2008","Mar 10, 2009",108 minutes,Sony Pictures
63,80,When Francisco Manoel de Silva (Klaus Kinski) ...,NR,Action and Adventure|Art House and Internation...,Werner Herzog,Werner Herzog,"Dec 3, 1987","Oct 24, 2000",110 minutes,
1003,1297,"Based on true events, WOMAN WALKS AHEAD tells ...",R,Drama,Susanna White,Steven Knight,"Jun 29, 2018","Aug 28, 2018",102 minutes,
25,35,"In this film, conjoined twins Blake and Franci...",R,Drama,Michael Polish,Michael Polish|Mark Polish,"Jul 30, 1999","Jan 18, 2000",110 minutes,


##### Reviews data cleaning

In [24]:
reviews.id.unique()

array([   3,    5,    6, ..., 1998, 1999, 2000], dtype=int64)

In [25]:
reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54432 entries, 0 to 54431
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          54432 non-null  int64 
 1   review      48869 non-null  object
 2   rating      40915 non-null  object
 3   fresh       54432 non-null  object
 4   critic      51710 non-null  object
 5   top_critic  54432 non-null  int64 
 6   publisher   54123 non-null  object
 7   date        54432 non-null  object
dtypes: int64(2), object(6)
memory usage: 3.3+ MB


In [26]:
#created a new column to have numeric values of fresh column -- fresh=1 and rotten=0
reviews['num_fresh']=""
for index, value in reviews.fresh.items():
    if value=='fresh':
        reviews.at[index , 'num_fresh']=1
    else: reviews.at[index , 'num_fresh']=0

In [27]:
reviews.head()

Unnamed: 0,id,review,rating,fresh,critic,top_critic,publisher,date,num_fresh
0,3,A distinctly gallows take on contemporary fina...,3/5,fresh,PJ Nabarro,0,Patrick Nabarro,"November 10, 2018",1
1,3,It's an allegory in search of a meaning that n...,,rotten,Annalee Newitz,0,io9.com,"May 23, 2018",0
2,3,... life lived in a bubble in financial dealin...,,fresh,Sean Axmaker,0,Stream on Demand,"January 4, 2018",1
3,3,Continuing along a line introduced in last yea...,,fresh,Daniel Kasman,0,MUBI,"November 16, 2017",1
4,3,... a perverse twist on neorealism...,,fresh,,0,Cinema Scope,"October 12, 2017",1


In [28]:
reviews.rating.unique()

array(['3/5', nan, 'C', '2/5', 'B-', '2/4', 'B', '3/4', '4/5', '4/4',
       '6/10', '1/4', '8', '2.5/4', '4/10', '2.0/5', '3/10', '7/10', 'A-',
       '5/5', 'F', '3.5/4', 'D+', '1.5/4', '3.5/5', '8/10', 'B+', '9/10',
       '2.5/5', '7.5/10', '5.5/10', 'C-', '1.5/5', '1/5', '5/10', 'C+',
       '0/5', '6', '0.5/4', 'D', '3.1/5', '3/6', '4.5/5', '0/4', '2/10',
       'D-', '7', '1/10', '3', 'A+', 'A', '4.0/4', '9.5/10', '2.5',
       '2.1/2', '6.5/10', '3.7/5', '8.4/10', '9', '1', '7.2/10', '2.2/5',
       '0.5/10', '5', '0', '2', '4.5', '7.7', '5.0/5', '8.5/10', '3.0/5',
       '0.5/5', '1.5/10', '3.0/4', '2.3/10', '4.5/10', '4/6', '3.5',
       '8.6/10', '6/8', '2.0/4', '2.7', '4.2/10', '5.8', '4', '7.1/10',
       '5/4', 'N', '3.5/10', '5.8/10', 'R', '4.0/5', '0/10', '5.0/10',
       '5.9/10', '2.4/5', '1.9/5', '4.9', '7.4/10', '1.5', '2.3/4',
       '8.8/10', '4.0/10', '2.2', '3.8/10', '6.8/10', '7.3', '7.0/10',
       '3.2', '4.2', '8.4', '5.5/5', '6.3/10', '7.6/10', '8.1/10',
  

In [29]:
reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54432 entries, 0 to 54431
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          54432 non-null  int64 
 1   review      48869 non-null  object
 2   rating      40915 non-null  object
 3   fresh       54432 non-null  object
 4   critic      51710 non-null  object
 5   top_critic  54432 non-null  int64 
 6   publisher   54123 non-null  object
 7   date        54432 non-null  object
 8   num_fresh   54432 non-null  object
dtypes: int64(2), object(7)
memory usage: 3.7+ MB


###### tmdb_df cleaning

In [30]:
#dropped duplicated rows
tmdb_df=tmdb_df.drop_duplicates()

In [31]:
tmdb_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25497 entries, 0 to 26516
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   genre_ids          25497 non-null  object 
 1   id                 25497 non-null  int64  
 2   original_language  25497 non-null  object 
 3   original_title     25497 non-null  object 
 4   popularity         25497 non-null  float64
 5   release_date       25497 non-null  object 
 6   title              25497 non-null  object 
 7   vote_average       25497 non-null  float64
 8   vote_count         25497 non-null  int64  
dtypes: float64(2), int64(2), object(5)
memory usage: 1.9+ MB


In [32]:
genre_map = {28: 'Action', 12: 'Adventure', 16: 'Animation', 35: 'Comedy',
             80: 'Crime', 99: 'Documentary', 18: 'Drama', 10751: 'Family', 
             14: 'Fantasy', 36: 'History', 27: 'Horror', 10402: 'Music', 
             9648: 'Mystery', 10749: 'Romance', 878: 'Science Fiction', 
             10770: 'TV Movie', 53: 'Thriller', 10752: 'War', 37: 'Western'}
#tmdb['genres'] = tmdb['genre_ids'].apply(lambda x: [genre_map[int(genre_id)] for genre_id in eval(x)])
tmdb_df['genres_labels'] = tmdb_df['genre_ids'].apply(lambda x: ', '.join([genre_map[int(genre_id)] for genre_id in eval(x)]))


In [33]:
tmdb_df.head(2)

Unnamed: 0,genre_ids,id,original_language,original_title,popularity,release_date,title,vote_average,vote_count,genres_labels
0,"[12, 14, 10751]",12444,en,Harry Potter and the Deathly Hallows: Part 1,33.533,2010-11-19,Harry Potter and the Deathly Hallows: Part 1,7.7,10788,"Adventure, Fantasy, Family"
1,"[14, 12, 16, 10751]",10191,en,How to Train Your Dragon,28.734,2010-03-26,How to Train Your Dragon,7.7,7610,"Fantasy, Adventure, Animation, Family"


In [74]:
tmdb_df.loc[tmdb_df.genres_labels.str.contains('Action')].head(20)

Unnamed: 0,genre_ids,id,original_language,original_title,popularity,release_date,title,vote_average,vote_count,genres_labels
2,"[12, 28, 878]",10138,en,Iron Man 2,28.515,2010-05-07,Iron Man 2,6.8,12368,"Adventure, Action, Science Fiction"
4,"[28, 878, 12]",27205,en,Inception,27.92,2010-07-16,Inception,8.3,22186,"Action, Science Fiction, Adventure"
6,"[28, 12, 14, 878]",19995,en,Avatar,26.526,2009-12-18,Avatar,7.4,18676,"Action, Adventure, Fantasy, Science Fiction"
9,"[16, 28, 35, 10751, 878]",38055,en,Megamind,22.855,2010-11-04,Megamind,6.8,3635,"Animation, Action, Comedy, Family, Science Fic..."
12,"[53, 12, 28]",27578,en,The Expendables,21.517,2010-08-03,The Expendables,6.1,4647,"Thriller, Adventure, Action"
16,"[28, 53, 878]",20504,en,The Book of Eli,18.985,2010-01-11,The Book of Eli,6.7,3495,"Action, Thriller, Science Fiction"
18,"[28, 18, 53]",2502,en,The Bourne Supremacy,18.199,2004-07-23,The Bourne Supremacy,7.3,4367,"Action, Drama, Thriller"
20,"[28, 18, 9648, 53]",2501,en,The Bourne Identity,17.935,2002-06-14,The Bourne Identity,7.4,5406,"Action, Drama, Mystery, Thriller"
21,"[28, 35]",37834,en,Knight and Day,17.713,2010-06-23,Knight and Day,6.0,2494,"Action, Comedy"
24,"[53, 28, 12, 35, 80]",34544,en,The A-Team,17.097,2010-06-11,The A-Team,6.3,2703,"Thriller, Action, Adventure, Comedy, Crime"


In [35]:
tmdb_df.isna().sum()

genre_ids            0
id                   0
original_language    0
original_title       0
popularity           0
release_date         0
title                0
vote_average         0
vote_count           0
genres_labels        0
dtype: int64

###### movie_budget cleaning

In [36]:
#casting release date type in to datetime type
movie_budget.release_date=pd.to_datetime(movie_budget.release_date)

In [37]:
# function which helps us to omit $ and , from any column and cast it in to float type
# used dtype=np,int64 to support long integers
def change(data):
    data = data.str.replace('[$,]', '', regex=True)
    return data.astype(dtype=np.int64)


In [38]:
#applied change function to three columns :domestic_gross,worldwide_gross, production_budget using apply method
movie_budget[['domestic_gross', 'worldwide_gross', 'production_budget']] = movie_budget[['domestic_gross', 'worldwide_gross', 'production_budget']].apply(change)

In [39]:
#created net profit by subructing budget from gross
movie_budget['net_profit']= movie_budget['worldwide_gross'] - movie_budget['production_budget'] 

In [40]:
movie_budget.shape

(5782, 7)

#### Merged bom_movie_gross and movie_budget by title and movie columns respectiviely (inner)

In [41]:
bom_movie_gross.shape

(3387, 6)

In [42]:
movie_budget.shape

(5782, 7)

In [43]:
#merged bom_movie_gross and movie_budget by title and movie columns respectiviely (inner)
gross_budget = pd.merge(bom_movie_gross, movie_budget, how='inner', left_on='title', right_on='movie')

In [44]:
gross_budget.shape

(1247, 13)

==========================================================================

In [45]:
#function for creating a new column which has the values from both tables
#def combine_titles(data , col1,col2):
#    if pd.notna(data[col1]) and pd.notna(data[col2]):
#        return data[col1]
#    elif pd.notna(data[col1]):
#        return data[col1]
#    elif pd.notna(data[col2]):
#        return data[col2]
#    else:return None

Calling the function

In [46]:
#gross_budget['movie_title']=gross_budget.apply(combine_titles ,args=('movie','title'), axis=1)

In [47]:
#gross_budget['domestic_gross']=gross_budget.apply(combine_titles ,args=('domestic_gross_y','domestic_gross_x'), axis=1)

In [48]:
#gross_budget['world_wide_gross_Profit']=gross_budget.apply(combine_titles ,args=('worldwide_gross','world_wide_profit'), axis=1)

In [49]:
# # filling foreign gross null values from domestic gross y - worldwide_gross
# for index,value in enumerate(gross_budget.foreign_gross):
#     if (pd.isna(value) or value==0.0):
#         gross_budget.at[index,'foreign_gross']=gross_budget.world_wide_gross_Profit[index]- gross_budget.domestic_gross[index]

In [50]:
#remove rows with zero values in world_wide_gross
#gross_budget=gross_budget.loc[gross_budget.world_wide_gross_Profit!=0]
#gross_budget=gross_budget.loc[gross_budget.domestic_gross!=0]

=========================================================

In [51]:
#dropped columns which wont help us for analysis and also duplicates
drop_cols=['studio','domestic_gross_x' ,'year','release_date','movie','world_wide_profit','id']
gross_budget.drop(drop_cols , axis=1 , inplace=True)
#gross_budget.set_index('id',inplace=True)

In [52]:
gross_budget.head()

Unnamed: 0,title,foreign_gross,production_budget,domestic_gross_y,worldwide_gross,net_profit
0,Toy Story 3,652000000,200000000,415004880,1068879522,868879522
1,Inception,535700000,160000000,292576195,835524642,675524642
2,Shrek Forever After,513900000,165000000,238736787,756244673,591244673
3,The Twilight Saga: Eclipse,398000000,68000000,300531751,706102828,638102828
4,Iron Man 2,311500000,170000000,312433331,621156389,451156389


In [53]:
gross_budget.dropna(subset=['net_profit', 'production_budget'], inplace=True)

In [54]:
gross_budget.shape

(1247, 6)

In [55]:
gross_budget.isna().sum()

title                0
foreign_gross        0
production_budget    0
domestic_gross_y     0
worldwide_gross      0
net_profit           0
dtype: int64

In [56]:
#created new column ROI(Return on investment)
gross_budget['ROI']=(gross_budget.net_profit/gross_budget.production_budget)*100

In [57]:
gross_budget.head()

Unnamed: 0,title,foreign_gross,production_budget,domestic_gross_y,worldwide_gross,net_profit,ROI
0,Toy Story 3,652000000,200000000,415004880,1068879522,868879522,434.439761
1,Inception,535700000,160000000,292576195,835524642,675524642,422.202901
2,Shrek Forever After,513900000,165000000,238736787,756244673,591244673,358.330105
3,The Twilight Saga: Eclipse,398000000,68000000,300531751,706102828,638102828,938.386512
4,Iron Man 2,311500000,170000000,312433331,621156389,451156389,265.386111


In [58]:
gross_budget.loc[gross_budget.title.duplicated()].shape

(9, 7)

In [59]:
gross_budget=gross_budget.loc[gross_budget.title.duplicated()==False]

In [60]:
gross_budget.sort_values('net_profit',ascending=False).head()

Unnamed: 0,title,foreign_gross,production_budget,domestic_gross_y,worldwide_gross,net_profit,ROI
1154,Avengers: Infinity War,136900000,300000000,678815482,2048134200,1748134200,582.7114
764,Jurassic World,10190000,215000000,652270625,1648854864,1433854864,666.909239
765,Furious 7,1163,190000000,353007020,1518722794,1328722794,699.327786
1155,Black Panther,646900000,200000000,700059566,1348258224,1148258224,574.129112
1156,Jurassic World: Fallen Kingdom,891800000,170000000,417719760,1305772799,1135772799,668.101646


In [61]:
gross_budget.isna().sum()

title                0
foreign_gross        0
production_budget    0
domestic_gross_y     0
worldwide_gross      0
net_profit           0
ROI                  0
dtype: int64

###### Movie_basics cleaning

In [62]:
query=("""
SELECT *
FROM movie_akas
WHERE region=='US'

""")
movie_basics=pd.read_sql(query,conn1)
movie_basics.head()

Unnamed: 0,movie_id,ordering,title,region,language,types,attributes,is_original_title
0,tt0369610,21,Jurassic World 3D,US,,,3-D version,0.0
1,tt0369610,29,Jurassic World,US,,,,0.0
2,tt0369610,2,Ebb Tide,US,,,fake working title,0.0
3,tt0369610,36,Jurassic Park IV,US,,working,,0.0
4,tt0369610,44,Jurassic Park 4,US,,,informal alternative title,0.0


In [63]:
#join movie_basic and movie_rating
query=("""
SELECT *
FROM movie_basics AS mb
     JOIN movie_ratings AS mr 
     USING(movie_id)
""")
movie_basics_rating=pd.read_sql(query,conn1)
movie_basics_rating.head()

Unnamed: 0,movie_id,primary_title,original_title,start_year,runtime_minutes,genres,averagerating,numvotes
0,tt0063540,Sunghursh,Sunghursh,2013,175.0,"Action,Crime,Drama",7.0,77
1,tt0066787,One Day Before the Rainy Season,Ashad Ka Ek Din,2019,114.0,"Biography,Drama",7.2,43
2,tt0069049,The Other Side of the Wind,The Other Side of the Wind,2018,122.0,Drama,6.9,4517
3,tt0069204,Sabse Bada Sukh,Sabse Bada Sukh,2018,,"Comedy,Drama",6.1,13
4,tt0100275,The Wandering Soap Opera,La Telenovela Errante,2017,80.0,"Comedy,Drama,Fantasy",6.5,119


#### Merged gross_budget and movie_basic_rating to have the genra

In [64]:
#merging gross_budget & tmdb on left
gross_budget_genre = pd.merge(gross_budget,
                       movie_basics_rating,
                       left_on='title',
                       right_on='primary_title',
                       how='left')

In [65]:
gross_budget_genre=gross_budget_genre.loc[gross_budget_genre.title.duplicated()==False]

In [66]:
#dropped all null values
gross_budget_genre = gross_budget_genre.dropna()

In [67]:
#dropped unwanted cols
drop_col2=['original_title','original_title','primary_title','start_year' , 'movie_id']
gross_budget_genre.drop(drop_col2 , axis=1 , inplace=True)

In [68]:
gross_budget_genre.head(3)

Unnamed: 0,title,foreign_gross,production_budget,domestic_gross_y,worldwide_gross,net_profit,ROI,runtime_minutes,genres,averagerating,numvotes
0,Toy Story 3,652000000,200000000,415004880,1068879522,868879522,434.439761,103.0,"Adventure,Animation,Comedy",8.3,682218.0
1,Inception,535700000,160000000,292576195,835524642,675524642,422.202901,148.0,"Action,Adventure,Sci-Fi",8.8,1841066.0
2,Shrek Forever After,513900000,165000000,238736787,756244673,591244673,358.330105,93.0,"Adventure,Animation,Comedy",6.3,167532.0


In [69]:
gross_budget_genre.shape

(1147, 11)

Clean data to CSV

In [70]:
gross_budget.to_csv('../Data/clean_data/gross_budget.csv')

In [71]:
gross_budget_genre.to_csv('../Data/clean_data/gross_budget_genre.csv')