In [1]:
# import dependencies
import pandas as pd
import numpy as np
from ast import literal_eval

## Clean the movies_metadata dataset prior to loading to database table

In [2]:
# Load the movie metadata CSV (path is relative to the working directory) into a DataFrame
movies_df = pd.read_csv("movies_metadata_test.csv")
# Preview the first five rows
movies_df.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,10/30/1995,373554033,81,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,12/15/1995,262797249,104,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,12/22/1995,0,101,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,12/22/1995,81452156,127,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,2/10/1995,76578911,106,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173


In [3]:
len(movies_df.index)

10

### Convert "belongs_to_collection" from a dictionary string to a boolean

In [4]:
# Pull the "belongs_to_collection" column out into its own Series
collection_col = pd.Series(movies_df["belongs_to_collection"])

In [5]:
# Remove "belongs_to_collection" from the DataFrame; the boolean version is joined back in a later cell
movies_df = movies_df.drop(columns=["belongs_to_collection"])

In [6]:
movies_df.head()

Unnamed: 0,adult,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,...,10/30/1995,373554033,81,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415
1,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,...,12/15/1995,262797249,104,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413
2,False,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,...,12/22/1995,0,101,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92
3,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,...,12/22/1995,81452156,127,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34
4,False,0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,...,2/10/1995,76578911,106,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173


In [7]:
# Keep only the rows that actually have a collection value (discard NaN)
collection_col = collection_col[collection_col.notna()]

In [8]:
# Cast the remaining values to boolean
collection_col = collection_col.astype(bool)

In [9]:
collection_col

0    True
2    True
4    True
9    True
Name: belongs_to_collection, dtype: bool

In [10]:
# Wrap the Series in a single-column DataFrame so it can be joined on the index
collection_col = pd.DataFrame(collection_col)

In [11]:
# Attach the boolean collection column by index; rows that were dropped as NaN
# have no match and come back as NaN
movies_df = movies_df.join(collection_col, how="left")

In [12]:
# Select the columns in their final order. Any column not named here is left
# out of the result.
# NOTE(review): "release_date" is present in the loaded data but is not in this
# list, so it is dropped at this step — confirm that is intended.
column_order = [
    "id", "imdb_id", "adult", "belongs_to_collection", "budget", "genres",
    "homepage", "original_language", "original_title", "overview",
    "popularity", "poster_path", "production_companies",
    "production_countries", "revenue", "runtime", "spoken_languages",
    "status", "tagline", "title", "video", "vote_average", "vote_count",
]
movies_df = movies_df[column_order]

In [13]:
# Movies without a collection are NaN after the join. Fill them with the
# boolean False (the string "False" would be truthy and leave the column as a
# mixed object dtype), then cast so the whole column is a real bool.
# The result is assigned back instead of calling an inplace method on a column
# selection, which does not reliably update the parent DataFrame.
movies_df["belongs_to_collection"] = (
    movies_df["belongs_to_collection"].fillna(False).astype(bool)
)

In [14]:
movies_df.head(10)

Unnamed: 0,id,imdb_id,adult,belongs_to_collection,budget,genres,homepage,original_language,original_title,overview,...,production_countries,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,862,tt0114709,False,True,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,"[{'iso_3166_1': 'US', 'name': 'United States o...",373554033,81,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415
1,8844,tt0113497,False,False,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,en,Jumanji,When siblings Judy and Peter discover an encha...,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",262797249,104,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413
2,15602,tt0113228,False,True,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",0,101,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92
3,31357,tt0114885,False,False,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,"[{'iso_3166_1': 'US', 'name': 'United States o...",81452156,127,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34
4,11862,tt0113041,False,True,0,"[{'id': 35, 'name': 'Comedy'}]",,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",76578911,106,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173
5,949,tt0113277,False,False,60000000,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",...,"[{'iso_3166_1': 'US', 'name': 'United States o...",187436818,170,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,A Los Angeles Crime Saga,Heat,False,7.7,1886
6,11860,tt0114319,False,False,58000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",,en,Sabrina,An ugly duckling having undergone a remarkable...,...,"[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is...",0,127,"[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...",Released,You are cordially invited to the most surprisi...,Sabrina,False,6.2,141
7,45325,tt0112302,False,False,0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,en,Tom and Huck,"A mischievous young boy, Tom Sawyer, witnesses...",...,"[{'iso_3166_1': 'US', 'name': 'United States o...",0,97,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The Original Bad Boys.,Tom and Huck,False,5.4,45
8,9091,tt0114576,False,False,35000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",,en,Sudden Death,International action superstar Jean Claude Van...,...,"[{'iso_3166_1': 'US', 'name': 'United States o...",64350171,106,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Terror goes into overtime.,Sudden Death,False,5.5,174
9,710,tt0113189,False,True,58000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.mgm.com/view/movie/757/Goldeneye/,en,GoldenEye,James Bond must unmask the mysterious head of ...,...,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",352194034,130,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,No limits. No fears. No substitutes.,GoldenEye,False,6.6,1194


In [15]:
len(movies_df.index)

10

## Split the "genres" column (a list of dictionaries) into
## separate boolean columns for ten named genres plus "Others"

In [16]:
# Pull the "genres" column out into its own Series
genres_col = pd.Series(movies_df["genres"])

In [17]:
# Remove "genres" from the DataFrame; per-genre flag columns are joined back in a later cell
movies_df = movies_df.drop(columns=["genres"])

In [18]:
# Keep only rows with a genres value (this sample has no NaN rows)
genres_col = genres_col[genres_col.notna()]

In [19]:
genres_col

0    [{'id': 16, 'name': 'Animation'}, {'id': 35, '...
1    [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...
2    [{'id': 10749, 'name': 'Romance'}, {'id': 35, ...
3    [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...
4                       [{'id': 35, 'name': 'Comedy'}]
5    [{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...
6    [{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...
7    [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
8    [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
9    [{'id': 12, 'name': 'Adventure'}, {'id': 28, '...
Name: genres, dtype: object

In [20]:
# Wrap the genres Series in a single-column DataFrame so flag columns can be added next to it
genres_df = pd.DataFrame(genres_col)
# Preview up to ten rows
genres_df.head(10)

Unnamed: 0,genres
0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '..."
1,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '..."
2,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ..."
3,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam..."
4,"[{'id': 35, 'name': 'Comedy'}]"
5,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam..."
6,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '..."
7,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam..."
8,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam..."
9,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '..."


In [21]:
len(genres_df)

10

In [22]:
# Parse each stringified list of dictionaries and keep only the 'name' values,
# giving one list of genre names per row
def _genre_names(parsed):
    """Return the 'name' of every entry when `parsed` is a list, else an empty list."""
    if isinstance(parsed, list):
        return [entry['name'] for entry in parsed]
    return []

parsed_genres = genres_df['genres'].fillna('[]').apply(literal_eval)
genres_df['genres'] = parsed_genres.apply(_genre_names)

In [23]:
genres_df.head(10)

Unnamed: 0,genres
0,"[Animation, Comedy, Family]"
1,"[Adventure, Fantasy, Family]"
2,"[Romance, Comedy]"
3,"[Comedy, Drama, Romance]"
4,[Comedy]
5,"[Action, Crime, Drama, Thriller]"
6,"[Comedy, Romance]"
7,"[Action, Adventure, Drama, Family]"
8,"[Action, Adventure, Thriller]"
9,"[Adventure, Action, Thriller]"


In [24]:
# Add one flag column per tracked genre (plus "Others"), all starting as False

for genre_flag in (
    "Drama", "Comedy", "Thriller", "Romance", "Action", "Horror",
    "Crime", "Documentary", "Adventure", "Science Fiction", "Others",
):
    genres_df[genre_flag] = False

In [25]:
genres_df.head(10)

Unnamed: 0,genres,Drama,Comedy,Thriller,Romance,Action,Horror,Crime,Documentary,Adventure,Science Fiction,Others
0,"[Animation, Comedy, Family]",False,False,False,False,False,False,False,False,False,False,False
1,"[Adventure, Fantasy, Family]",False,False,False,False,False,False,False,False,False,False,False
2,"[Romance, Comedy]",False,False,False,False,False,False,False,False,False,False,False
3,"[Comedy, Drama, Romance]",False,False,False,False,False,False,False,False,False,False,False
4,[Comedy],False,False,False,False,False,False,False,False,False,False,False
5,"[Action, Crime, Drama, Thriller]",False,False,False,False,False,False,False,False,False,False,False
6,"[Comedy, Romance]",False,False,False,False,False,False,False,False,False,False,False
7,"[Action, Adventure, Drama, Family]",False,False,False,False,False,False,False,False,False,False,False
8,"[Action, Adventure, Thriller]",False,False,False,False,False,False,False,False,False,False,False
9,"[Adventure, Action, Thriller]",False,False,False,False,False,False,False,False,False,False,False


In [26]:
# Walk every row's list of genre names and switch on the matching flag column.
# A genre that has no dedicated column is recorded under "Others".
named_genres = {
    "Drama", "Comedy", "Thriller", "Romance", "Action",
    "Horror", "Crime", "Documentary", "Adventure", "Science Fiction",
}
for row_label, genre_list in genres_df['genres'].items():
    print(genre_list)
    for genre_name in genre_list:
        print(genre_name)
        flag_column = genre_name if genre_name in named_genres else "Others"
        genres_df.loc[row_label, flag_column] = True


['Animation', 'Comedy', 'Family']
Animation
Comedy
Family
['Adventure', 'Fantasy', 'Family']
Adventure
Fantasy
Family
['Romance', 'Comedy']
Romance
Comedy
['Comedy', 'Drama', 'Romance']
Comedy
Drama
Romance
['Comedy']
Comedy
['Action', 'Crime', 'Drama', 'Thriller']
Action
Crime
Drama
Thriller
['Comedy', 'Romance']
Comedy
Romance
['Action', 'Adventure', 'Drama', 'Family']
Action
Adventure
Drama
Family
['Action', 'Adventure', 'Thriller']
Action
Adventure
Thriller
['Adventure', 'Action', 'Thriller']
Adventure
Action
Thriller


In [27]:
genres_df.head()

Unnamed: 0,genres,Drama,Comedy,Thriller,Romance,Action,Horror,Crime,Documentary,Adventure,Science Fiction,Others
0,"[Animation, Comedy, Family]",False,True,False,False,False,False,False,False,False,False,True
1,"[Adventure, Fantasy, Family]",False,False,False,False,False,False,False,False,True,False,True
2,"[Romance, Comedy]",False,True,False,True,False,False,False,False,False,False,False
3,"[Comedy, Drama, Romance]",True,True,False,True,False,False,False,False,False,False,False
4,[Comedy],False,True,False,False,False,False,False,False,False,False,False


In [28]:
# The list column is no longer needed once the flag columns are filled in
genres_df = genres_df.drop(columns=['genres'])

In [29]:
genres_df.head(10)

Unnamed: 0,Drama,Comedy,Thriller,Romance,Action,Horror,Crime,Documentary,Adventure,Science Fiction,Others
0,False,True,False,False,False,False,False,False,False,False,True
1,False,False,False,False,False,False,False,False,True,False,True
2,False,True,False,True,False,False,False,False,False,False,False
3,True,True,False,True,False,False,False,False,False,False,False
4,False,True,False,False,False,False,False,False,False,False,False
5,True,False,True,False,True,False,True,False,False,False,False
6,False,True,False,True,False,False,False,False,False,False,False
7,True,False,False,False,True,False,False,False,True,False,True
8,False,False,True,False,True,False,False,False,True,False,False
9,False,False,True,False,True,False,False,False,True,False,False


In [30]:
# Attach the genre flag columns to movies_df, matching rows by index
movies_df = movies_df.join(genres_df, how="left")

In [31]:
movies_df.tail(10)

Unnamed: 0,id,imdb_id,adult,belongs_to_collection,budget,homepage,original_language,original_title,overview,popularity,...,Comedy,Thriller,Romance,Action,Horror,Crime,Documentary,Adventure,Science Fiction,Others
0,862,tt0114709,False,True,30000000,http://toystory.disney.com/toy-story,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,...,True,False,False,False,False,False,False,False,False,True
1,8844,tt0113497,False,False,65000000,,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,...,False,False,False,False,False,False,False,True,False,True
2,15602,tt0113228,False,True,0,,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,...,True,False,True,False,False,False,False,False,False,False
3,31357,tt0114885,False,False,16000000,,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,...,True,False,True,False,False,False,False,False,False,False
4,11862,tt0113041,False,True,0,,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,...,True,False,False,False,False,False,False,False,False,False
5,949,tt0113277,False,False,60000000,,en,Heat,"Obsessive master thief, Neil McCauley leads a ...",17.924927,...,False,True,False,True,False,True,False,False,False,False
6,11860,tt0114319,False,False,58000000,,en,Sabrina,An ugly duckling having undergone a remarkable...,6.677277,...,True,False,True,False,False,False,False,False,False,False
7,45325,tt0112302,False,False,0,,en,Tom and Huck,"A mischievous young boy, Tom Sawyer, witnesses...",2.561161,...,False,False,False,True,False,False,False,True,False,True
8,9091,tt0114576,False,False,35000000,,en,Sudden Death,International action superstar Jean Claude Van...,5.23158,...,False,True,False,True,False,False,False,True,False,False
9,710,tt0113189,False,True,58000000,http://www.mgm.com/view/movie/757/Goldeneye/,en,GoldenEye,James Bond must unmask the mysterious head of ...,14.686036,...,False,True,False,True,False,False,False,True,False,False


In [32]:
len(movies_df.index)

10

## Convert "production_companies" column (a list of dictionaries) to
## a count of companies per movie

In [None]:
# Pull the "production_companies" column out into its own Series
companies_col = pd.Series(movies_df["production_companies"])

In [None]:
# Remove "production_companies" from the DataFrame.
# NOTE(review): no later cell visible in this notebook joins companies_df back
# into movies_df, so running this cell leaves the column out of the export —
# confirm whether a join step is missing.
movies_df = movies_df.drop(columns=['production_companies'])

In [None]:
# Keep only rows that have a production_companies value (discard NaN)
companies_col = companies_col[companies_col.notna()]

In [None]:
companies_col

In [None]:
# Wrap the companies Series in a single-column DataFrame so it can be edited and joined by index
companies_df = pd.DataFrame(companies_col)
# Preview up to ten rows
companies_df.head(10)

In [None]:
len(companies_df)

In [None]:
# Parse each stringified list of dictionaries and keep only the 'name' values,
# giving one list of company names per row
def _company_names(parsed):
    """Return the 'name' of every entry when `parsed` is a list, else an empty list."""
    if isinstance(parsed, list):
        return [entry['name'] for entry in parsed]
    return []

parsed_companies = companies_df['production_companies'].fillna('[]').apply(literal_eval)
companies_df['production_companies'] = parsed_companies.apply(_company_names)

In [None]:
companies_df.head(25)

In [None]:
# Show each movie's list of company names, then replace the list with the
# number of companies in it.
for companies in companies_df['production_companies']:
    print(companies)

# len() is applied to every row, so a movie with an empty list gets 0 rather
# than keeping the list, and each cell is written exactly once.
companies_df['production_companies'] = companies_df['production_companies'].apply(len)
    

In [None]:
companies_df.head()

### Convert "spoken_languages" from list of dictionaries to just the languages count

In [33]:
# Pull the "spoken_languages" column out into its own Series
spoken_col = pd.Series(movies_df["spoken_languages"])

In [34]:
# Remove "spoken_languages" from the DataFrame; the language count is joined back in a later cell
movies_df = movies_df.drop(columns=['spoken_languages'])

In [35]:
# Wrap the spoken-languages Series in a single-column DataFrame so it can be edited and joined by index
spoken_df = pd.DataFrame(spoken_col)
# Preview up to ten rows
spoken_df.head(10)

Unnamed: 0,spoken_languages
0,"[{'iso_639_1': 'en', 'name': 'English'}]"
1,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso..."
2,"[{'iso_639_1': 'en', 'name': 'English'}]"
3,"[{'iso_639_1': 'en', 'name': 'English'}]"
4,"[{'iso_639_1': 'en', 'name': 'English'}]"
5,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso..."
6,"[{'iso_639_1': 'fr', 'name': 'Français'}, {'is..."
7,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso..."
8,"[{'iso_639_1': 'en', 'name': 'English'}]"
9,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso..."


In [36]:
# Parse each stringified list of dictionaries and keep only the 'iso_639_1'
# values, giving one list of language abbreviations per row
def _language_codes(parsed):
    """Return the 'iso_639_1' value of every entry when `parsed` is a list, else an empty list."""
    if isinstance(parsed, list):
        return [entry['iso_639_1'] for entry in parsed]
    return []

parsed_languages = spoken_df['spoken_languages'].fillna('[]').apply(literal_eval)
spoken_df['spoken_languages'] = parsed_languages.apply(_language_codes)

In [37]:
spoken_df.head(10)

Unnamed: 0,spoken_languages
0,[en]
1,"[en, fr]"
2,[en]
3,[en]
4,[en]
5,"[en, es]"
6,"[fr, en]"
7,"[en, de]"
8,[en]
9,"[en, ru, es]"


In [38]:
spoken_df['spoken_languages'].value_counts()

[en]            5
[en, es]        1
[en, fr]        1
[fr, en]        1
[en, de]        1
[en, ru, es]    1
Name: spoken_languages, dtype: int64

In [39]:
# Show each movie's list of language codes, then replace the list with the
# number of languages in it.
for languages in spoken_df['spoken_languages']:
    print(languages)

# len() is applied to every row, so a movie with an empty list gets 0 rather
# than keeping the list, and each cell is written exactly once.
spoken_df['spoken_languages'] = spoken_df['spoken_languages'].apply(len)


['en']
['en', 'fr']
['en']
['en']
['en']
['en', 'es']
['fr', 'en']
['en', 'de']
['en']
['en', 'ru', 'es']


In [40]:
spoken_df.head(20)


Unnamed: 0,spoken_languages
0,1
1,2
2,1
3,1
4,1
5,2
6,2
7,2
8,1
9,3


In [41]:
# Attach the spoken_languages column (now a per-movie count) to movies_df, matching rows by index
movies_df = movies_df.join(spoken_df, how="left")

In [42]:
movies_df.columns


Index(['id', 'imdb_id', 'adult', 'belongs_to_collection', 'budget', 'homepage',
       'original_language', 'original_title', 'overview', 'popularity',
       'poster_path', 'production_companies', 'production_countries',
       'revenue', 'runtime', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count', 'Drama', 'Comedy', 'Thriller', 'Romance',
       'Action', 'Horror', 'Crime', 'Documentary', 'Adventure',
       'Science Fiction', 'Others', 'spoken_languages'],
      dtype='object')

In [44]:
# Write the cleaned DataFrame to movies.csv in the working directory.
# NOTE(review): to_csv writes the row index as an unnamed first column by
# default — confirm the database load expects that column.
movies_df.to_csv('movies.csv')