# IMDb Data Analytics Extractor
- Import data from the CSV File located in `src/data/imdb_movies_data.csv`
- Extract analysis data on various factors in the movies list


## Data import
Import the required packages. <br />
Import the dataframe and ensure it is ready for analysis


In [1]:
import pandas as pd
import numpy as np
import json


Import the dataframe from the CSV file

In [2]:
# Load the raw IMDb export; every later cell reads from this frame.
movie_data = pd.read_csv('./src/data/imdb_movies_data.csv')

# Bucket each release year into the decade it belongs to (e.g. 1994 -> 1990).
movie_data['decade'] = movie_data['year'].floordiv(10).astype(int).mul(10)

movie_data.head(5)

Unnamed: 0,ranking,ranking.1,movie_name,url,year,rating,vote_count,summary,production_1,production_2,...,language_1,language_2,language_3,language_4,language_5,budget,gross_worldwide,gross_usa,opening_week_usa,decade
0,1,1,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,1994,9.3,2333043,Two imprisoned men bond over a number of years...,Castle Rock Entertainment,,...,English,,,,,"$25,000,000","$28,817,291","$28,699,976","$727,327",1990
1,2,2,The Godfather,https://www.imdb.com/title/tt0068646/,1972,9.2,1612835,An organized crime dynasty's aging patriarch t...,Paramount Pictures,Alfran Productions,...,English,Italian,Latin,,,"$6,000,000","$246,120,986","$134,966,411","$302,393",1970
2,3,3,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,1974,9.0,1125471,The early life and career of Vito Corleone in ...,Paramount Pictures,The Coppola Company,...,English,Italian,Spanish,Latin,Sicilian,"$13,000,000","$48,035,783","$47,834,595","$171,417",1970
3,4,4,The Dark Knight,https://www.imdb.com/title/tt0468569/,2008,9.0,2294364,When the menace known as the Joker wreaks havo...,Warner Bros.,Legendary Entertainment,...,English,Mandarin,,,,"$185,000,000","$1,005,973,645","$534,858,444","$158,411,483",2000
4,5,5,12 Angry Men,https://www.imdb.com/title/tt0050083/,1957,9.0,686732,A jury holdout attempts to prevent a miscarria...,Orion-Nova Productions,,...,English,,,,,"$350,000",$576,,,1950


## Extracting data into separate DataFrames and JSON
Extracting the various fields that can be resolved per decade

In [3]:
# -------------------------------------------------------------------------------------
# Resolve movies per decade
# Count the rows in each decade once, ordered by decade, then lay the result
# out as a two-column table with a fresh 0..n-1 index.
decade_counts = movie_data['decade'].value_counts().sort_index()

no_movies_per_decade = pd.DataFrame({
    "decade": decade_counts.index,
    "movies": decade_counts.values,
})

# One {"decade": ..., "movies": ...} dict per decade for the JSON export.
no_movies_per_decade_json = no_movies_per_decade.to_dict('records')

# -------------------------------------------------------------------------------------
# Resolve movie ratings per decade
# One grouped aggregation replaces the row-by-row `.iloc` loop: groupby sorts
# the decades, and the resulting `mean_rating` column has a numeric dtype
# instead of the object dtype produced by seeding the column with None.
movie_rating_per_decade = (
    movie_data.groupby('decade')['rating']
    .mean()
    .round(3)
    .reset_index(name='mean_rating')
)

# One {"decade": ..., "mean_rating": ...} dict per decade for the JSON export.
movie_rating_per_decade_json = movie_rating_per_decade.to_dict('records')

# -------------------------------------------------------------------------------------
# Resolve mean votes received per decade
def _vote_counts_as_numbers(raw_votes):
    """Return the vote counts in ``raw_votes`` as a numeric Series.

    Text entries keep only their digit characters (so separators such as
    commas are ignored) before being converted. A column that pandas already
    parsed as numeric is returned unchanged. Entries that are missing or
    contain no digits become NaN, which ``mean()`` skips.
    """
    if pd.api.types.is_numeric_dtype(raw_votes):
        return raw_votes
    digits_only = raw_votes.str.replace(r'\D', '', regex=True)
    return pd.to_numeric(digits_only, errors='coerce')


# Decade / numeric vote count pairs, one row per movie.
decade_to_vote = pd.DataFrame({
    'decade': movie_data['decade'].values,
    'votes': _vote_counts_as_numbers(movie_data['vote_count']).values,
})

# Mean vote count per decade, ordered by decade and rounded to 3 decimals.
movie_votes_per_decade = (
    decade_to_vote.groupby('decade')['votes']
    .mean()
    .round(3)
    .reset_index(name='mean_votes')
)

# One {"decade": ..., "mean_votes": ...} dict per decade for the JSON export.
movie_votes_per_decade_json = movie_votes_per_decade.to_dict('records')

movie_votes_per_decade

Unnamed: 0,decade,mean_votes
0,1920,90427.5
1,1930,169388.0
2,1940,224224.0
3,1950,200622.0
4,1960,246585.0
5,1970,549483.0
6,1980,459742.0
7,1990,904336.0
8,2000,742077.0
9,2010,542346.0


Extracting the various budget/monetary fields and resolving them per decade

In [4]:

# -------------------------------------------------------------------------------------
# Resolve mean budget, worldwide gross, USA gross and USA opening week per decade
money_columns = ['budget', 'gross_worldwide', 'gross_usa', 'opening_week_usa']


def _money_to_number(amounts):
    """Return the money column ``amounts`` as a numeric Series.

    Text entries such as "$25,000,000" keep only their digit characters
    before being converted. Missing entries (and entries without any digit)
    become 0.
    """
    if not pd.api.types.is_numeric_dtype(amounts):
        digits_only = amounts.str.replace(r'\D', '', regex=True)
        amounts = pd.to_numeric(digits_only, errors='coerce')
    # NOTE(review): missing amounts are counted as 0, exactly as before, so
    # they pull the per-decade means down. Leaving them as NaN would average
    # only the known amounts -- confirm which behaviour is wanted.
    return amounts.fillna(0)


# Decade plus the four monetary columns as numbers, one row per movie.
# NOTE(review): any currency marker is discarded with the other non-digits;
# if the CSV mixes currencies they are averaged as one unit -- confirm.
decade_to_costs = pd.DataFrame({'decade': movie_data['decade'].values})
for column in money_columns:
    decade_to_costs[column] = _money_to_number(movie_data[column]).values

# Mean of every monetary column per decade, ordered by decade. The columns
# come out as decade, mean_budget, mean_gross_worldwide, mean_gross_usa and
# mean_opening_week_usa (unrounded).
movie_budget_per_decade = (
    decade_to_costs.groupby('decade')[money_columns]
    .mean()
    .add_prefix('mean_')
    .reset_index()
)

# One dict per decade for the JSON export.
movie_budget_per_decade_json = movie_budget_per_decade.to_dict('records')

movie_budget_per_decade

Unnamed: 0,decade,mean_budget,mean_gross_worldwide,mean_gross_usa,mean_opening_week_usa
0,1920,1320500.0,237570.0,372570.0,4299.0
1,1930,1533670.0,66810100.0,33535900.0,221139.0
2,1940,1231800.0,2446440.0,1546050.0,40852.1
3,1950,7180510.0,6542500.0,6488610.0,33188.7
4,1960,2967050.0,12168400.0,11825800.0,38955.3
5,1970,7078400.0,96355200.0,72128200.0,315171.0
6,1980,16015400.0,113827000.0,69407600.0,3630980.0
7,1990,93526400.0,236655000.0,100223000.0,9969970.0
8,2000,54777400.0,250248000.0,98142200.0,18337800.0
9,2010,296242000.0,398081000.0,131514000.0,37035500.0


Extracting fields which involve categorical data

In [5]:
# -------------------------------------------------------------------------------------
# 1 Censor rating
# For every censor rating: how many movies carry it and their mean IMDb
# rating (3 decimals). A single grouped aggregation replaces the duplicate
# value_counts() calls and the per-row filtering loop; groupby sorts the
# censor ratings and skips movies whose censor rating is missing, as
# value_counts() did.
censor_data = (
    movie_data.groupby('censor_rating')['rating']
    .agg(['size', 'mean'])
    .rename(columns={'size': 'count', 'mean': 'mean_imdb_rating'})
    .round({'mean_imdb_rating': 3})
    .reset_index()
)

# One {"censor_rating", "count", "mean_imdb_rating"} dict per censor rating.
censor_data_json = censor_data.to_dict('records')

# -------------------------------------------------------------------------------------
# 2 Director ratings
# For every director: how many movies they have in the list and the mean
# IMDb rating of those movies (3 decimals). A single grouped aggregation
# replaces the duplicate value_counts() calls and the per-row filtering loop;
# groupby sorts the directors and skips movies without a director value.
director_data = (
    movie_data.groupby('director')['rating']
    .agg(['size', 'mean'])
    .rename(columns={'size': 'count', 'mean': 'mean_imdb_rating'})
    .round({'mean_imdb_rating': 3})
    .reset_index()
)

# One {"director", "count", "mean_imdb_rating"} dict per director.
director_data_json = director_data.to_dict('records')

director_data

Unnamed: 0,director,count,mean_imdb_rating
0,Aamir Khan,1,8.4
1,Adam Elliot,1,8.1
2,Akira Kurosawa,6,8.317
3,Alejandro G. Iñárritu(as Alejandro González Iñ...,1,8.1
4,Alfred Hitchcock,6,8.3
...,...,...,...
153,Wim Wenders,1,8.1
154,Wolfgang Petersen,1,8.3
155,Yasujirô Ozu,1,8.2
156,Yavuz Turgul,1,8.2


Extracting the productions data

In [6]:
def _rating_summary(frame, column, label):
    """Summarise IMDb ratings for each distinct value of ``frame[column]``.

    Returns a DataFrame ordered by value with three columns: ``label`` (the
    distinct value), ``count`` (number of rows holding that value) and
    ``mean_imdb_rating`` (mean of ``rating``, rounded to 3 decimals).
    Missing values and entries equal to ' ' (a single space) are left out.
    """
    summary = (
        frame.groupby(column)['rating']
        .agg(['size', 'mean'])
        .rename(columns={'size': 'count', 'mean': 'mean_imdb_rating'})
        .round({'mean_imdb_rating': 3})
        .rename_axis(label)
        .reset_index()
    )
    # Drop the ' ' entries with a boolean mask rather than
    # `summary[label].replace(' ', np.nan, inplace=True)` + dropna: the
    # in-place call works on a temporary column object and has no effect on
    # the frame under pandas Copy-on-Write. Row labels are left as they were
    # after sorting, which is what the replace/dropna sequence produced.
    return summary[summary[label] != ' ']


# One summary per production-company slot of the source data.
production1_data = _rating_summary(movie_data, 'production_1', 'production_co')
production2_data = _rating_summary(movie_data, 'production_2', 'production_co')
production3_data = _rating_summary(movie_data, 'production_3', 'production_co')

# Record lists for the JSON export.
production1_data_json = production1_data.to_dict('records')
production2_data_json = production2_data.to_dict('records')
production3_data_json = production3_data.to_dict('records')

production1_data

Unnamed: 0,production_co,count,mean_imdb_rating
0,ABC Animation,1,8.1
1,AMLF,1,8.3
2,Aamir Khan Productions,2,8.4
3,Act III Communications,1,8
4,Alcon Entertainment,1,8.1
...,...,...,...
151,Warner Bros.,19,8.384
152,Warner Independent Pictures (WIP),1,8.1
153,Wiedemann & Berg Filmproduktion,1,8.4
154,Zanuck/Brown Productions,1,8.3


Extracting the genres data

In [7]:
def _rating_summary(frame, column, label):
    """Summarise IMDb ratings for each distinct value of ``frame[column]``.

    Returns a DataFrame ordered by value with three columns: ``label`` (the
    distinct value), ``count`` (number of rows holding that value) and
    ``mean_imdb_rating`` (mean of ``rating``, rounded to 3 decimals).
    Missing values and entries equal to ' ' (a single space) are left out.
    """
    summary = (
        frame.groupby(column)['rating']
        .agg(['size', 'mean'])
        .rename(columns={'size': 'count', 'mean': 'mean_imdb_rating'})
        .round({'mean_imdb_rating': 3})
        .rename_axis(label)
        .reset_index()
    )
    # Drop the ' ' entries with a boolean mask rather than
    # `summary[label].replace(' ', np.nan, inplace=True)` + dropna: the
    # in-place call works on a temporary column object and has no effect on
    # the frame under pandas Copy-on-Write. Row labels are left as they were
    # after sorting, which is what the replace/dropna sequence produced.
    return summary[summary[label] != ' ']


# One summary per genre slot of the source data.
primary_genre_data = _rating_summary(movie_data, 'genre_1', 'genre')
secondary_genre_data = _rating_summary(movie_data, 'genre_2', 'genre')
ternary_genre_data = _rating_summary(movie_data, 'genre_3', 'genre')
extra_genre_data = _rating_summary(movie_data, 'genre_4', 'genre')

# Record lists for the JSON export.
primary_genre_data_json = primary_genre_data.to_dict('records')
secondary_genre_data_json = secondary_genre_data.to_dict('records')
ternary_genre_data_json = ternary_genre_data.to_dict('records')
extra_genre_data_json = extra_genre_data.to_dict('records')

primary_genre_data

Unnamed: 0,genre,count,mean_imdb_rating
0,Action,41,8.371
1,Adventure,20,8.21
2,Animation,22,8.264
3,Biography,22,8.264
4,Comedy,25,8.276
5,Crime,38,8.366
6,Drama,71,8.285
7,Film-Noir,1,8.1
8,Horror,3,8.333
9,Mystery,4,8.325


Extracting the language data

In [8]:
def _rating_summary(frame, column, label):
    """Summarise IMDb ratings for each distinct value of ``frame[column]``.

    Returns a DataFrame ordered by value with three columns: ``label`` (the
    distinct value), ``count`` (number of rows holding that value) and
    ``mean_imdb_rating`` (mean of ``rating``, rounded to 3 decimals).
    Missing values and entries equal to ' ' (a single space) are left out.
    """
    summary = (
        frame.groupby(column)['rating']
        .agg(['size', 'mean'])
        .rename(columns={'size': 'count', 'mean': 'mean_imdb_rating'})
        .round({'mean_imdb_rating': 3})
        .rename_axis(label)
        .reset_index()
    )
    # Drop the ' ' entries with a boolean mask rather than
    # `summary[label].replace(' ', np.nan, inplace=True)` + dropna: the
    # in-place call works on a temporary column object and has no effect on
    # the frame under pandas Copy-on-Write. Row labels are left as they were
    # after sorting, which is what the replace/dropna sequence produced.
    return summary[summary[label] != ' ']


# One summary per language slot of the source data.
primary_language_data = _rating_summary(movie_data, 'language_1', 'language')
secondary_language_data = _rating_summary(movie_data, 'language_2', 'language')
ternary_language_data = _rating_summary(movie_data, 'language_3', 'language')
fourth_language_data = _rating_summary(movie_data, 'language_4', 'language')
fifth_language_data = _rating_summary(movie_data, 'language_5', 'language')

# Record lists for the JSON export.
primary_language_data_json = primary_language_data.to_dict('records')
secondary_language_data_json = secondary_language_data.to_dict('records')
ternary_language_data_json = ternary_language_data.to_dict('records')
fourth_language_data_json = fourth_language_data.to_dict('records')
fifth_language_data_json = fifth_language_data.to_dict('records')

primary_language_data

Unnamed: 0,language,count,mean_imdb_rating
0,Arabic,2,8.25
1,Cantonese,1,8.1
2,Danish,1,8.3
3,English,174,8.314
4,French,8,8.212
5,German,6,8.3
6,Hindi,8,8.312
7,Italian,4,8.55
8,Japanese,16,8.325
9,Korean,4,8.3


Extracting the country data

In [9]:
def _rating_summary(frame, column, label):
    """Summarise IMDb ratings for each distinct value of ``frame[column]``.

    Returns a DataFrame ordered by value with three columns: ``label`` (the
    distinct value), ``count`` (number of rows holding that value) and
    ``mean_imdb_rating`` (mean of ``rating``, rounded to 3 decimals).
    Missing values and entries equal to ' ' (a single space) are left out.
    """
    summary = (
        frame.groupby(column)['rating']
        .agg(['size', 'mean'])
        .rename(columns={'size': 'count', 'mean': 'mean_imdb_rating'})
        .round({'mean_imdb_rating': 3})
        .rename_axis(label)
        .reset_index()
    )
    # Drop the ' ' entries with a boolean mask rather than
    # `summary[label].replace(' ', np.nan, inplace=True)` + dropna: the
    # in-place call works on a temporary column object and has no effect on
    # the frame under pandas Copy-on-Write. Row labels are left as they were
    # after sorting, which is what the replace/dropna sequence produced.
    return summary[summary[label] != ' ']


# One summary per country slot of the source data.
primary_country_data = _rating_summary(movie_data, 'country_1', 'country')
secondary_country_data = _rating_summary(movie_data, 'country_2', 'country')
ternary_country_data = _rating_summary(movie_data, 'country_3', 'country')
extra_country_data = _rating_summary(movie_data, 'country_4', 'country')

# Record lists for the JSON export.
primary_country_data_json = primary_country_data.to_dict('records')
secondary_country_data_json = secondary_country_data.to_dict('records')
ternary_country_data_json = ternary_country_data.to_dict('records')
extra_country_data_json = extra_country_data.to_dict('records')

primary_country_data

Unnamed: 0,country,count,mean_imdb_rating
0,Argentina,2,8.15
1,Australia,3,8.1
2,Brazil,1,8.6
3,Canada,2,8.2
4,Denmark,1,8.3
5,France,9,8.222
6,Germany,6,8.233
7,Hong Kong,1,8.1
8,India,9,8.322
9,Iran,2,8.3



Saving various Dataframes to CSV files

In [10]:
# Every summary table paired with the CSV file it is written to. The files
# are written in the order listed, with the default to_csv settings.
csv_exports = [
    # Per-decade summaries
    (no_movies_per_decade, './src/data/no_movies_per_decade.csv'),
    (movie_budget_per_decade, './src/data/movie_budget_per_decade.csv'),
    (movie_rating_per_decade, './src/data/movie_rating_per_decade.csv'),
    (movie_votes_per_decade, './src/data/movie_votes_per_decade.csv'),
    # Censor rating and director summaries
    (censor_data, './src/data/censor_data.csv'),
    (director_data, './src/data/director_data.csv'),
    # Production company summaries
    (production1_data, './src/data/production1_data.csv'),
    (production2_data, './src/data/production2_data.csv'),
    (production3_data, './src/data/production3_data.csv'),
    # Genre summaries
    (primary_genre_data, './src/data/primary_genre_data.csv'),
    (secondary_genre_data, './src/data/secondary_genre_data.csv'),
    (ternary_genre_data, './src/data/ternary_genre_data.csv'),
    (extra_genre_data, './src/data/extra_genre_data.csv'),
    # Language summaries
    (primary_language_data, './src/data/primary_language_data.csv'),
    (secondary_language_data, './src/data/secondary_language_data.csv'),
    (ternary_language_data, './src/data/ternary_language_data.csv'),
    (fourth_language_data, './src/data/fourth_language_data.csv'),
    (fifth_language_data, './src/data/fifth_language_data.csv'),
    # Country summaries
    (primary_country_data, './src/data/primary_country_data.csv'),
    (secondary_country_data, './src/data/secondary_country_data.csv'),
    (ternary_country_data, './src/data/ternary_country_data.csv'),
    (extra_country_data, './src/data/extra_country_data.csv'),
]

for summary_frame, csv_path in csv_exports:
    summary_frame.to_csv(csv_path)

print("Successfully saved to CSV")

Successfully saved to CSV


Saving data to JSON

In [11]:
# Gather every record list under the key it is stored as in the combined
# JSON document; key order matches the order the sections were computed.
imdb_movie_analysis_data = {
    # Per-decade summaries
    'no_movies_per_decade': no_movies_per_decade_json,
    'movie_budget_per_decade': movie_budget_per_decade_json,
    'movie_rating_per_decade': movie_rating_per_decade_json,
    'movie_votes_per_decade': movie_votes_per_decade_json,
    # Censor rating and director summaries
    'censor_data': censor_data_json,
    'director_data': director_data_json,
    # Production company summaries
    'production1_data': production1_data_json,
    'production2_data': production2_data_json,
    'production3_data': production3_data_json,
    # Genre summaries
    'primary_genre_data': primary_genre_data_json,
    'secondary_genre_data': secondary_genre_data_json,
    'ternary_genre_data': ternary_genre_data_json,
    'extra_genre_data': extra_genre_data_json,
    # Language summaries
    'primary_language_data': primary_language_data_json,
    'secondary_language_data': secondary_language_data_json,
    'ternary_language_data': ternary_language_data_json,
    'fourth_language_data': fourth_language_data_json,
    'fifth_language_data': fifth_language_data_json,
    # Country summaries
    'primary_country_data': primary_country_data_json,
    'secondary_country_data': secondary_country_data_json,
    'ternary_country_data': ternary_country_data_json,
    'extra_country_data': extra_country_data_json,
}

# Serialise the whole mapping into a single JSON file.
analysis_json_path = './src/data/imdb_movie_analysis_data.json'
with open(analysis_json_path, 'w') as json_file:
    json.dump(imdb_movie_analysis_data, json_file)
print("Successfully saved to JSON file")

Successfully saved to JSON file
