In [4]:
import pandas as pd
import numpy as np
import plotly.express as px
import json


In [5]:
# Input CSV files, referenced by relative path.
file_credit = "tmdb_5000_credits.csv"
file_movie = "tmdb_5000_movies.csv"

In [6]:
# Load the movie metadata table and the credits table from their CSV files.
df_m = pd.read_csv(file_movie)
df_c = pd.read_csv(file_credit)

# Preview the credits table as the cell's last expression (instead of print)
# so the notebook displays it the same way the other preview cells do.
df_c.head()

   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                cast  \
0  [{"cast_id": 242, "character": "Jake Sully", "...   
1  [{"cast_id": 4, "character": "Captain Jack Spa...   
2  [{"cast_id": 1, "character": "James Bond", "cr...   
3  [{"cast_id": 2, "character": "Bruce Wayne / Ba...   
4  [{"cast_id": 5, "character": "John Carter", "c...   

                                                crew  
0  [{"credit_id": "52fe48009251416c750aca23", "de...  
1  [{"credit_id": "52fe4232c3a36847f800b579", "de...  
2  [{"credit_id": "54805967c3a36829b5002c41", "de...  
3  [{"credit_id": "52fe4781c3a36847f81398c3", "de...  
4  [{"credit_id": "52fe479ac3a36847f813eaa3",

In [7]:
# Preview the movies table; a bare last expression lets the notebook render it
# rather than printing the plain-text repr.
df_m.head()

      budget                                             genres  \
0  237000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
1  300000000  [{"id": 12, "name": "Adventure"}, {"id": 14, "...   
2  245000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
3  250000000  [{"id": 28, "name": "Action"}, {"id": 80, "nam...   
4  260000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   

                                       homepage      id  \
0                   http://www.avatarmovie.com/   19995   
1  http://disney.go.com/disneypictures/pirates/     285   
2   http://www.sonypictures.com/movies/spectre/  206647   
3            http://www.thedarkknightrises.com/   49026   
4          http://movies.disney.com/john-carter   49529   

                                            keywords original_language  \
0  [{"id": 1463, "name": "culture clash"}, {"id":...                en   
1  [{"id": 270, "name": "ocean"}, {"id": 726, "na...                en   
2  [{"id": 470, "nam

In [8]:
# Show the first ten raw production_countries values as a plain Python list.
df_m['production_countries'].head(10).tolist()

['[{"iso_3166_1": "US", "name": "United States of America"}, {"iso_3166_1": "GB", "name": "United Kingdom"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "GB", "name": "United Kingdom"}, {"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "GB", "name": "United Kingdom"}, {"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]']

In [9]:
def get_countries(x):
    """Parse a JSON-encoded list of country records into a list of country names.

    Parameters
    ----------
    x : str
        JSON text such as
        '[{"iso_3166_1": "US", "name": "United States of America"}]'.

    Returns
    -------
    list
        The "name" value of every record, in the order they appear. An empty
        list is returned when ``x`` cannot be decoded as JSON (including
        non-string values such as None or NaN) or when the decoded value is
        not a sequence of mappings that each carry a "name" key.
    """
    try:
        data = json.loads(x)
        return [d["name"] for d in data]
    # Only malformed-data errors are treated as "no countries":
    #   TypeError  - x is not str/bytes, or the decoded value is not iterable
    #                or its items are not subscriptable by key
    #   ValueError - invalid JSON text (json.JSONDecodeError subclasses it)
    #   KeyError   - a record has no "name" field
    # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
    except (TypeError, ValueError, KeyError):
        return []

In [10]:

# Preview the production_countries column before it is parsed; shown as the
# cell's last expression, matching the other inspection cells.
df_m['production_countries'].head()

0    [{"iso_3166_1": "US", "name": "United States o...
1    [{"iso_3166_1": "US", "name": "United States o...
2    [{"iso_3166_1": "GB", "name": "United Kingdom"...
3    [{"iso_3166_1": "US", "name": "United States o...
4    [{"iso_3166_1": "US", "name": "United States o...
Name: production_countries, dtype: object


In [19]:

# Parse the JSON strings into lists of country names on a derived frame and
# leave df_m untouched, so this cell gives the same result on every run.
# (get_countries returns [] for input that is not JSON text, so overwriting
# df_m['production_countries'] in place would blank the column on a re-run.)
movies_expanded = (
    df_m
    .assign(production_countries=df_m['production_countries'].apply(get_countries))
    # Expand to one row per country
    .explode('production_countries')
)

In [20]:
# First ten rows of the exploded frame, country column only; a repeated index
# label marks a film that was split across several countries.
movies_expanded.head(10)["production_countries"]

0    United States of America
0              United Kingdom
1    United States of America
2              United Kingdom
2    United States of America
3    United States of America
4    United States of America
5    United States of America
6    United States of America
7    United States of America
Name: production_countries, dtype: object

In [26]:
# Per-country summary: mean revenue, budget, popularity and vote average, plus
# the number of titles, with the group key exposed as a `country` column.
geo_df = (
    movies_expanded
    .groupby('production_countries')
    .agg(
        revenue=('revenue', 'mean'),
        budget=('budget', 'mean'),
        popularity=('popularity', 'mean'),
        vote_average=('vote_average', 'mean'),
        movie_count=('title', 'count'),
    )
    .rename_axis('country')
    .reset_index()
)

# Bubble map: marker size tracks mean revenue, marker colour tracks mean budget.
# NOTE(review): plotly warns that the "country names" location lookup is
# changing in an upcoming version and suggests ISO-3 codes instead.
fig = px.scatter_geo(
    geo_df,
    title="Global Movie Revenue Explorer: Average Revenue vs Budget by Country",
    projection="natural earth",
    locations="country",
    locationmode="country names",
    hover_name="country",
    size="revenue",
    color="budget",
    color_continuous_scale="Mint",
    hover_data=dict(
        revenue=":.0f",
        budget=":.0f",
        movie_count=True,
        vote_average=True,
        popularity=True,
    ),
)

# Save a standalone HTML copy (plotly.js loaded from the CDN), then render inline.
fig.write_html("global_revenue_map.html", include_plotlyjs="cdn")
fig.show()


The library used by the *country names* `locationmode` option is changing in an upcoming version. Country names in existing plots may not work in the new version. To ensure consistent behavior, consider setting `locationmode` to *ISO-3*.



In [22]:
# Re-read the movies table so production_countries holds the raw JSON strings
# again; the path comes from the shared file_movie constant rather than a
# second hard-coded copy of the filename.
df_m = pd.read_csv(file_movie)
df_m["production_countries"].iloc[:10].tolist()

['[{"iso_3166_1": "US", "name": "United States of America"}, {"iso_3166_1": "GB", "name": "United Kingdom"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "GB", "name": "United Kingdom"}, {"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "GB", "name": "United Kingdom"}, {"iso_3166_1": "US", "name": "United States of America"}]',
 '[{"iso_3166_1": "US", "name": "United States of America"}]']