In [129]:
# Import the necessary libraries
import pandas as pd
import altair as alt
import numpy as np
import country_converter as coco
import plotly.express as px
from vega_datasets import data

In [130]:
# Read netflix_titles.csv into a DataFrame, then tally the raw `country` strings.
# A single entry may list several comma-separated countries.
netflix_csv = "netflix_titles.csv"
df = pd.read_csv(netflix_csv)
df.loc[:, "country"].value_counts()

United States                                      2555
India                                               923
United Kingdom                                      397
Japan                                               226
South Korea                                         183
                                                   ... 
United States, Australia, Samoa, United Kingdom       1
Spain, France, United States                          1
Netherlands, Belgium, Germany, Jordan                 1
United States, United Kingdom, Morocco                1
China, United States, Australia                       1
Name: country, Length: 681, dtype: int64

In [131]:
# Keep only the `country` column, leaving out rows where it is missing
df_countries = df['country'].dropna()

In [132]:
# Count, for every country, the number of titles that list it.
#
# Each `country` entry may hold several comma-separated names, so entries are
# split into individual, whitespace-trimmed tokens and compared by exact name.
# (A substring test would also credit a title to every country whose name is
# contained in another one, e.g. "Germany" inside "East Germany".)
country_tokens = df_countries.str.split(',').explode().str.strip()

# Flat list of every trimmed token, in row order
countries = country_tokens.tolist()

# Stray commas produce empty tokens; they are not countries
country_tokens = country_tokens[country_tokens != '']

# Distinct names in order of first appearance. This order defines the row
# labels of the result, which later cells refer to.
df_countries_u = country_tokens.unique()

# A title contributes at most once per country, even if its entry repeats a name:
# after reset_index each row is a (title row label, country) pair.
is_repeat = country_tokens.reset_index().duplicated().to_numpy()
titles_per_country = country_tokens[~is_repeat].value_counts()

df_countries_count = pd.DataFrame(
    {
        'country': df_countries_u,
        # float dtype keeps the column type the notebook has always produced
        'count': titles_per_country.reindex(df_countries_u).to_numpy(dtype=float),
    },
    columns=['country', 'count'],
)
df_countries_count
        
    

Unnamed: 0,country,count
0,Brazil,88.0
1,Mexico,154.0
2,Singapore,39.0
3,United States,3297.0
4,Turkey,108.0
...,...,...
112,Sudan,1.0
113,Panama,1.0
114,Uganda,1.0
115,East Germany,1.0


In [133]:
# Create a bar chart using altair: one bar per country, sorted by descending count
(
    alt.Chart(df_countries_count)
    .mark_bar()
    .encode(
        alt.X('country', sort='-y'),
        alt.Y('count'),
    )
    .interactive()
)

In [134]:
# Drop some of the countries that are not found in coco.
# NOTE(review): the rows are identified by row label, so this relies on the
# row order of df_countries_count, and re-running the cell raises KeyError
# because the labels are already gone.
unmatched_rows = [48, 87, 115]
df_countries_count = df_countries_count.drop(unmatched_rows, axis=0)


In [135]:
# Reference the world_110m TopoJSON by URL and select its 'countries' feature
world_url = data.world_110m.url
country_json = alt.topo_feature(world_url, feature='countries')


In [136]:
# Use the coco library to get the ISO numeric code of every remaining country
# and store it in an `id` column
country_names = df_countries_count['country']
ISOCodes = coco.convert(names=country_names, to='ISOnumeric')
df_countries_count['id'] = ISOCodes
df_countries_count['id']

0       76
1      484
2      702
3      840
4      792
      ... 
111    706
112    729
113    591
114    800
116    499
Name: id, Length: 114, dtype: int64

In [137]:
# Display a world map of the data: every country shape is joined to its
# `count` through the shared `id` field and coloured by that count
count_lookup = alt.LookupData(data=df_countries_count, key='id', fields=['count'])
netflix_map = (
    alt.Chart(country_json)
    .mark_geoshape()
    .transform_lookup(lookup='id', from_=count_lookup)
    .encode(color=alt.Color('count:Q'))
    .project(type='equirectangular')
    .properties(
        title="Total Netflix Content Produced by each Country (1925-2021)",
        width=700,
        height=400,
    )
)
netflix_map

In [124]:
# Save the per-country counts dataframe into a csv file
output_csv = 'AmountOfNetflixContentProducedPerCountry.csv'
df_countries_count.to_csv(output_csv)
                          

In [None]:
# Plotly version of the world map.
# Uses the per-country counts: the raw `df` holds multi-country strings in
# `country`, and `show_id` is an identifier rather than a quantity, so neither
# can be matched to a country or mapped to a colour scale meaningfully.
fig = px.choropleth(
    df_countries_count,
    locations='country',
    locationmode='country names',
    color='count',
)
# Title describes the Netflix data actually plotted
fig.update_layout(title_text='Total Netflix Content Produced by each Country (1925-2021)')
fig.show()
