In [201]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

from pandas_datareader import wb
import pandas_datareader as web

# set matplotlib's global plotting style to the built-in 'ggplot' style sheet
plt.style.use('ggplot')

In [202]:
# Fetch the World Bank country table and keep only the columns used later on
# (name, region, income level and ISO3 code). 'name' is relabelled 'country'
# so that it matches the key column of the indicator data merged further down.
df_info = (
    wb.get_countries()
    .loc[:, ['name', 'region', 'incomeLevel', 'iso3c']]
    .rename(columns = {'name' : 'country'})
)

print(df_info.shape[0])
df_info.head()

299


Unnamed: 0,country,region,incomeLevel,iso3c
0,Aruba,Latin America & Caribbean,High income,ABW
1,Africa Eastern and Southern,Aggregates,Aggregates,AFE
2,Afghanistan,South Asia,Low income,AFG
3,Africa,Aggregates,Aggregates,AFR
4,Africa Western and Central,Aggregates,Aggregates,AFW


In [203]:
# World Bank indicator codes to download:
#   EN.ATM.CO2E.KT -> renamed 'co2'      (total emissions, in kilotons; the
#                                         per-capita figure is derived below)
#   SP.POP.TOTL    -> renamed 'totalpop' (total population)
indicators = ['EN.ATM.CO2E.KT', 'SP.POP.TOTL']

# Download the 2018 values for every entry the API returns, flatten the index
# into ordinary columns, switch to the short column names, and add tons of CO2
# per capita (x 1000 converts kilotons to tons).
df = (
    wb.download(indicator = indicators, country = 'all', start = 2018, end = 2018)
    .reset_index()
    .rename(columns = {'EN.ATM.CO2E.KT' : 'co2', 'SP.POP.TOTL' : 'totalpop'})
    .assign(co2_pc = lambda frame: frame['co2'] * 1000 / frame['totalpop'])
)

print('Number of rows: ' + str(df.shape[0]))
print('Number of countries: ' + str(df['country'].nunique()))
df.head()

Number of rows: 266
Number of countries: 266


Unnamed: 0,country,year,co2,totalpop,co2_pc
0,Africa Eastern and Southern,2018,600351.1,643090131.0,0.933541
1,Africa Western and Central,2018,224380.0,435229381.0,0.515544
2,Arab World,2018,1863604.0,419851989.0,4.438716
3,Caribbean small states,2018,36920.0,7358929.0,5.017034
4,Central Europe and the Baltics,2018,676470.0,102538451.0,6.597232


In [204]:
# Attach region, income level and ISO3 code to every row. A left join keeps all
# rows of df, including those whose country name has no match in df_info.
df = pd.merge(df, df_info, how = 'left', on = 'country')

# sanity check: the row count should be unchanged by the merge
print(df.shape[0])
df.head()

266


Unnamed: 0,country,year,co2,totalpop,co2_pc,region,incomeLevel,iso3c
0,Africa Eastern and Southern,2018,600351.1,643090131.0,0.933541,Aggregates,Aggregates,AFE
1,Africa Western and Central,2018,224380.0,435229381.0,0.515544,Aggregates,Aggregates,AFW
2,Arab World,2018,1863604.0,419851989.0,4.438716,Aggregates,Aggregates,ARB
3,Caribbean small states,2018,36920.0,7358929.0,5.017034,Aggregates,Aggregates,CSS
4,Central Europe and the Baltics,2018,676470.0,102538451.0,6.597232,Aggregates,Aggregates,CEB


Notice that some observations in the final data are not countries but "aggregates".

In [205]:
# Keep only actual countries: a row must have region info, and that region
# must not be the 'Aggregates' label used for non-country groupings.
df = df[df['region'].notna() & df['region'].ne('Aggregates')].copy()

print('Number of rows: ' + str(df.shape[0]))
print('Number of countries: ' + str(df['country'].nunique()))
df.head()

Number of rows: 217
Number of countries: 217


Unnamed: 0,country,year,co2,totalpop,co2_pc,region,incomeLevel,iso3c
49,Afghanistan,2018,7440.0,37171922.0,0.200151,South Asia,Low income,AFG
50,Albania,2018,5560.0,2866376.0,1.939732,Europe & Central Asia,Upper middle income,ALB
51,Algeria,2018,151670.0,42228415.0,3.591657,Middle East & North Africa,Lower middle income,DZA
52,American Samoa,2018,,55461.0,,East Asia & Pacific,Upper middle income,ASM
53,Andorra,2018,460.0,77008.0,5.973405,Europe & Central Asia,High income,AND


Also, notice that some countries lack observations.

In [206]:
# discard every row that still has a missing value in any column
df = df.dropna()

print('Number of final countries: ' + str(df.shape[0]))
df.head()

Number of final countries: 190


Unnamed: 0,country,year,co2,totalpop,co2_pc,region,incomeLevel,iso3c
49,Afghanistan,2018,7440.0,37171922.0,0.200151,South Asia,Low income,AFG
50,Albania,2018,5560.0,2866376.0,1.939732,Europe & Central Asia,Upper middle income,ALB
51,Algeria,2018,151670.0,42228415.0,3.591657,Middle East & North Africa,Lower middle income,DZA
53,Andorra,2018,460.0,77008.0,5.973405,Europe & Central Asia,High income,AND
54,Angola,2018,27340.0,30809787.0,0.88738,Sub-Saharan Africa,Lower middle income,AGO


In [207]:
# Regional totals of emissions and population, followed by the per-capita
# figure computed from those totals (x 1000 converts kilotons to tons).
df_region = (
    df.groupby('region')[['co2', 'totalpop']]
    .sum()
    .assign(co2_pc = lambda totals: totals['co2'] * 1000 / totals['totalpop'])
)

df_region

Unnamed: 0_level_0,co2,totalpop,co2_pc
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East Asia & Pacific,14118290.0,2305740000.0,6.123106
Europe & Central Asia,6132830.0,915768700.0,6.69692
Latin America & Caribbean,1661730.0,636708500.0,2.609876
Middle East & North Africa,2523250.0,444405100.0,5.677815
North America,5555700.0,363903400.0,15.266965
South Asia,2770040.0,1814455000.0,1.526651
Sub-Saharan Africa,816360.0,1074867000.0,0.759499


In [208]:
# sum emissions and population for each income level...
df_income = df.groupby('incomeLevel')[['co2', 'totalpop']].sum()

# ...and then calculate emissions per capita (x 1000 converts kilotons to tons)
df_income['co2_pc'] = df_income['co2'] * 1000 / df_income['totalpop']

# Remove the "Not classified" group. errors = 'ignore' makes this a no-op when
# no country in the data carries that label, instead of raising a KeyError.
df_income = df_income.drop(index = 'Not classified', errors = 'ignore')

df_income

Unnamed: 0_level_0,co2,totalpop,co2_pc
incomeLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
High income,12134270.0,1169209000.0,10.378183
Low income,145090.0,627694400.0,0.231148
Lower middle income,5597900.0,3235463000.0,1.73017
Upper middle income,15562780.0,2494594000.0,6.238603


In [209]:
# World Bank indicator codes for the explanatory variables; each one is given
# a short column name in the rename step below.
indicators = [
    'SP.URB.TOTL.IN.ZS',
    'NY.GDP.PCAP.CD',
    'EG.ELC.ACCS.ZS',
    'AG.LND.AGRI.ZS',
    'NY.GDP.TOTL.RT.ZS',
    'EG.FEC.RNEW.ZS',
]

# Download the 2018 values, flatten the index into ordinary columns and
# switch to the short column names used in the rest of the notebook.
df_ind = (
    wb.download(indicator = indicators, country = 'all', start = 2018, end = 2018)
    .reset_index()
    .rename(columns = {
        'SP.URB.TOTL.IN.ZS' : 'urban',
        'NY.GDP.PCAP.CD'    : 'gdp_pc',
        'EG.ELC.ACCS.ZS'    : 'electricity',
        'AG.LND.AGRI.ZS'    : 'agriculture',
        'NY.GDP.TOTL.RT.ZS' : 'nat_resources',
        'EG.FEC.RNEW.ZS'    : 'renew_energy',
    })
)

print('Number of rows: ' + str(df_ind.shape[0]))
print('Number of countries: ' + str(df_ind['country'].nunique()))
df_ind.head()

Number of rows: 266
Number of countries: 266


Unnamed: 0,country,year,urban,gdp_pc,electricity,agriculture,nat_resources,renew_energy
0,Africa Eastern and Southern,2018,35.80777,1574.978648,42.168241,44.11798,7.234045,
1,Africa Western and Central,2018,46.633138,1704.139603,51.253253,39.953545,11.13215,
2,Arab World,2018,58.926595,6504.148493,88.306324,36.563558,22.881712,
3,Caribbean small states,2018,51.19935,10234.211179,97.735516,5.174386,5.218111,
4,Central Europe and the Baltics,2018,62.333667,16045.953635,100.0,46.687669,0.679358,


In [210]:
# Add the explanatory variables to the emissions data. The left join on
# country and year keeps exactly the rows already present in df.
df = pd.merge(df, df_ind, how = 'left', on = ['country', 'year'])

# sanity check: the row count should be unchanged by the merge
print(df.shape[0])
df.head()

190


Unnamed: 0,country,year,co2,totalpop,co2_pc,region,incomeLevel,iso3c,urban,gdp_pc,electricity,agriculture,nat_resources,renew_energy
0,Afghanistan,2018,7440.0,37171922.0,0.200151,South Asia,Low income,AFG,25.495,485.668419,98.715622,58.081365,0.915366,21.422701
1,Albania,2018,5560.0,2866376.0,1.939732,Europe & Central Asia,Upper middle income,ALB,60.319,5287.663694,100.0,42.849672,1.818065,38.266399
2,Algeria,2018,151670.0,42228415.0,3.591657,Middle East & North Africa,Lower middle income,DZA,72.629,4142.018558,99.641922,17.364964,18.474129,0.1933
3,Andorra,2018,460.0,77008.0,5.973405,Europe & Central Asia,High income,AND,88.062,41791.969837,100.0,40.06383,0.0,18.506001
4,Angola,2018,27340.0,30809787.0,0.88738,Sub-Saharan Africa,Lower middle income,AGO,65.514,3289.643995,45.29,45.682594,27.42822,56.7855


In [211]:
# discard rows with a missing value in any column, including the newly merged ones
df = df.dropna()

print('Number of final countries: ' + str(df.shape[0]))
df.head()

Number of final countries: 185


Unnamed: 0,country,year,co2,totalpop,co2_pc,region,incomeLevel,iso3c,urban,gdp_pc,electricity,agriculture,nat_resources,renew_energy
0,Afghanistan,2018,7440.0,37171922.0,0.200151,South Asia,Low income,AFG,25.495,485.668419,98.715622,58.081365,0.915366,21.422701
1,Albania,2018,5560.0,2866376.0,1.939732,Europe & Central Asia,Upper middle income,ALB,60.319,5287.663694,100.0,42.849672,1.818065,38.266399
2,Algeria,2018,151670.0,42228415.0,3.591657,Middle East & North Africa,Lower middle income,DZA,72.629,4142.018558,99.641922,17.364964,18.474129,0.1933
3,Andorra,2018,460.0,77008.0,5.973405,Europe & Central Asia,High income,AND,88.062,41791.969837,100.0,40.06383,0.0,18.506001
4,Angola,2018,27340.0,30809787.0,0.88738,Sub-Saharan Africa,Lower middle income,AGO,65.514,3289.643995,45.29,45.682594,27.42822,56.7855


In [212]:
def get_emissions(country):
    """Download the 2000-2021 CO2 emission series for `country`.

    `country` is passed straight to `wb.download` and is also written into the
    'iso3c' column of the result. The returned frame is indexed by 'year'
    (as datetimes) and has the columns 'co2_pc' (emissions * 1000 / population),
    'co2' (the emissions indicator as downloaded) and 'iso3c'. Rows with any
    missing value are left out.
    """
    raw = wb.download(
        indicator = ['EN.ATM.CO2E.KT', 'SP.POP.TOTL'],
        country = country,
        start = 2000,
        end = 2021,
    ).reset_index()

    # x 1000 converts kilotons to tons before dividing by population
    emissions = pd.DataFrame({
        'year'   : raw['year'].astype(int),
        'co2_pc' : raw['EN.ATM.CO2E.KT'] * 1000 / raw['SP.POP.TOTL'],
        'co2'    : raw['EN.ATM.CO2E.KT'],
    })
    emissions['iso3c'] = country

    # incomplete years are dropped before the year becomes the datetime index
    emissions = emissions.dropna()
    emissions['year'] = pd.to_datetime(emissions['year'], format = '%Y')

    return emissions.set_index('year')


# Example: emission history of a single country with the country name attached.
# df_co2 must be created by the download line before it can be merged; with
# that line commented out the merge raises NameError on a fresh kernel.
df_co2 = get_emissions('USA').reset_index().sort_values(by='year')

df_co2 = pd.merge(df_co2, df[['iso3c','country']], on='iso3c', how='left')



In [213]:
import pandas as pd
import plotly.express as px
import pandas_datareader.data as web
from datetime import datetime
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
from dash_bootstrap_templates import load_figure_template
from pandas_datareader import wb      
from datetime import datetime, timedelta

In [214]:
# URL of the dash-bootstrap-templates stylesheet, loaded next to the Bootstrap theme
dbc_css = 'https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates@V1.0.2/dbc.min.css'

# create the Dash app with both stylesheets
# (note: the cell that defines the layout creates `app` again with the same arguments)
app = JupyterDash(external_stylesheets = [dbc.themes.BOOTSTRAP, dbc_css])

In [215]:
# Create the option lists for the dashboard controls

# radio buttons: which emissions column to plot
options1 = [{'label' : 'CO2 Emissions per Capita', 'value' : 'co2_pc'},
            {'label' : 'Total CO2 Emissions', 'value' : 'co2'}]

# One (country, iso3c) pair per country, in order of first appearance.
# Working on this de-duplicated copy leaves df itself untouched, so the cell
# can be re-run safely and does not depend on how df is indexed.
country_codes = df[['country', 'iso3c']].drop_duplicates(subset = 'country')

# multi-select dropdown: label and value are both the country name
options = [{'label' : name, 'value' : name} for name in country_codes['country']]

# single-select dropdown: shows the country name, passes on its ISO3 code
options2 = [
    {'label' : name, 'value' : code}
    for name, code in zip(country_codes['country'], country_codes['iso3c'])
]

In [216]:
# Dropdown for highlighting several countries at once. Because multi = True the
# value must be a list; a bare string would be iterated character by character
# by the callbacks, so no country would get a label on first load.
multi_countries = dcc.Dropdown(
    id = 'multi_countries',
    options = options,
    value = [options[1]['value']],   # start with the second country pre-selected
    multi = True,
)

# Dropdown for the single-country time series; its value is an ISO3 code.
single_country = dcc.Dropdown(
    id = 'single_country',
    options = options2,
    value = options2[0]['value'],
    multi = False
)

# Radio buttons switching between per-capita and total emissions.
co2_type = dcc.RadioItems(
    id = 'type_co2',
    options = options1,
    value = options1[0]['value'],
)

# register the 'bootstrap' figure template
template = 'bootstrap'
load_figure_template(template)

In [226]:
# (re)create the app so that re-running this cell registers layout and callbacks on a fresh instance
app = JupyterDash(external_stylesheets = [dbc.themes.BOOTSTRAP, dbc_css])

# Page layout, top to bottom: header, controls, region scatter, three
# indicator scatter plots, the map, and finally the single-country series.
# Every dcc.Graph id below is the Output target of one callback defined after the layout.
app.layout = dbc.Container(
    children = [
        
        # page title and introductory text
        html.H1('CO2 Emissions Around The World in 2018'),
        html.P('Examine the CO2 emissions around the world and factors driving it.'),
        
        
        # radio buttons: per-capita vs total emissions
        html.Label('Choose which form of CO2 you want to display'),
        html.Br(),
        co2_type,
        
        # multi-select dropdown; the selected countries get text labels in the plots
        html.Label('Select countries:'),
        html.Br(),
        multi_countries,
        
        html.H3('CO2 Emissions By Region and Country'),
        
        # row with the region scatter plot
        dbc.Row(dcc.Graph(id = 'my_scatter')),
        html.Br(),
        
        # (the single-country dropdown is placed further down, above the time series)
        
        html.Br(),
        html.H3('CO2 Emissions vs GDP per Capita'),
        
        # row with the GDP scatter plot (graph id is 'internet_plot')
        dbc.Row(
            children = [
                dbc.Col(dcc.Graph(id = 'internet_plot'), width = 12)
            ]
        ),
        html.Br(),
        html.H3('CO2 Emissions vs Renewable Energy Consumption'),
        
        # row with the renewable-energy scatter plot
        dbc.Row(
            children = [
                dbc.Col(dcc.Graph(id = 'renew_plot'), width = 12)
            ]
        ),
        html.Br(),
        html.H3('CO2 Emissions vs Natural Resources'),
        
        # row with the natural-resources scatter plot (graph id is 'life_plot')
        dbc.Row(
            children = [
                dbc.Col(dcc.Graph(id = 'life_plot'), width = 12)
            ]
        ),
        html.Br(),
        
        # row with the world map
        dbc.Row(
            children = [
                dbc.Col(dcc.Graph(id = 'map'), width = 12)
            ]),
        html.Br(), 
        html.H3('Single Country Emissions over Time'),
        html.Label('Select a single country:'),
        html.Br(),
        
        
        # single-select dropdown driving the time series below
        single_country,
        
        # row with the single-country line chart
        dbc.Row(
            children = [
                dbc.Col(dcc.Graph(id = 'series'), width = 12)
            ]
        )
        
    ],
    className = 'dbc'
)

@app.callback(
    Output('my_scatter', 'figure'), 
    Input('multi_countries', 'value'),
    Input('type_co2', 'value')
)
def wb_scatter(country_lst, xvar, df = df):
    """Build the region scatter plot: one bubble per country, one row per region.

    country_lst -- value of the 'multi_countries' dropdown. Countries named here
                   get a text label next to their bubble. May be a list of
                   names, a single name, or None when nothing is selected.
    xvar        -- value of the 'type_co2' radio buttons; the column of df that
                   is plotted on the x axis.
    df          -- data to plot; defaults to the module-level df as it was when
                   this function was defined.

    Returns the plotly figure displayed in the 'my_scatter' graph.
    """
    # Normalise the selection: None means "nothing selected", and a bare string
    # is one country (iterating over it would yield single characters).
    if country_lst is None:
        country_lst = []
    elif isinstance(country_lst, str):
        country_lst = [country_lst]

    df_no_nan = df.dropna(subset = ['co2_pc', 'co2', 'gdp_pc']).copy()
    df_no_nan.sort_values(['country'], inplace = True)

    # selected countries are labelled with their name, all others with ''
    is_selected = df_no_nan['country'].isin(list(country_lst))
    df_no_nan['label'] = df_no_nan['country'].where(is_selected, '')

    fig = px.scatter(
        df_no_nan,
        x = xvar,
        y = 'region',
        size = 'co2_pc',
        color = 'region',
        hover_name = 'country',
        hover_data = {'label' : False},   # keep the helper column out of the hover box
        text = 'label',
        animation_group = 'country'
    )

    fig.update_traces(textposition = 'top center')

    fig.update_layout(
        yaxis_title = 'Region',
        xaxis_title = 'CO2 Emissions',
        showlegend = True,
        margin = {'l' : 0, 'r' : 0}
    )

    return fig


@app.callback(
    Output('internet_plot', 'figure'),
    Input('multi_countries', 'value'),
    Input('type_co2', 'value')
)
def gdp_scatter(country_lst, xvar, df = df):
    """Build the scatter plot of CO2 emissions against GDP per capita.

    country_lst -- value of the 'multi_countries' dropdown. Countries named here
                   get a text label next to their bubble. May be a list of
                   names, a single name, or None when nothing is selected.
    xvar        -- value of the 'type_co2' radio buttons; the column of df that
                   is plotted on the x axis.
    df          -- data to plot; defaults to the module-level df as it was when
                   this function was defined.

    Returns the plotly figure displayed in the 'internet_plot' graph, including
    one overall OLS trendline fitted with a log-transformed x.
    """
    # Normalise the selection: None means "nothing selected", and a bare string
    # is one country (iterating over it would yield single characters).
    if country_lst is None:
        country_lst = []
    elif isinstance(country_lst, str):
        country_lst = [country_lst]

    df_no_nan = df.dropna(subset = ['co2_pc', 'co2', 'gdp_pc']).copy()
    df_no_nan.sort_values(['region'], inplace = True)

    # selected countries are labelled with their name, all others with ''
    is_selected = df_no_nan['country'].isin(list(country_lst))
    df_no_nan['label'] = df_no_nan['country'].where(is_selected, '')

    fig = px.scatter(
        df_no_nan,
        x = xvar,
        y = 'gdp_pc',
        size = 'co2_pc',
        color = 'region',
        hover_name = 'country',
        hover_data = {'label' : False},   # keep the helper column out of the hover box
        text = 'label',
        animation_group = 'country',
        trendline = "ols",
        trendline_scope = "overall",
        trendline_options = dict(log_x=True)
    )

    fig.update_layout(
        yaxis_title = 'GDP Per Capita',
        xaxis_title = 'CO2 Emissions',
        showlegend = True,
        margin = {'l' : 0, 'r' : 0}
    )
    fig.update_traces(textposition = 'top center')

    return fig

@app.callback(
    Output('life_plot', 'figure'),
    Input('multi_countries', 'value'),
    Input('type_co2', 'value')
)    
def natural_resource_scatter(country_lst, xvar, df = df):
    """Build the scatter plot of CO2 emissions against the 'nat_resources' column.

    country_lst -- value of the 'multi_countries' dropdown. Countries named here
                   get a text label next to their bubble. May be a list of
                   names, a single name, or None when nothing is selected.
    xvar        -- value of the 'type_co2' radio buttons; the column of df that
                   is plotted on the x axis.
    df          -- data to plot; defaults to the module-level df as it was when
                   this function was defined.

    Returns the plotly figure displayed in the 'life_plot' graph, including one
    overall OLS trendline fitted with a log-transformed x.
    """
    # Normalise the selection: None means "nothing selected", and a bare string
    # is one country (iterating over it would yield single characters).
    if country_lst is None:
        country_lst = []
    elif isinstance(country_lst, str):
        country_lst = [country_lst]

    df_no_nan = df.dropna(subset = ['co2_pc', 'co2', 'nat_resources']).copy()
    df_no_nan.sort_values(['region'], inplace = True)

    # selected countries are labelled with their name, all others with ''
    is_selected = df_no_nan['country'].isin(list(country_lst))
    df_no_nan['label'] = df_no_nan['country'].where(is_selected, '')

    fig = px.scatter(
        df_no_nan,
        x = xvar,
        y = 'nat_resources',
        size = 'co2_pc',
        color = 'region',
        hover_name = 'country',
        hover_data = {'label' : False},   # keep the helper column out of the hover box
        text = 'label',
        animation_group = 'country',
        trendline = "ols",
        trendline_scope = "overall",
        trendline_options = dict(log_x=True)
    )

    fig.update_traces(textposition = 'top center')

    fig.update_layout(
        yaxis_title = 'Natural Resources in Country',
        xaxis_title = 'CO2 Emissions',
        showlegend = True,
        margin = {'l' : 0, 'r' : 0}
    )

    return fig

@app.callback(
    Output('renew_plot', 'figure'),
    Input('multi_countries', 'value'),
    Input('type_co2', 'value')
)    
def renew_scatter(country_lst, xvar, df = df):
    """Build the scatter plot of CO2 emissions against the 'renew_energy' column.

    country_lst -- value of the 'multi_countries' dropdown. Countries named here
                   get a text label next to their bubble. May be a list of
                   names, a single name, or None when nothing is selected.
    xvar        -- value of the 'type_co2' radio buttons; the column of df that
                   is plotted on the x axis.
    df          -- data to plot; defaults to the module-level df as it was when
                   this function was defined.

    Returns the plotly figure displayed in the 'renew_plot' graph, including one
    overall OLS trendline fitted with a log-transformed x.
    """
    # Normalise the selection: None means "nothing selected", and a bare string
    # is one country (iterating over it would yield single characters).
    if country_lst is None:
        country_lst = []
    elif isinstance(country_lst, str):
        country_lst = [country_lst]

    df_no_nan = df.dropna(subset = ['co2_pc', 'co2', 'renew_energy']).copy()
    df_no_nan.sort_values(['region'], inplace = True)

    # selected countries are labelled with their name, all others with ''
    is_selected = df_no_nan['country'].isin(list(country_lst))
    df_no_nan['label'] = df_no_nan['country'].where(is_selected, '')

    fig = px.scatter(
        df_no_nan,
        x = xvar,
        y = 'renew_energy',
        size = 'co2_pc',
        color = 'region',
        hover_name = 'country',
        hover_data = {'label' : False},   # keep the helper column out of the hover box
        text = 'label',
        animation_group = 'country',
        trendline = "ols",
        trendline_scope = "overall",
        trendline_options = dict(log_x=True)
    )

    fig.update_layout(
        yaxis_title = 'Renewable Energy Consumption',
        xaxis_title = 'CO2 Emissions',
        showlegend = True,
        margin = {'l' : 0, 'r' : 0}
    )
    fig.update_traces(textposition = 'top center')

    return fig

@app.callback(
    Output('map', 'figure'),
    Input('multi_countries', 'value'),
    Input('type_co2', 'value')
)    
def return_map(country_lst, xvar, df = df):
    """Build the world map with one bubble per country, sized by emissions.

    country_lst -- value of the 'multi_countries' dropdown. Countries named here
                   get a text label next to their bubble. May be a list of
                   names, a single name, or None when nothing is selected.
    xvar        -- value of the 'type_co2' radio buttons; the column of df that
                   sets the bubble size.
    df          -- data to plot; defaults to the module-level df as it was when
                   this function was defined.

    Returns the plotly figure displayed in the 'map' graph.
    """
    # Normalise the selection: None means "nothing selected", and a bare string
    # is one country (iterating over it would yield single characters).
    if country_lst is None:
        country_lst = []
    elif isinstance(country_lst, str):
        country_lst = [country_lst]

    df_no_nan = df.dropna(subset = ['co2_pc', 'co2']).copy()
    df_no_nan.sort_values(['region'], inplace = True)

    # selected countries are labelled with their name, all others with ''
    is_selected = df_no_nan['country'].isin(list(country_lst))
    df_no_nan['label'] = df_no_nan['country'].where(is_selected, '')

    fig = px.scatter_geo(
        df_no_nan,
        locations = 'iso3c',   # countries are placed on the map by their ISO3 code
        size = xvar,
        hover_name = 'country',
        hover_data = {'country' : False, 'region' : False, 'label' : False, 'iso3c' : False},
        labels = {xvar : 'CO2 Emissions'},
        color = 'region',
        text = 'label'
    )
    fig.update_traces(textposition = 'top center')

    fig.update_layout(
        title = 'CO2 Emissions, 2018',
        title_x = 0.5,
        # a boolean here is coerced to the literal legend title "True";
        # set the title text explicitly instead
        legend_title_text = 'Region',
        geo_showcountries = True,
    )

    return fig

# Time series (line chart) of CO2 emissions for one selected country

@app.callback(
    Output('series', 'figure'),
    Input('single_country', 'value'),
    Input('type_co2', 'value')
)    
def single_series(country_lst, xvar, df = df):
    """Build the emissions-over-time line chart for a single country.

    country_lst -- value of the 'single_country' dropdown: one ISO3 code (a
                   string, despite the parameter name), or None when the
                   dropdown has been cleared.
    xvar        -- value of the 'type_co2' radio buttons; the column plotted on
                   the y axis ('co2_pc' or 'co2').
    df          -- frame used to look up the country name for the ISO3 code;
                   defaults to the module-level df as it was when this function
                   was defined.

    Returns the plotly figure displayed in the 'series' graph. Raises
    PreventUpdate (leaving the current figure in place) when no country is
    selected.
    """
    # imported locally so this callback does not rely on another cell's imports
    from dash.exceptions import PreventUpdate

    # nothing selected: there is no series to download, so keep the old figure
    if not country_lst:
        raise PreventUpdate

    # get_emissions already drops incomplete years and indexes by year;
    # turn the index back into a column and order the rows chronologically
    df_co2 = get_emissions(country_lst).reset_index().sort_values(by = 'year')

    # attach the country name (used as hover title) via the ISO3 code
    country_names = df[['iso3c', 'country']].drop_duplicates()
    df_co2 = pd.merge(df_co2, country_names, on = 'iso3c', how = 'left')

    fig = px.line(
        df_co2,
        x = 'year',
        y = xvar,
        hover_name = 'country',
    )

    fig.update_layout(
        yaxis_title = 'CO2 Emissions',
        xaxis_title = 'Year',
        showlegend = True,
        margin = {'l' : 0, 'r' : 0}
    )

    return fig


# start the Dash server for the app defined above on local port 8070
app.run_server(port=8070) #Change port if necessary

Dash app running on http://127.0.0.1:8070/
