In [2]:
import pandas as pd
import pycountry
import country_converter

A helper for country matching:

In [142]:
def ISO3(country_name):
    """Return the ISO 3166-1 alpha-3 code for ``country_name`` using pycountry.

    Names listed in the hard-coded table below are resolved directly; every
    other name is passed to ``pycountry.countries.search_fuzzy`` and the first
    hit is used.

    NOTE(review): a later cell in this notebook defines ``ISO3`` again (backed
    by ``country_converter``); whichever cell was executed last is the
    definition in effect.

    Parameters
    ----------
    country_name : str
        Country name as it appears in a source dataset.

    Returns
    -------
    str or None
        The alpha-3 code, or ``None`` (after printing a ``No Match`` message)
        when the name cannot be resolved.
    """
    matches = { ## Hardcoding Strings pycountry won't catch
        'Bolivia (Plurinational State of)':'BOL',
        'Democratic Republic of the Congo':'COD',
        'Dem. Rep. Congo': 'COD',
        'Iran (Islamic Republic of)':'IRN',
        'Micronesia (Federated States of)':'FSM',
        'Türkiye': 'TUR',
        'TÃ¼rkiye': 'TUR',
        'Venezuela (Bolivarian Republic of)':'VEN',
        'Hong Kong SAR, China' : 'HKG',
        'SÃ£o TomÃ© and Principe' : 'STP',
        'Swaziland' : 'SWZ',
        'Laos' : 'LAO',
        'Lao PDR' : 'LAO',
        'Cape Verde' : 'CPV',
        "CÃ´te d'Ivoire": 'CIV'
    }
    if country_name in matches:
        return matches[country_name]
    if not isinstance(country_name, str):
        # Non-text values (e.g. a missing cell) cannot be a country name;
        # report them like any other unmatched name instead of handing them
        # to the fuzzy search.
        print(f'No Match for {country_name}')
        return None
    try:
        return pycountry.countries.search_fuzzy(country_name)[0].alpha_3
    except LookupError:
        # Only the "nothing found" case is treated as a miss. A bare `except:`
        # would also hide KeyboardInterrupt and genuine programming errors.
        print(f'No Match for {country_name}')
        return None

In [8]:
def ISO3(country_name):
    """Return an ISO3 country code for ``country_name`` using country_converter.

    Names listed in the hard-coded table below are resolved directly; every
    other name is passed to ``country_converter.convert``.

    NOTE(review): the recorded output of the cells that call this helper shows
    messages of the form "<name> not found in regex" rather than
    "No Match for <name>", so unmatched names apparently do not raise here and
    do not take the ``None`` branch. Check what ``convert`` returns for them
    before relying on a ``None`` result downstream.

    Parameters
    ----------
    country_name : str
        Country name as it appears in a source dataset.

    Returns
    -------
    object
        The hard-coded code, or whatever ``country_converter.convert`` returns
        for the name; ``None`` (after printing a ``No Match`` message) only if
        the conversion raises.
    """
    matches = { ## Hardcoding Strings pycountry won't catch
        'Bolivia (Plurinational State of)':'BOL',
        'Democratic Republic of the Congo':'COD',
        'Dem. Rep. Congo': 'COD',
        'Iran (Islamic Republic of)':'IRN',
        'Micronesia (Federated States of)':'FSM',
        'Türkiye': 'TUR',
        'TÃ¼rkiye': 'TUR',
        'Venezuela (Bolivarian Republic of)':'VEN',
        'Hong Kong SAR, China' : 'HKG',
        'SÃ£o TomÃ© and Principe' : 'STP',
        'Swaziland' : 'SWZ',
        'Laos' : 'LAO',
        'Lao PDR' : 'LAO',
        'Cape Verde' : 'CPV',
        "CÃ´te d'Ivoire": 'CIV'
    }
    if country_name in matches:
        return matches[country_name]
    try:
        return country_converter.convert(names=country_name)
    except Exception:
        # Best-effort lookup: any conversion error becomes a reported miss.
        # `Exception` (not a bare `except:`) so Ctrl-C / SystemExit still
        # interrupt a long-running cell.
        print(f'No Match for {country_name}')
        return None

Population Data:

In [9]:
# Read the WPP 2022 workbook (skipping its first 16 rows), keep the four
# columns used below and give them short names.
pop_df = (
    pd.read_excel("WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx", skiprows=16)
    .loc[:, [
        'Region, subregion, country or area *',
        'ISO3 Alpha-code',
        'Year',
        'Total Population, as of 1 July (thousands)',
    ]]
    .set_axis(['Country', 'ISO3', 'Year', 'Population'], axis=1)
)

Carbon Data:

In [10]:
# Data from: https://www.icos-cp.eu/science-and-impact/global-carbon-budget/2022
# Read the 'Territorial Emissions' sheet and transpose it.
co2_df = pd.read_excel(
    "National_Fossil_Carbon_Emissions_2022v1.0.xlsx",
    sheet_name='Territorial Emissions',
    skiprows=11,
).transpose()
# The first transposed row supplies the column labels; it is dropped afterwards.
co2_df.columns = co2_df.iloc[0]
# Keep the index as a 'Country' column, then reshape wide -> long.
co2_df = (
    co2_df.iloc[1:]
    .assign(Country=lambda wide: wide.index)
    .melt(id_vars='Country', var_name='Year', value_name='Total Emmisions')
)


Finding ISO3s for the CO2 data and then joining the population data:

In [11]:
# Resolve each distinct country name once, then look the code up per row.
countries = set(co2_df['Country'])
iso3s = {name: ISO3(name) for name in countries}
co2_df['ISO3'] = co2_df['Country'].map(lambda name: iso3s[name])

# Attach population by (ISO3, Year); keep the emissions-side country name.
co2_df = (
    co2_df.merge(pop_df, on=['ISO3', 'Year'], how='inner')
    .loc[:, ['ISO3', 'Country_x', 'Year', 'Total Emmisions', 'Population']]
    .set_axis(['ISO3', 'Country', 'Year', 'Total Emmisions', 'Population'], axis=1)
)
co2_df['Emmisions Per Capita'] = co2_df['Total Emmisions'] / co2_df['Population']
co2_df

Non-OECD not found in regex
Oceania not found in regex
Bunkers not found in regex
South America not found in regex
Statistical Difference not found in regex
Central America not found in regex
Non KP Annex B not found in regex
Europe not found in regex
Middle East not found in regex
Asia not found in regex
KP Annex B not found in regex
EU27 not found in regex
North America not found in regex
World not found in regex
OECD not found in regex
Africa not found in regex


Unnamed: 0,ISO3,Country,Year,Total Emmisions,Population,Emmisions Per Capita
0,AFG,Afghanistan,1950.0,0.023000,7480.461,0.000003
1,ALB,Albania,1950.0,0.080984,1252.582,0.000065
2,DZA,Algeria,1950.0,1.032643,9019.866,0.000114
3,AND,Andorra,1950.0,,6.005,
4,AGO,Angola,1950.0,0.051000,4478.184,0.000011
...,...,...,...,...,...,...
15763,VNM,Viet Nam,2021.0,88.977529,97468.029,0.000913
15764,WLF,Wallis and Futuna Islands,2021.0,0.007592,11.627,0.000653
15765,YEM,Yemen,2021.0,3.405185,32981.641,0.000103
15766,ZMB,Zambia,2021.0,2.095027,19473.125,0.000108


Then GDP Data:

In [13]:
# Read the UNdata export, drop its last column and name the remaining three.
gdp_df = (
    pd.read_csv('UNdata_Export_20230102_161208339.csv')
    .iloc[:, :-1]
    .set_axis(['Country', 'Year', 'GDP Pc'], axis=1)
)

# Resolve each distinct country name once, then look the code up per row.
countries = set(gdp_df['Country'])
iso3s = {name: ISO3(name) for name in countries}
gdp_df['ISO3'] = gdp_df['Country'].map(lambda name: iso3s[name])


gdp_df


South Asia not found in regex
Heavily indebted poor countries (HIPC) not found in regex
Fragile and conflict affected situations not found in regex
Africa Western and Central not found in regex
Central Europe and the Baltics not found in regex
Other small states not found in regex
Pre-demographic dividend not found in regex
IDA total not found in regex
Low income not found in regex
Sub-Saharan Africa ( not found in regex
European Union not found in regex
South Asia (IDA & IBRD) not found in regex
Lower middle income not found in regex
IDA only not found in regex
Euro area not found in regex
East Asia & Pacific not found in regex
Middle East & North Africa not found in regex
Sub-Saharan Africa not found in regex
Small states not found in regex
Middle income not found in regex
Upper middle income not found in regex
Post-demographic dividend not found in regex
Early-demographic dividend not found in regex
Pacific island small states not found in regex
Sub-Saharan Africa (IDA & IBRD) not f

Unnamed: 0,Country,Year,GDP Pc,ISO3
0,Afghanistan,2020,2078.479082,AFG
1,Afghanistan,2019,2152.190243,AFG
2,Afghanistan,2018,2082.392197,AFG
3,Afghanistan,2017,2058.400221,AFG
4,Afghanistan,2016,1981.118069,AFG
...,...,...,...,...
7238,Zimbabwe,1994,1923.209711,ZWE
7239,Zimbabwe,1993,1750.061859,ZWE
7240,Zimbabwe,1992,1721.591128,ZWE
7241,Zimbabwe,1991,1888.041180,ZWE


Then joining the GDP data with the CO<sub>2</sub> data and keeping only the most recent year available for each country:

In [14]:
# Join emissions/population with GDP on country code and year.
df = pd.merge(co2_df, gdp_df, how='inner', on=['ISO3', 'Year'])
# Having issues with Dependent Territories receiving European ISO3 (and so pop),
# so these names are excluded explicitly.
df = df[~df.Country_x.isin(['Mayotte', 'Niger', 'Martinique', 'Guadeloupe', 'Curaçao'])]


# Drop rows without a country code or without a per-capita value.
# `.notna()` is required here: `df.ISO3 != None` compares element-wise and is
# True for every row (including missing ones), so it never filtered anything.
df = df[df.ISO3.notna() & df['Emmisions Per Capita'].notnull()]
# Keep, per country code, only the row(s) from its latest remaining year.
df = df[df['Year'] == df.groupby('ISO3')['Year'].transform('max')]

and finally, merging in Regions:

In [15]:
# Load the code -> region lookup and rename its columns to match `df`.
region_df = (
    pd.read_csv('regions.csv')
    .loc[:, ['alpha-3', 'region']]
    .set_axis(['ISO3', 'Region'], axis=1)
)

# Inner join: rows whose ISO3 has no entry in the lookup are dropped.
df = df.merge(region_df, how='inner', on=['ISO3'])

and just selecting the columns of interest:

In [16]:
# Drop the GDP-side country name (if present) and keep the emissions-side one
# under the plain 'Country' label.
df = (
    df.drop(columns='Country_y', errors='ignore')
    .rename(columns={'Country_x': 'Country'})
)
df

Unnamed: 0,ISO3,Country,Year,Total Emmisions,Population,Emmisions Per Capita,GDP Pc,Region
0,ERI,Eritrea,2011.0,0.151701,3207.57,0.000047,1625.507673,Africa
1,VEN,Venezuela,2011.0,48.019916,29096.159,0.00165,17527.748738,Americas
2,YEM,Yemen,2013.0,7.412774,26984.002,0.000275,3688.519409,Asia
3,SSD,South Sudan,2015.0,0.522000,11194.299,0.000047,1234.725576,Africa
4,SXM,Sint Maarten (Dutch part),2018.0,0.188000,42.246,0.00445,35973.182502,Americas
...,...,...,...,...,...,...,...,...
187,UZB,Uzbekistan,2020.0,32.270660,33526.656,0.000963,7746.430969,Asia
188,VUT,Vanuatu,2020.0,0.046438,311.685,0.000149,3036.116767,Oceania
189,VNM,Viet Nam,2020.0,89.765211,96648.685,0.000929,10904.452146,Asia
190,ZMB,Zambia,2020.0,1.987080,18927.715,0.000105,3457.328102,Africa


In [17]:
# Serialise the merged table as a JSON array of row objects.
with open('emmisions_gdp.json', mode='w') as json_file:
    json_file.write(df.to_json(orient='records'))

### Second Graph: Consumption Emissions Per Capita Per Year

In [18]:
# Data from: https://www.icos-cp.eu/science-and-impact/global-carbon-budget/2022
cons_df = pd.read_excel("National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name='Consumption Emissions', skiprows=8)
cons_df = cons_df.T
cons_df.columns = cons_df.iloc[0,:]
cons_df = cons_df.iloc[1:,:]
cons_df['Country'] = cons_df.index
cons_df = cons_df.melt('Country', var_name='Year', value_name='Consumption Emmisions')

#Getting ISO3s
countries = set(cons_df.Country)
iso3s = [ISO3(country) for country in countries]
iso3s = dict(zip(countries, iso3s))
cons_df['ISO3']  = cons_df.apply(lambda row: iso3s[row.Country] , axis = 1)

#Merging in Population Data
cons_df = pd.merge(cons_df, pop_df, on=['ISO3', 'Year'], how='inner')
cons_df = cons_df[['ISO3', 'Country_x', 'Year', 'Consumption Emmisions', 'Population']]
cons_df.columns = ['ISO3', 'Country', 'Year', 'Consumption Emmisions', 'Population']
cons_df['Consumption Emmisions Per Capita'] = cons_df['Consumption Emmisions']/cons_df['Population']
cons_df = cons_df[cons_df['Consumption Emmisions Per Capita'].notnull()]

#Ints for years
cons_df['Year'] = cons_df['Year'].astype(int)

#Filtering Country Range
cons_df = cons_df[cons_df.ISO3.isin(['CHN', 'USA', 'GBR', 'FRA', 'DEU', 'ITA', 'POL' ])].sort_values(by=['Year'])

Non-OECD not found in regex
Oceania not found in regex
Bunkers not found in regex
South America not found in regex
Statistical Difference not found in regex
Central America not found in regex
Non KP Annex B not found in regex
Europe not found in regex
Middle East not found in regex
Asia not found in regex
KP Annex B not found in regex
EU27 not found in regex
North America not found in regex
World not found in regex
OECD not found in regex
Africa not found in regex


In [19]:
# Show the current contents of `cons_df` as this cell's output.
cons_df

Unnamed: 0,ISO3,Country,Year,Consumption Emmisions,Population,Consumption Emmisions Per Capita
38,CHN,China,1990,633.474245,1153704.252,0.000549
67,FRA,France,1990,134.440030,56412.897,0.002383
73,DEU,Germany,1990,325.294796,79370.196,0.004098
94,ITA,Italy,1990,154.732791,56756.561,0.002726
154,POL,Poland,1990,87.980696,38064.255,0.002311
...,...,...,...,...,...,...
6637,FRA,France,2020,102.436338,64480.053,0.001589
6643,DEU,Germany,2020,210.014207,83328.988,0.00252
6664,ITA,Italy,2020,106.119463,59500.579,0.001784
6724,POL,Poland,2020,78.352677,38428.366,0.002039


In [20]:
# Serialise the consumption table as a JSON array of row objects.
with open('consumption.json', mode='w') as json_file:
    json_file.write(cons_df.to_json(orient='records'))