In [119]:
import pandas as pd
import pycountry

A helper for country matching:

In [120]:
def ISO3(country_name):
    matches = { ## Hardcoding Strings pycountry won't catch
        'Bolivia (Plurinational State of)':'BOL',
        'Democratic Republic of the Congo':'COD',
        'Dem. Rep. Congo': 'COD',
        'Iran (Islamic Republic of)':'IRN',
        'Micronesia (Federated States of)':'FSM',
        'Türkiye': 'TUR',
        'TÃ¼rkiye': 'TUR',
        'Venezuela (Bolivarian Republic of)':'VEN',
        'Hong Kong SAR, China' : 'HKG',
        'SÃ£o TomÃ© and Principe' : 'STP',
        'Swaziland' : 'SWZ',
        'Laos' : 'LAO',
        'Lao PDR' : 'LAO',
        'Cape Verde' : 'CPV',
        "CÃ´te d'Ivoire": 'CIV'
    }
    if country_name in matches:
        return matches[country_name]
    try:
        return pycountry.countries.search_fuzzy(country_name)[0].alpha_3
    except:
        print(f'No Match for {country_name}')
        return None

Population Data:

In [121]:
pop_df = pd.read_excel("WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx", skiprows=16)
pop_df = pop_df[['Region, subregion, country or area *', 'ISO3 Alpha-code','Year', 'Total Population, as of 1 July (thousands)']]
pop_df.columns = ['Country', 'ISO3', 'Year', 'Population']

Carbon Data:

In [122]:
# Data from: https://www.icos-cp.eu/science-and-impact/global-carbon-budget/2022
co2_df = pd.read_excel("National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name='Territorial Emissions', skiprows=11)
co2_df = co2_df.T
co2_df.columns = co2_df.iloc[0,:]
co2_df = co2_df.iloc[1:,:]
co2_df['Country'] = co2_df.index
co2_df = co2_df.melt('Country', var_name='Year', value_name='Total Emmisions')


Finding ISO3s for for CO2 data and then joining population data:

In [123]:
countries = set(co2_df.Country)
iso3s = [ISO3(country) for country in countries]
iso3s = dict(zip(countries, iso3s))
co2_df['ISO3']  = co2_df.apply(lambda row: iso3s[row.Country] , axis = 1)
co2_df = pd.merge(co2_df, pop_df, on=['ISO3', 'Year'], how='inner')
co2_df = co2_df[['ISO3', 'Country_x', 'Year', 'Total Emmisions', 'Population']]
co2_df.columns = ['ISO3', 'Country', 'Year', 'Total Emmisions', 'Population']
co2_df['Emmisions Per Capita'] = co2_df['Total Emmisions']/co2_df['Population']
co2_df

No Match for EU27
No Match for OECD
No Match for World
No Match for South America
No Match for Europe
No Match for Wallis and Futuna Islands
No Match for Faeroe Islands
No Match for Non KP Annex B
No Match for North America
No Match for Central America
No Match for Non-OECD
No Match for Statistical Difference
No Match for Bonaire, Saint Eustatius and Saba
No Match for Occupied Palestinian Territory
No Match for Bunkers
No Match for KP Annex B
No Match for Oceania
No Match for Asia
No Match for Middle East


Unnamed: 0,ISO3,Country,Year,Total Emmisions,Population,Emmisions Per Capita
0,AFG,Afghanistan,1950.0,0.023000,7480.461,0.000003
1,ALB,Albania,1950.0,0.080984,1252.582,0.000065
2,DZA,Algeria,1950.0,1.032643,9019.866,0.000114
3,AND,Andorra,1950.0,,6.005,
4,AGO,Angola,1950.0,0.051000,4478.184,0.000011
...,...,...,...,...,...,...
82579,VEN,Venezuela,2021.0,21.764902,28199.867,0.000772
82580,VNM,Viet Nam,2021.0,88.977529,97468.029,0.000913
82581,YEM,Yemen,2021.0,3.405185,32981.641,0.000103
82582,ZMB,Zambia,2021.0,2.095027,19473.125,0.000108


Then GDP Data:

In [124]:
gdp_df = pd.read_csv('UNdata_Export_20230102_161208339.csv').iloc[:,:-1]
gdp_df.columns = ['Country', 'Year', 'GDP Pc']

countries = set(gdp_df.Country)
iso3s = [ISO3(country) for country in countries]
iso3s = dict(zip(countries, iso3s))
gdp_df['ISO3']  = gdp_df.apply(lambda row: iso3s[row.Country] , axis = 1)


gdp_df


No Match for Low & middle income
No Match for St. Kitts and Nevis
No Match for Pacific island small states
No Match for OECD members
No Match for IBRD only
No Match for East Asia & Pacific
No Match for Arab World
No Match for Least developed countries: UN classification
No Match for IDA total
No Match for Latin America & Caribbean (excluding high income)
No Match for Middle income
No Match for Africa Eastern and Southern
No Match for Late-demographic dividend
No Match for Sub-Saharan Africa
No Match for World
No Match for Sub-Saharan Africa (IDA & IBRD)
No Match for High income
No Match for Lower middle income
No Match for CuraÃ§ao
No Match for IDA only
No Match for Euro area
No Match for Middle East & North Africa
No Match for Latin America & Caribbean
No Match for Europe & Central Asia (excluding high income)
No Match for Middle East & North Africa (excluding high income)
No Match for Upper middle income
No Match for Heavily indebted poor countries (HIPC)
No Match for Latin America &

Unnamed: 0,Country,Year,GDP Pc,ISO3
0,Afghanistan,2020,2078.479082,AFG
1,Afghanistan,2019,2152.190243,AFG
2,Afghanistan,2018,2082.392197,AFG
3,Afghanistan,2017,2058.400221,AFG
4,Afghanistan,2016,1981.118069,AFG
...,...,...,...,...
7238,Zimbabwe,1994,1923.209711,ZWE
7239,Zimbabwe,1993,1750.061859,ZWE
7240,Zimbabwe,1992,1721.591128,ZWE
7241,Zimbabwe,1991,1888.041180,ZWE


Then joining GDP data with CO<sub>2</sub> data and getting the most recent year for each:

In [125]:
df = pd.merge(co2_df, gdp_df, how='inner', on=['ISO3', 'Year'])
df = df[ (df.ISO3!=None) & (df['Emmisions Per Capita'].notnull()) ]
df = df[df['Year'] == df.groupby('ISO3')['Year'].transform('max')]

and finally, merging in Regions:

In [126]:
region_df = pd.read_csv('regions.csv')
region_df = region_df[['alpha-3', 'region']]
region_df.columns = ['ISO3', 'Region']

df = pd.merge(df, region_df, how='inner', on=['ISO3'])

and just selecting the columns of interest:

In [127]:
df = df.loc[:, df.columns != 'Country_y']
df = df.rename(columns={'Country_x':'Country'})
df

Unnamed: 0,ISO3,Country,Year,Total Emmisions,Population,Emmisions Per Capita,GDP Pc,Region
0,ERI,Eritrea,2011.0,0.151701,3207.57,0.000047,1625.507673,Africa
1,VEN,Venezuela,2011.0,48.019916,29096.159,0.00165,17527.748738,Americas
2,YEM,Yemen,2013.0,7.412774,26984.002,0.000275,3688.519409,Asia
3,SSD,South Sudan,2015.0,0.522000,11194.299,0.000047,1234.725576,Africa
4,SXM,Sint Maarten (Dutch part),2018.0,0.188000,42.246,0.00445,35973.182502,Americas
...,...,...,...,...,...,...,...,...
193,UZB,Uzbekistan,2020.0,32.270660,33526.656,0.000963,7746.430969,Asia
194,VUT,Vanuatu,2020.0,0.046438,311.685,0.000149,3036.116767,Oceania
195,VNM,Viet Nam,2020.0,89.765211,96648.685,0.000929,10904.452146,Asia
196,ZMB,Zambia,2020.0,1.987080,18927.715,0.000105,3457.328102,Africa


In [130]:
with open('emmisions_gdp.json', 'w') as f:
    df.to_json(f, orient='records')

### Second Graph: Consumption Emmisions Per Capita Per Year 

In [98]:
# Data from: https://www.icos-cp.eu/science-and-impact/global-carbon-budget/2022
cons_df = pd.read_excel("National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name='Consumption Emissions', skiprows=8)
cons_df = cons_df.T
cons_df.columns = cons_df.iloc[0,:]
cons_df = cons_df.iloc[1:,:]
cons_df['Country'] = cons_df.index
cons_df = cons_df.melt('Country', var_name='Year', value_name='Consumption Emmisions')

#Getting ISO3s
countries = set(cons_df.Country)
iso3s = [ISO3(country) for country in countries]
iso3s = dict(zip(countries, iso3s))
cons_df['ISO3']  = cons_df.apply(lambda row: iso3s[row.Country] , axis = 1)

#Merging in Population Data
cons_df = pd.merge(cons_df, pop_df, on=['ISO3', 'Year'], how='inner')
cons_df = cons_df[['ISO3', 'Country_x', 'Year', 'Consumption Emmisions', 'Population']]
cons_df.columns = ['ISO3', 'Country', 'Year', 'Consumption Emmisions', 'Population']
cons_df['Consumption Emmisions Per Capita'] = cons_df['Consumption Emmisions']/cons_df['Population']
cons_df = cons_df[cons_df['Consumption Emmisions Per Capita'].notnull()]

No Match for EU27
No Match for OECD
No Match for World
No Match for South America
No Match for Europe
No Match for Wallis and Futuna Islands
No Match for Faeroe Islands
No Match for Non KP Annex B
No Match for North America
No Match for Central America
No Match for Non-OECD
No Match for Statistical Difference
No Match for Bonaire, Saint Eustatius and Saba
No Match for Occupied Palestinian Territory
No Match for Bunkers
No Match for KP Annex B
No Match for Oceania
No Match for Asia
No Match for Middle East


In [97]:
cons_df = cons_df[cons_df.ISO3.isin(['CHN', 'USA', 'GBR', 'FRA', 'DEU', 'ITA', 'POL' ])].sort_values(by=['Year'])

Unnamed: 0,ISO3,Country,Year,Consumption Emmisions,Population,Consumption Emmisions Per Capita
968,CHN,China,1990.0,633.474245,1153704.252,0.000549
997,FRA,France,1990.0,134.440030,56412.897,0.002383
1006,DEU,Germany,1990.0,325.294796,79370.196,0.004098
1026,ITA,Italy,1990.0,154.732791,56756.561,0.002726
1083,POL,Poland,1990.0,87.980696,38064.255,0.002311
...,...,...,...,...,...,...
35407,FRA,France,2020.0,102.436338,64480.053,0.001589
35416,DEU,Germany,2020.0,210.014207,83328.988,0.00252
35436,ITA,Italy,2020.0,106.119463,59500.579,0.001784
35493,POL,Poland,2020.0,78.352677,38428.366,0.002039
