In [1]:
# Import dependencies.
# NOTE(review): psycopg2 and datetime (dt) are not referenced in the cells
# visible here -- confirm they are needed elsewhere before keeping them.
import psycopg2
import pandas as pd
import datetime as dt

import warnings
# Silences every warning for the rest of the session.
# NOTE(review): this also hides pandas diagnostics (e.g. chained-assignment
# and deprecation warnings); consider filtering a specific category instead.
warnings.filterwarnings('ignore')


In [2]:
# Read the emissions dataset from a CSV located relative to the working
# directory, then render the frame as the cell output.
emissions = pd.read_csv(filepath_or_buffer='global_emissions.csv')
emissions

Unnamed: 0,iso_code,country,year,temperature(C),population,gdp,co2,share_global_co2,co2_per_capita,co2_per_gdp,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_cumulative_co2
0,AFG,Afghanistan,1950,11.619864,7752000.0,1.949480e+10,0.084,0.001,0.011,0.004,475.000,0.070,0.099,0.000
1,AFG,Afghanistan,1951,12.647118,7840000.0,2.006385e+10,0.092,0.001,0.012,0.005,8.696,0.007,0.191,0.000
2,AFG,Afghanistan,1953,13.272074,8040000.0,2.201546e+10,0.106,0.002,0.013,0.005,16.000,0.015,0.388,0.000
3,AFG,Afghanistan,1955,13.275182,8271000.0,2.292989e+10,0.154,0.002,0.019,0.007,44.828,0.048,0.649,0.000
4,AFG,Afghanistan,1956,12.785808,8399000.0,2.395993e+10,0.183,0.002,0.022,0.008,19.048,0.029,0.832,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11268,ZWE,Zimbabwe,2012,21.910075,13115000.0,2.048226e+10,7.659,0.022,0.584,0.374,-19.213,-1.821,690.720,0.049
11269,ZWE,Zimbabwe,2013,21.624350,13350000.0,2.374258e+10,11.617,0.033,0.870,0.489,51.675,3.958,702.337,0.049
11270,ZWE,Zimbabwe,2014,21.710483,13587000.0,2.474828e+10,11.973,0.034,0.881,0.484,3.065,0.356,714.309,0.048
11271,ZWE,Zimbabwe,2015,22.327625,13815000.0,2.503057e+10,12.170,0.035,0.881,0.486,1.653,0.198,726.480,0.048


In [3]:
# Count the distinct countries present in the merged dataset.
# unique() returns values in order of first appearance, so the list follows
# the row order of the frame.
country_values = emissions['country'].unique()
countrylist = country_values.tolist()
len(countrylist)

160

In [4]:
# Display the distinct country names held in `countrylist`.
# NOTE(review): the rendered output contains 'Republic of the Congo ' with a
# trailing space -- consider stripping whitespace from the country column.
countrylist

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Benin',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cape Verde',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Republic of the Congo ',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czech Republic',
 'Democratic Republic of Congo',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Estonia',
 'Ethiopia',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Haiti',
 'Honduras',
 'Hungary',
 'Iceland',
 'India',
 'Indonesia',
 'Iran',
 'Iraq',
 'Ireland',
 'Israel',
 'Italy',
 'Jamaica',
 'Japan',
 'Jordan',
 'Kazakhs

In [5]:
# Derive GDP per capita (gdp / population) as a new column.
emissions['gdp_per_capita'] = emissions['gdp'] / emissions['population']

# Column order for the analysis frame: gdp_per_capita is placed directly
# after co2_per_capita so the two per-person metrics sit side by side.
column_order = [
    'iso_code', 'country', 'year', 'temperature(C)', 'population', 'gdp',
    'co2', 'co2_per_capita', 'gdp_per_capita', 'co2_per_gdp',
    'share_global_co2', 'co2_growth_prct', 'co2_growth_abs',
    'cumulative_co2', 'share_global_cumulative_co2',
]

# .copy() makes the reordered frame an explicit, independent DataFrame.
# Later cells assign new columns to `emissions`; without the copy pandas may
# treat the frame as a subset of the one loaded from CSV and flag those
# assignments as chained (SettingWithCopyWarning), which the blanket warnings
# filter in the import cell would hide.
emissions = emissions[column_order].copy()
emissions.head()

Unnamed: 0,iso_code,country,year,temperature(C),population,gdp,co2,co2_per_capita,gdp_per_capita,co2_per_gdp,share_global_co2,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_cumulative_co2
0,AFG,Afghanistan,1950,11.619864,7752000.0,19494800000.0,0.084,0.011,2514.808999,0.004,0.001,475.0,0.07,0.099,0.0
1,AFG,Afghanistan,1951,12.647118,7840000.0,20063850000.0,0.092,0.012,2559.164343,0.005,0.001,8.696,0.007,0.191,0.0
2,AFG,Afghanistan,1953,13.272074,8040000.0,22015460000.0,0.106,0.013,2738.241719,0.005,0.002,16.0,0.015,0.388,0.0
3,AFG,Afghanistan,1955,13.275182,8271000.0,22929890000.0,0.154,0.019,2772.323695,0.007,0.002,44.828,0.048,0.649,0.0
4,AFG,Afghanistan,1956,12.785808,8399000.0,23959930000.0,0.183,0.022,2852.712089,0.008,0.002,19.048,0.029,0.832,0.0


In [6]:
# Keep only the rows whose year is 2016 and write that snapshot to
# Data2016.csv (without the index) so it can be inspected outside the notebook.
year_is_2016 = emissions['year'].eq(2016)
data_2016 = emissions[year_is_2016]
data_2016.to_csv("Data2016.csv", index=False)

To determine a baseline for whether a country is a disproportionate polluter, I broke out the data for 2016, the most recent year in the dataset. I exported it as a CSV to get a full look at the data and pick a baseline for a "bad" ratio of CO2 to GDP.  

In [7]:
# Display the subset of `emissions` rows whose year equals 2016.
data_2016

Unnamed: 0,iso_code,country,year,temperature(C),population,gdp,co2,co2_per_capita,gdp_per_capita,co2_per_gdp,share_global_co2,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_cumulative_co2
64,AFG,Afghanistan,2016,14.513565,35383000.0,6.429743e+10,6.745,0.191,1817.184155,0.105,0.019,-14.670,-1.160,153.343,0.010
131,ALB,Albania,2016,12.722338,2886000.0,3.435154e+10,4.496,1.558,11902.820590,0.131,0.013,-0.808,-0.037,266.109,0.017
178,DZA,Algeria,2016,24.067067,40551000.0,5.340000e+11,148.839,3.670,13168.602501,0.279,0.423,-1.312,-1.979,3956.021,0.256
220,AGO,Angola,2016,22.555908,28842000.0,1.480000e+11,34.111,1.183,5131.405589,0.230,0.097,0.388,0.132,597.797,0.039
335,ARG,Argentina,2016,14.506255,43508000.0,8.200000e+11,190.930,4.388,18847.108578,0.233,0.542,-0.747,-1.436,7737.747,0.501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11005,VEN,Venezuela,2016,26.478200,29851000.0,3.960000e+11,163.191,5.467,13265.887240,0.412,0.463,-4.511,-7.709,7348.288,0.476
11073,VNM,Vietnam,2016,25.073158,93640000.0,5.750000e+11,185.432,1.980,6140.538232,0.322,0.526,0.540,0.997,2993.867,0.194
11139,YEM,Yemen,2016,24.221200,27168000.0,6.272208e+10,10.426,0.384,2308.674789,0.166,0.030,-19.840,-2.580,585.103,0.038
11205,ZMB,Zambia,2016,22.737050,16363000.0,5.757741e+10,4.838,0.296,3518.756375,0.084,0.014,12.623,0.542,214.398,0.014


After looking at the 2016 data in Excel, I've determined that it's useful to separate countries into categories based on their emissions compared to other factors.  

To show the relationship between each country's CO2 output and economic output, the `co2_per_gdp` column is compared by country. A CO2-to-GDP ratio of 0.3 is chosen as the cutoff between 'high' and 'low' emission-ratio countries, as it encompasses the world's largest economies and some surprising countries, giving a good baseline for analysis. So, countries with a CO2-to-GDP ratio over 0.3 will be labeled 'High', and all others will be labeled 'Low', in a new column called `emission_ratio`.

A column will also be created to show a country's impact on a global scale, by measuring its share of global CO2 against its population. A high ratio shows that the country emits a share of global pollution that is disproportionate to its population size. The ratio is computed as `share_global_co2` divided by `population` (multiplied by 100,000,000 for readability). A ratio of 4 or more will rate as 'High', a ratio of at least 1 but below 4 will rate as 'Medium', and a ratio of less than 1 will rate as 'Low'.


In [8]:
# Ratio of each row's share of global CO2 to its population, multiplied by
# 100,000,000 so the values land in a readable range.
share_per_person = emissions['share_global_co2'].div(emissions['population'])
emissions['global_co2_to_pop'] = share_per_person.mul(100000000)
emissions

Unnamed: 0,iso_code,country,year,temperature(C),population,gdp,co2,co2_per_capita,gdp_per_capita,co2_per_gdp,share_global_co2,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_cumulative_co2,global_co2_to_pop
0,AFG,Afghanistan,1950,11.619864,7752000.0,1.949480e+10,0.084,0.011,2514.808999,0.004,0.001,475.000,0.070,0.099,0.000,0.012900
1,AFG,Afghanistan,1951,12.647118,7840000.0,2.006385e+10,0.092,0.012,2559.164343,0.005,0.001,8.696,0.007,0.191,0.000,0.012755
2,AFG,Afghanistan,1953,13.272074,8040000.0,2.201546e+10,0.106,0.013,2738.241719,0.005,0.002,16.000,0.015,0.388,0.000,0.024876
3,AFG,Afghanistan,1955,13.275182,8271000.0,2.292989e+10,0.154,0.019,2772.323695,0.007,0.002,44.828,0.048,0.649,0.000,0.024181
4,AFG,Afghanistan,1956,12.785808,8399000.0,2.395993e+10,0.183,0.022,2852.712089,0.008,0.002,19.048,0.029,0.832,0.000,0.023812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11268,ZWE,Zimbabwe,2012,21.910075,13115000.0,2.048226e+10,7.659,0.584,1561.743118,0.374,0.022,-19.213,-1.821,690.720,0.049,0.167747
11269,ZWE,Zimbabwe,2013,21.624350,13350000.0,2.374258e+10,11.617,0.870,1778.470621,0.489,0.033,51.675,3.958,702.337,0.049,0.247191
11270,ZWE,Zimbabwe,2014,21.710483,13587000.0,2.474828e+10,11.973,0.881,1821.467867,0.484,0.034,3.065,0.356,714.309,0.048,0.250239
11271,ZWE,Zimbabwe,2015,22.327625,13815000.0,2.503057e+10,12.170,0.881,1811.840028,0.486,0.035,1.653,0.198,726.480,0.048,0.253348


In [9]:
# Label each row by its co2_per_gdp value: 'High' when it is strictly greater
# than 0.3, otherwise 'Low'. Missing values compare as False and therefore
# also receive 'Low'.
ratio_labels = {True: 'High', False: 'Low'}
exceeds_cutoff = emissions['co2_per_gdp'].gt(0.3)
emissions['emission_ratio'] = exceeds_cutoff.map(ratio_labels)
emissions

Unnamed: 0,iso_code,country,year,temperature(C),population,gdp,co2,co2_per_capita,gdp_per_capita,co2_per_gdp,share_global_co2,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_cumulative_co2,global_co2_to_pop,emission_ratio
0,AFG,Afghanistan,1950,11.619864,7752000.0,1.949480e+10,0.084,0.011,2514.808999,0.004,0.001,475.000,0.070,0.099,0.000,0.012900,Low
1,AFG,Afghanistan,1951,12.647118,7840000.0,2.006385e+10,0.092,0.012,2559.164343,0.005,0.001,8.696,0.007,0.191,0.000,0.012755,Low
2,AFG,Afghanistan,1953,13.272074,8040000.0,2.201546e+10,0.106,0.013,2738.241719,0.005,0.002,16.000,0.015,0.388,0.000,0.024876,Low
3,AFG,Afghanistan,1955,13.275182,8271000.0,2.292989e+10,0.154,0.019,2772.323695,0.007,0.002,44.828,0.048,0.649,0.000,0.024181,Low
4,AFG,Afghanistan,1956,12.785808,8399000.0,2.395993e+10,0.183,0.022,2852.712089,0.008,0.002,19.048,0.029,0.832,0.000,0.023812,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11268,ZWE,Zimbabwe,2012,21.910075,13115000.0,2.048226e+10,7.659,0.584,1561.743118,0.374,0.022,-19.213,-1.821,690.720,0.049,0.167747,High
11269,ZWE,Zimbabwe,2013,21.624350,13350000.0,2.374258e+10,11.617,0.870,1778.470621,0.489,0.033,51.675,3.958,702.337,0.049,0.247191,High
11270,ZWE,Zimbabwe,2014,21.710483,13587000.0,2.474828e+10,11.973,0.881,1821.467867,0.484,0.034,3.065,0.356,714.309,0.048,0.250239,High
11271,ZWE,Zimbabwe,2015,22.327625,13815000.0,2.503057e+10,12.170,0.881,1811.840028,0.486,0.035,1.653,0.198,726.480,0.048,0.253348,High


In [10]:
def _impact_label(ratio):
    """Bucket a global_co2_to_pop value: 'High' (>= 4), 'Medium' (>= 1), else 'Low'."""
    if ratio >= 4:
        return 'High'
    if ratio >= 1:
        return 'Medium'
    return 'Low'


# Classify every row's global_co2_to_pop ratio into an impact tier.
emissions['global_impact'] = emissions['global_co2_to_pop'].apply(_impact_label)
emissions

Unnamed: 0,iso_code,country,year,temperature(C),population,gdp,co2,co2_per_capita,gdp_per_capita,co2_per_gdp,share_global_co2,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_cumulative_co2,global_co2_to_pop,emission_ratio,global_impact
0,AFG,Afghanistan,1950,11.619864,7752000.0,1.949480e+10,0.084,0.011,2514.808999,0.004,0.001,475.000,0.070,0.099,0.000,0.012900,Low,Low
1,AFG,Afghanistan,1951,12.647118,7840000.0,2.006385e+10,0.092,0.012,2559.164343,0.005,0.001,8.696,0.007,0.191,0.000,0.012755,Low,Low
2,AFG,Afghanistan,1953,13.272074,8040000.0,2.201546e+10,0.106,0.013,2738.241719,0.005,0.002,16.000,0.015,0.388,0.000,0.024876,Low,Low
3,AFG,Afghanistan,1955,13.275182,8271000.0,2.292989e+10,0.154,0.019,2772.323695,0.007,0.002,44.828,0.048,0.649,0.000,0.024181,Low,Low
4,AFG,Afghanistan,1956,12.785808,8399000.0,2.395993e+10,0.183,0.022,2852.712089,0.008,0.002,19.048,0.029,0.832,0.000,0.023812,Low,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11268,ZWE,Zimbabwe,2012,21.910075,13115000.0,2.048226e+10,7.659,0.584,1561.743118,0.374,0.022,-19.213,-1.821,690.720,0.049,0.167747,High,Low
11269,ZWE,Zimbabwe,2013,21.624350,13350000.0,2.374258e+10,11.617,0.870,1778.470621,0.489,0.033,51.675,3.958,702.337,0.049,0.247191,High,Low
11270,ZWE,Zimbabwe,2014,21.710483,13587000.0,2.474828e+10,11.973,0.881,1821.467867,0.484,0.034,3.065,0.356,714.309,0.048,0.250239,High,Low
11271,ZWE,Zimbabwe,2015,22.327625,13815000.0,2.503057e+10,12.170,0.881,1811.840028,0.486,0.035,1.653,0.198,726.480,0.048,0.253348,High,Low


In [16]:
# Optional sanity check: export the enriched frame to CSV for inspection.
# Currently disabled (commented out), so this cell writes nothing.
#emissions.to_csv('emissions.csv', index=False)