#### Purpose: Find data that will support the claim, or help showcase correalations between 
#### Focus Areas: 
* Great Recession: 2007-2012
* Pandemic: COVID-19/Cornoavirus: explore years between 2010-2019, Records show the years between 2010-2019 marked a decline in CO2 Emissions, primarily after the Paris Agreement

In [1]:
# Import dependencies
import pandas as  pd

In [2]:
# Load OurWorld in Data --dataset
source='https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv'
ow_data_df = pd.read_csv(source)
ow_data_df.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.002,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.011,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.012,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,,,,,,0.012,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.013,...,,,,,,,,,8040000.0,22015460000.0


In [3]:
#check columns
ow_data_df.columns

Index(['iso_code', 'country', 'year', 'co2', 'co2_growth_prct',
       'co2_growth_abs', 'consumption_co2', 'trade_co2', 'trade_co2_share',
       'co2_per_capita', 'consumption_co2_per_capita', 'share_global_co2',
       'cumulative_co2', 'share_global_cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'co2_per_unit_energy', 'cement_co2',
       'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_g

In [4]:
# Check data types
# List dataframe data types
ow_data_df.dtypes

iso_code                                object
country                                 object
year                                     int64
co2                                    float64
co2_growth_prct                        float64
co2_growth_abs                         float64
consumption_co2                        float64
trade_co2                              float64
trade_co2_share                        float64
co2_per_capita                         float64
consumption_co2_per_capita             float64
share_global_co2                       float64
cumulative_co2                         float64
share_global_cumulative_co2            float64
co2_per_gdp                            float64
consumption_co2_per_gdp                float64
co2_per_unit_energy                    float64
cement_co2                             float64
coal_co2                               float64
flaring_co2                            float64
gas_co2                                float64
oil_co2      

In [5]:
# check for missing data?
# Find null values , isnull() method
for c in ow_data_df.columns:
    print(f"Column {c} has {ow_data_df[c].isnull().sum()} null values")

Column iso_code has 2778 null values
Column country has 0 null values
Column year has 0 null values
Column co2 has 538 null values
Column co2_growth_prct has 1798 null values
Column co2_growth_abs has 1691 null values
Column consumption_co2 has 20358 null values
Column trade_co2 has 20390 null values
Column trade_co2_share has 20390 null values
Column co2_per_capita has 1325 null values
Column consumption_co2_per_capita has 20358 null values
Column share_global_co2 has 605 null values
Column cumulative_co2 has 130 null values
Column share_global_cumulative_co2 has 130 null values
Column co2_per_gdp has 8790 null values
Column consumption_co2_per_gdp has 20620 null values
Column co2_per_unit_energy has 16965 null values
Column cement_co2 has 11526 null values
Column coal_co2 has 6717 null values
Column flaring_co2 has 19406 null values
Column gas_co2 has 15015 null values
Column oil_co2 has 3997 null values
Column other_industry_co2 has 22145 null values
Column cement_co2_per_capita has

In [6]:
# start "clean-up"
# copy original dataframe
emissions_df = ow_data_df.copy()
emissions_df.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.002,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.011,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.012,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,,,,,,0.012,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.013,...,,,,,,,,,8040000.0,22015460000.0


In [7]:
# address missing values
emissions_df.fillna(0, inplace=True)

In [8]:
# add the iso_code and country to new dataframe
country_names_df = ow_data_df[['iso_code', 'country']]
country_names_df.head()

Unnamed: 0,iso_code,country
0,AFG,Afghanistan
1,AFG,Afghanistan
2,AFG,Afghanistan
3,AFG,Afghanistan
4,AFG,Afghanistan


In [9]:
# Covert population column to int
emissions_df['population'] = emissions_df['population'].astype(int)

In [10]:
emissions_df.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,0.0,0.0,0.0,0.0,0.0,0.002,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7663783,0.0
1,AFG,Afghanistan,1950,0.084,475.0,0.07,0.0,0.0,0.0,0.011,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7752000,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,0.0,0.0,0.0,0.012,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7840000,20063850000.0
3,AFG,Afghanistan,1952,0.092,0.0,0.0,0.0,0.0,0.0,0.012,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7936000,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,0.0,0.0,0.0,0.013,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8040000,22015460000.0


In [11]:
# drop columns
emissions_df.drop(columns=['methane', 'total_ghg','nitrous_oxide_per_capita','nitrous_oxide','methane_per_capita','ghg_per_capita' ], inplace=True)

In [12]:
# check shape
emissions_df.shape

(23708, 49)

In [13]:
#check columns..again...
emissions_df.columns

Index(['iso_code', 'country', 'year', 'co2', 'co2_growth_prct',
       'co2_growth_abs', 'consumption_co2', 'trade_co2', 'trade_co2_share',
       'co2_per_capita', 'consumption_co2_per_capita', 'share_global_co2',
       'cumulative_co2', 'share_global_cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'co2_per_unit_energy', 'cement_co2',
       'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_g

#### Filter out data to two main focus areas

#### Great Recession: 2007-2012
#### Pre-COVID: Review 2010-2019, Last Decade, Paris Agreement 2015

#### Great Recession: 2007-2012

In [14]:
# Create a Great Recession Dataframe
great_recession_df = emissions_df.loc[(emissions_df['year']>= 2007) & (emissions_df['year'] <= 2012)]
great_recession_df

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,share_global_cumulative_coal_co2,share_global_cumulative_oil_co2,share_global_cumulative_gas_co2,share_global_cumulative_flaring_co2,share_global_cumulative_cement_co2,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
58,AFG,Afghanistan,2007,2.269,38.721,0.633,0.000,0.000,0.000,0.084,...,0.002,0.010,0.01,0.047,0.008,0.000,0.000,0.000,27101000,3.962899e+10
59,AFG,Afghanistan,2008,4.200,85.115,1.931,0.000,0.000,0.000,0.151,...,0.002,0.010,0.01,0.046,0.008,0.000,0.000,0.000,27722000,4.056158e+10
60,AFG,Afghanistan,2009,6.740,60.494,2.540,0.000,0.000,0.000,0.237,...,0.003,0.011,0.01,0.045,0.008,0.000,0.000,0.000,28395000,4.753980e+10
61,AFG,Afghanistan,2010,8.398,24.597,1.658,0.000,0.000,0.000,0.288,...,0.003,0.012,0.01,0.044,0.007,0.000,0.000,0.000,29186000,4.906888e+10
62,AFG,Afghanistan,2011,12.106,44.155,3.708,0.000,0.000,0.000,0.402,...,0.003,0.013,0.01,0.042,0.007,0.000,0.000,0.000,30117000,5.332634e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23696,ZWE,Zimbabwe,2008,7.719,-21.508,-2.115,9.293,1.574,20.389,0.623,...,0.087,0.024,0.00,0.000,0.055,95.216,7691.091,6.658,12380000,1.430100e+10
23697,ZWE,Zimbabwe,2009,5.476,-29.058,-2.243,8.066,2.590,47.294,0.437,...,0.086,0.024,0.00,0.000,0.054,96.386,7694.253,6.366,12527000,1.514130e+10
23698,ZWE,Zimbabwe,2010,7.864,43.615,2.388,9.350,1.485,18.887,0.619,...,0.085,0.024,0.00,0.000,0.053,99.165,7809.533,5.972,12698000,1.660410e+10
23699,ZWE,Zimbabwe,2011,9.480,20.550,1.616,10.608,1.128,11.894,0.735,...,0.084,0.024,0.00,0.000,0.052,106.857,8287.342,5.837,12894000,1.830726e+10


In [15]:
# drop columns

great_recession_df = great_recession_df.drop(columns=['co2_growth_abs','consumption_co2_per_gdp', 'co2_per_unit_energy', 'cement_co2',
       'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_global_cumulative_flaring_co2',
       'share_global_cumulative_cement_co2', 'primary_energy_consumption','energy_per_capita', 'energy_per_gdp','cumulative_co2','trade_co2','trade_co2_share','share_global_cumulative_co2'])

In [16]:
great_recession_df.columns

Index(['iso_code', 'country', 'year', 'co2', 'co2_growth_prct',
       'consumption_co2', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'co2_per_gdp', 'population', 'gdp'],
      dtype='object')

In [17]:
great_recession_df.shape

(1361, 12)

In [18]:
great_recession_df.dtypes

iso_code                       object
country                        object
year                            int64
co2                           float64
co2_growth_prct               float64
consumption_co2               float64
co2_per_capita                float64
consumption_co2_per_capita    float64
share_global_co2              float64
co2_per_gdp                   float64
population                      int32
gdp                           float64
dtype: object

In [19]:
# replace zeros in iso_code
# great_recession_df['iso_code'] = great_recession_df['iso_code'].replace(['0'],'notfound')
# great_recession_df.fillna(, inplace=True)

In [22]:
#re-order columns
great_recession_df = great_recession_df[['iso_code', 'country', 'population','year','co2','co2_per_capita','co2_per_gdp','co2_growth_prct',
       'consumption_co2',  'consumption_co2_per_capita',
       'share_global_co2']]

In [23]:
# export to csv file
file_path="resources/data/great_recession.csv"
great_recession_df.to_csv(file_path, index_label ='record_no', index=True)

In [20]:
# great_recession_df.loc[great_recession_df['iso_code']=="notfound"]

#### Pre-COVID: Review 2010-2019

In [25]:
# filter for the last ten years PRE-COVID
# pre_covid_df = emissions_df.loc[(emissions_df['year'] >= 2010) & (emissions_df['year'] <= 2019)]
pre_covid_df = emissions_df.loc[(emissions_df['year'] >= 2010)]

In [26]:
pre_covid_df.tail()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,share_global_cumulative_coal_co2,share_global_cumulative_oil_co2,share_global_cumulative_gas_co2,share_global_cumulative_flaring_co2,share_global_cumulative_cement_co2,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
23703,ZWE,Zimbabwe,2015,12.17,1.653,0.198,13.308,1.138,9.35,0.881,...,0.081,0.025,0.0,0.0,0.049,0.0,0.0,0.0,13815000,25030570000.0
23704,ZWE,Zimbabwe,2016,10.815,-11.139,-1.356,12.171,1.356,12.542,0.771,...,0.08,0.025,0.0,0.0,0.049,0.0,0.0,0.0,14030000,25151760000.0
23705,ZWE,Zimbabwe,2017,10.247,-5.251,-0.568,11.774,1.527,14.902,0.72,...,0.08,0.025,0.0,0.0,0.048,0.0,0.0,0.0,14237000,0.0
23706,ZWE,Zimbabwe,2018,11.341,10.674,1.094,12.815,1.475,13.006,0.785,...,0.079,0.025,0.0,0.0,0.048,0.0,0.0,0.0,14439000,0.0
23707,ZWE,Zimbabwe,2019,10.374,-8.521,-0.966,0.0,0.0,0.0,0.708,...,0.078,0.025,0.0,0.0,0.048,0.0,0.0,0.0,14645000,0.0


In [27]:
# drop columns
pre_covid_df = pre_covid_df.drop(columns=['co2_growth_abs','consumption_co2_per_gdp', 'co2_per_unit_energy', 'cement_co2',
       'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_global_cumulative_flaring_co2',
       'share_global_cumulative_cement_co2', 'primary_energy_consumption','energy_per_capita', 'energy_per_gdp','cumulative_co2','trade_co2','trade_co2_share','share_global_cumulative_co2'])

In [28]:
# drop empty rows
# pre_covid_df=pre_covid_df.dropna()
# pre_covid_df.loc[pre_covid_df['year']== 2019]

In [29]:
# replace null cells, with zero
# pre_covid_df = pre_covid_df.replace(np.nan,0)

In [30]:
# re-order columns

pre_covid_df = pre_covid_df[['iso_code', 'country', 'population','year','co2','co2_per_capita','co2_per_gdp','co2_growth_prct',
       'consumption_co2',  'consumption_co2_per_capita',
       'share_global_co2']]
pre_covid_df.columns

Index(['iso_code', 'country', 'population', 'year', 'co2', 'co2_per_capita',
       'co2_per_gdp', 'co2_growth_prct', 'consumption_co2',
       'consumption_co2_per_capita', 'share_global_co2'],
      dtype='object')

In [31]:
pre_covid_df.tail()

Unnamed: 0,iso_code,country,population,year,co2,co2_per_capita,co2_per_gdp,co2_growth_prct,consumption_co2,consumption_co2_per_capita,share_global_co2
23703,ZWE,Zimbabwe,13815000,2015,12.17,0.881,0.486,1.653,13.308,0.963,0.035
23704,ZWE,Zimbabwe,14030000,2016,10.815,0.771,0.43,-11.139,12.171,0.868,0.031
23705,ZWE,Zimbabwe,14237000,2017,10.247,0.72,0.0,-5.251,11.774,0.827,0.029
23706,ZWE,Zimbabwe,14439000,2018,11.341,0.785,0.0,10.674,12.815,0.888,0.031
23707,ZWE,Zimbabwe,14645000,2019,10.374,0.708,0.0,-8.521,0.0,0.0,0.028


In [32]:
pre_covid_df.dtypes

iso_code                       object
country                        object
population                      int32
year                            int64
co2                           float64
co2_per_capita                float64
co2_per_gdp                   float64
co2_growth_prct               float64
consumption_co2               float64
consumption_co2_per_capita    float64
share_global_co2              float64
dtype: object

In [33]:
# write to dataframe cleaned data to csv
file_path="resources/data/pre_covid.csv"
pre_covid_df.to_csv(file_path, index_label='record_no', index=True)