#### Purpose: Find data that will support the claim, or help  substantiate correalations  between World Events and Carbon Events

#### Focus Areas: 
* Great Recession: 2007-2012
* Pandemic: COVID-19/Cornoavirus: explore years between 2010-2019, Records show the years between 2010-2019 
* Additional Notes: marked a decline in CO2 Emissions, primarily after the Paris Agreement


###### DataSource: Our World In Data, (ourworldindata.org)

GitHub:https://github.com/owid/co2-data

    *Guidebook: https://github.com/owid/co2-data/blob/master/owid-co2-codebook.csv
    

In [1]:
# Import dependencies
import pandas as  pd
import os

#### Time to explore...

In [2]:
# Load OurWorld in Data --dataset
source='https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv'
ow_data_df = pd.read_csv(source)
ow_data_df.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.002,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.011,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.012,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,,,,,,0.012,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.013,...,,,,,,,,,8040000.0,22015460000.0


In [3]:
# check for shape of dataframe, original dataset
ow_data_df.shape

(23708, 55)

In [4]:
#check for data 2020-2021
ow_data_df.loc[(ow_data_df['year']== 2020 & 2021)]

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp


In [5]:
#check columns
ow_data_df.columns

Index(['iso_code', 'country', 'year', 'co2', 'co2_growth_prct',
       'co2_growth_abs', 'consumption_co2', 'trade_co2', 'trade_co2_share',
       'co2_per_capita', 'consumption_co2_per_capita', 'share_global_co2',
       'cumulative_co2', 'share_global_cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'co2_per_unit_energy', 'cement_co2',
       'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_g

In [6]:
# Check data types
# List dataframe data types
ow_data_df.dtypes

iso_code                                object
country                                 object
year                                     int64
co2                                    float64
co2_growth_prct                        float64
co2_growth_abs                         float64
consumption_co2                        float64
trade_co2                              float64
trade_co2_share                        float64
co2_per_capita                         float64
consumption_co2_per_capita             float64
share_global_co2                       float64
cumulative_co2                         float64
share_global_cumulative_co2            float64
co2_per_gdp                            float64
consumption_co2_per_gdp                float64
co2_per_unit_energy                    float64
cement_co2                             float64
coal_co2                               float64
flaring_co2                            float64
gas_co2                                float64
oil_co2      

In [7]:
# check for missing data?
# Find null values , isnull() method
for c in ow_data_df.columns:
    print(f"Column {c} has {ow_data_df[c].isnull().sum()} null values")

Column iso_code has 2778 null values
Column country has 0 null values
Column year has 0 null values
Column co2 has 538 null values
Column co2_growth_prct has 1798 null values
Column co2_growth_abs has 1691 null values
Column consumption_co2 has 20358 null values
Column trade_co2 has 20390 null values
Column trade_co2_share has 20390 null values
Column co2_per_capita has 1325 null values
Column consumption_co2_per_capita has 20358 null values
Column share_global_co2 has 605 null values
Column cumulative_co2 has 130 null values
Column share_global_cumulative_co2 has 130 null values
Column co2_per_gdp has 8790 null values
Column consumption_co2_per_gdp has 20620 null values
Column co2_per_unit_energy has 16965 null values
Column cement_co2 has 11526 null values
Column coal_co2 has 6717 null values
Column flaring_co2 has 19406 null values
Column gas_co2 has 15015 null values
Column oil_co2 has 3997 null values
Column other_industry_co2 has 22145 null values
Column cement_co2_per_capita has

In [8]:
# copy original dataframe
emissions_df = ow_data_df.copy()
emissions_df.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.002,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.011,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.012,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,,,,,,0.012,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.013,...,,,,,,,,,8040000.0,22015460000.0


In [9]:
# check columns..again...
emissions_df.columns

Index(['iso_code', 'country', 'year', 'co2', 'co2_growth_prct',
       'co2_growth_abs', 'consumption_co2', 'trade_co2', 'trade_co2_share',
       'co2_per_capita', 'consumption_co2_per_capita', 'share_global_co2',
       'cumulative_co2', 'share_global_cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'co2_per_unit_energy', 'cement_co2',
       'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_g

In [10]:
# emissions_df.loc[(emissions_df['year'] >= 2019)]

In [11]:
# add the iso_code and country to new dataframe
# country_names_df = ow_data_df[['iso_code', 'country']]
# country_names_df.head()

In [12]:
# drop columns
# (columns=['co2_per_unit_energy','share_global_coal_co2', 'share_global_oil_co2',
#        'share_global_gas_co2', 'share_global_flaring_co2',
#        'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
#        'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
#        'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
#        'share_global_cumulative_gas_co2',
#        'share_global_cumulative_flaring_co2',
#        'share_global_cumulative_cement_co2', 'primary_energy_consumption',
#        'energy_per_capita', 'energy_per_gdp','share_global_cumulative_co2','total_ghg', 'ghg_per_capita', 'methane',
#        'methane_per_capita', 'nitrous_oxide', 'nitrous_oxide_per_capita'], inplace=True)

In [13]:
#create new dataframe with dropped columns
new_emissions_df = emissions_df.drop(columns=['co2_per_unit_energy','share_global_coal_co2', 'share_global_oil_co2',
       'share_global_gas_co2', 'share_global_flaring_co2',
       'share_global_cement_co2', 'cumulative_coal_co2', 'cumulative_oil_co2',
       'cumulative_gas_co2', 'cumulative_flaring_co2', 'cumulative_cement_co2',
       'share_global_cumulative_coal_co2', 'share_global_cumulative_oil_co2',
       'share_global_cumulative_gas_co2',
       'share_global_cumulative_flaring_co2',
       'share_global_cumulative_cement_co2', 'primary_energy_consumption',
       'energy_per_capita', 'energy_per_gdp','share_global_cumulative_co2','total_ghg', 'ghg_per_capita', 'methane',
       'methane_per_capita', 'nitrous_oxide', 'nitrous_oxide_per_capita'])

In [14]:
# check dataframe
new_emissions_df.head()

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita,other_co2_per_capita,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.002,...,,,,0.002,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.011,...,0.063,,,0.003,,,0.008,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.012,...,0.066,,,0.003,,,0.008,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,,,,,,0.012,...,0.06,,,0.004,,,0.008,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.013,...,0.068,,,0.005,,,0.008,,8040000.0,22015460000.0


In [15]:
new_emissions_df.columns

Index(['iso_code', 'country', 'year', 'co2', 'co2_growth_prct',
       'co2_growth_abs', 'consumption_co2', 'trade_co2', 'trade_co2_share',
       'co2_per_capita', 'consumption_co2_per_capita', 'share_global_co2',
       'cumulative_co2', 'co2_per_gdp', 'consumption_co2_per_gdp',
       'cement_co2', 'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2',
       'other_industry_co2', 'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'other_co2_per_capita', 'population', 'gdp'],
      dtype='object')

In [16]:
# address missing values
new_emissions_df.fillna(value=0, inplace=True)

In [17]:
#reorder columns
new_emissions_df=new_emissions_df[['iso_code', 'country', 'year', 'population','gdp', 'co2', 'co2_growth_prct',
       'co2_growth_abs', 'consumption_co2', 'trade_co2', 'trade_co2_share',
       'co2_per_capita', 'consumption_co2_per_capita', 'share_global_co2',
       'cumulative_co2', 'co2_per_gdp', 'consumption_co2_per_gdp',
       'cement_co2', 'coal_co2', 'flaring_co2', 'gas_co2', 'oil_co2',
       'other_industry_co2', 'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita']]

In [18]:
new_emissions_df.tail()

Unnamed: 0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
23703,ZWE,Zimbabwe,2015,13815000.0,25030570000.0,12.17,1.653,0.198,13.308,1.138,...,8.035,0.0,0.0,3.631,0.0,0.037,0.582,0.0,0.0,0.263
23704,ZWE,Zimbabwe,2016,14030000.0,25151760000.0,10.815,-11.139,-1.356,12.171,1.356,...,7.061,0.0,0.0,3.173,0.0,0.041,0.503,0.0,0.0,0.226
23705,ZWE,Zimbabwe,2017,14237000.0,0.0,10.247,-5.251,-0.568,11.774,1.527,...,6.383,0.0,0.0,3.283,0.0,0.041,0.448,0.0,0.0,0.231
23706,ZWE,Zimbabwe,2018,14439000.0,0.0,11.341,10.674,1.094,12.815,1.475,...,7.369,0.0,0.0,3.39,0.0,0.04,0.51,0.0,0.0,0.235
23707,ZWE,Zimbabwe,2019,14645000.0,0.0,10.374,-8.521,-0.966,0.0,0.0,...,6.304,0.0,0.0,3.489,0.0,0.04,0.43,0.0,0.0,0.238


#### Events Essentially Review (of last 20 years CO2 Emissions to cover both time periods),  2000-2019

In [19]:
new_emissions_df.shape

(23708, 28)

In [20]:
# filter between 2000-2019, or 2010 -2019-->create new dataframe
# new_emissions_df.loc[(new_emissions_df['year'] >= 2010) & (emissions_df['year'] <= 2020)]
new_emissions_df.loc[new_emissions_df['year'] >=2000]

Unnamed: 0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
51,AFG,Afghanistan,2000,20780000.0,1.650883e+10,0.758,-6.404,-0.052,0.000,0.000,...,0.004,0.022,0.224,0.498,0.0,0.000,0.000,0.001,0.011,0.024
52,AFG,Afghanistan,2001,21607000.0,1.557484e+10,0.798,5.318,0.040,0.000,0.000,...,0.070,0.022,0.209,0.491,0.0,0.000,0.003,0.001,0.010,0.023
53,AFG,Afghanistan,2002,22601000.0,2.567680e+10,1.052,31.787,0.254,0.000,0.000,...,0.055,0.000,0.546,0.440,0.0,0.000,0.002,0.000,0.024,0.019
54,AFG,Afghanistan,2003,23681000.0,2.780556e+10,1.186,12.775,0.134,0.000,0.000,...,0.092,0.000,0.465,0.619,0.0,0.000,0.004,0.000,0.020,0.026
55,AFG,Afghanistan,2004,24727000.0,2.844769e+10,0.889,-25.025,-0.297,0.000,0.000,...,0.092,0.000,0.227,0.561,0.0,0.000,0.004,0.000,0.009,0.023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23703,ZWE,Zimbabwe,2015,13815000.0,2.503057e+10,12.170,1.653,0.198,13.308,1.138,...,8.035,0.000,0.000,3.631,0.0,0.037,0.582,0.000,0.000,0.263
23704,ZWE,Zimbabwe,2016,14030000.0,2.515176e+10,10.815,-11.139,-1.356,12.171,1.356,...,7.061,0.000,0.000,3.173,0.0,0.041,0.503,0.000,0.000,0.226
23705,ZWE,Zimbabwe,2017,14237000.0,0.000000e+00,10.247,-5.251,-0.568,11.774,1.527,...,6.383,0.000,0.000,3.283,0.0,0.041,0.448,0.000,0.000,0.231
23706,ZWE,Zimbabwe,2018,14439000.0,0.000000e+00,11.341,10.674,1.094,12.815,1.475,...,7.369,0.000,0.000,3.390,0.0,0.040,0.510,0.000,0.000,0.235


In [21]:
#new dataframe
decade_ems_df =new_emissions_df.loc[new_emissions_df['year'] >=2000]
decade_ems_df

Unnamed: 0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
51,AFG,Afghanistan,2000,20780000.0,1.650883e+10,0.758,-6.404,-0.052,0.000,0.000,...,0.004,0.022,0.224,0.498,0.0,0.000,0.000,0.001,0.011,0.024
52,AFG,Afghanistan,2001,21607000.0,1.557484e+10,0.798,5.318,0.040,0.000,0.000,...,0.070,0.022,0.209,0.491,0.0,0.000,0.003,0.001,0.010,0.023
53,AFG,Afghanistan,2002,22601000.0,2.567680e+10,1.052,31.787,0.254,0.000,0.000,...,0.055,0.000,0.546,0.440,0.0,0.000,0.002,0.000,0.024,0.019
54,AFG,Afghanistan,2003,23681000.0,2.780556e+10,1.186,12.775,0.134,0.000,0.000,...,0.092,0.000,0.465,0.619,0.0,0.000,0.004,0.000,0.020,0.026
55,AFG,Afghanistan,2004,24727000.0,2.844769e+10,0.889,-25.025,-0.297,0.000,0.000,...,0.092,0.000,0.227,0.561,0.0,0.000,0.004,0.000,0.009,0.023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23703,ZWE,Zimbabwe,2015,13815000.0,2.503057e+10,12.170,1.653,0.198,13.308,1.138,...,8.035,0.000,0.000,3.631,0.0,0.037,0.582,0.000,0.000,0.263
23704,ZWE,Zimbabwe,2016,14030000.0,2.515176e+10,10.815,-11.139,-1.356,12.171,1.356,...,7.061,0.000,0.000,3.173,0.0,0.041,0.503,0.000,0.000,0.226
23705,ZWE,Zimbabwe,2017,14237000.0,0.000000e+00,10.247,-5.251,-0.568,11.774,1.527,...,6.383,0.000,0.000,3.283,0.0,0.041,0.448,0.000,0.000,0.231
23706,ZWE,Zimbabwe,2018,14439000.0,0.000000e+00,11.341,10.674,1.094,12.815,1.475,...,7.369,0.000,0.000,3.390,0.0,0.040,0.510,0.000,0.000,0.235


In [22]:
# find the unique values of the dataframe, column
# decade_ems_df.country.unquie()
decade_ems_df['country'].nunique()

227

In [23]:
decade_ems_df['country'].unique()

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Asia', 'Asia (excl. China & India)', 'Australia', 'Austria',
       'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
       'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan',
       'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire",
       'Croatia', 'Cuba', 'Cyprus', 'Czechia',
       'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'EU-27', 'EU-28', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia'

Regions found:
Asia, Europe, Oceania, Australia, Africa, North America and South America

Regions Missing: Central America, (The) Carribean

    * Create "missing" regions, filter countries by regions
    * House in dataframe
    * Append new regions to "end" of 'decade_ems' dataframe

### Create  Geo-Regions for: The Caribbeans, Central America
* Create new regions from great_recession_index.csv

    * Central America: 'Belize', 'Costa Rica', 'El Salvador', 'Guatemala', 'Honduras', 'Nicaragua', 'Panama'
* Caribbean:

     'British Virgin Islands', 'Anguilla', 'Antigua and Barbuda', 'Aruba', 'Bahamas', 'Barbados', 'Bermuda', 'British Virgin Islands', 'Cayman Islands', 'Cuba', 'Dominica', 'Dominican Republic', 'Grenada', 'Guadeloupe', 'Haiti', 'Jamaica', 'Martinique', 'Montserrat', 'Netherlands Antilles', 'Puerto Rico', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the 'Grenadines', 'Trinidad and Tobago', 'Turks and Caicos Islands', 'U.S. Virgin Islands'

#### (The) Caribbean

In [24]:
#create a dataframe of all carribean countries:
# filter for Carribean Countries
carb=['British Virgin Islands', 'Anguilla', 'Antigua and Barbuda', 'Aruba',  'Bahamas', 'Barbados', 'Bermuda', 'British Virgin Islands', 'Cayman Islands', 'Cuba', 'Dominica', 'Dominican Republic', 'Grenada', 'Guadeloupe', 'Haiti', 'Jamaica', 'Martinique', 'Montserrat', 'Netherlands Antilles', 'Puerto Rico', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines', 'Trinidad and Tobago', 'Turks and Caicos Islands', 'U.S. Virgin Islands']
caribbean_df=decade_ems_df[decade_ems_df.country.isin(carb)]

In [25]:
caribbean_df.columns

Index(['iso_code', 'country', 'year', 'population', 'gdp', 'co2',
       'co2_growth_prct', 'co2_growth_abs', 'consumption_co2', 'trade_co2',
       'trade_co2_share', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'cement_co2', 'coal_co2', 'flaring_co2',
       'gas_co2', 'oil_co2', 'other_industry_co2', 'cement_co2_per_capita',
       'coal_co2_per_capita', 'flaring_co2_per_capita', 'gas_co2_per_capita',
       'oil_co2_per_capita'],
      dtype='object')

In [26]:
caribbean_df.shape

(380, 28)

In [27]:
caribbean_df.head()

Unnamed: 0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
508,AIA,Anguilla,2000,11000.0,0.0,0.088,9.091,0.007,0.0,0.0,...,0.0,0.0,0.0,0.088,0.0,0.0,0.0,0.0,0.0,7.994
509,AIA,Anguilla,2001,12000.0,0.0,0.095,8.333,0.007,0.0,0.0,...,0.0,0.0,0.0,0.095,0.0,0.0,0.0,0.0,0.0,7.939
510,AIA,Anguilla,2002,12000.0,0.0,0.095,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.095,0.0,0.0,0.0,0.0,0.0,7.939
511,AIA,Anguilla,2003,12000.0,0.0,0.103,7.692,0.007,0.0,0.0,...,0.0,0.0,0.0,0.103,0.0,0.0,0.0,0.0,0.0,8.549
512,AIA,Anguilla,2004,12000.0,0.0,0.121,17.857,0.018,0.0,0.0,...,0.0,0.0,0.0,0.121,0.0,0.0,0.0,0.0,0.0,10.076


In [28]:
# add the sum of c02 and co2_per_capita, groupby year, create new dataframe
car_df=caribbean_df.groupby('year').sum()[['population', 'gdp', 'co2',
       'co2_growth_prct', 'co2_growth_abs', 'consumption_co2', 'trade_co2',
       'trade_co2_share', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'cement_co2', 'coal_co2', 'flaring_co2',
       'gas_co2', 'oil_co2', 'other_industry_co2', 'cement_co2_per_capita',
       'coal_co2_per_capita', 'flaring_co2_per_capita', 'gas_co2_per_capita',
       'oil_co2_per_capita']]
car_df

Unnamed: 0_level_0,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,consumption_co2_per_capita,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,33323000.0,171378000000.0,88.943,250.91,5.686,43.775,-10.324,-54.541,110.13,15.614,...,0.525,0.0,20.742,65.188,0.0,1.031,0.121,0.0,15.706,93.274
2001,33674000.0,177302300000.0,91.242,79.519,2.302,49.03,-7.711,-25.37,111.688,18.576,...,0.748,0.0,23.72,64.282,0.0,0.982,0.133,0.0,17.936,92.635
2002,34015000.0,180198900000.0,95.824,136.627,4.579,48.8,-11.455,-51.079,119.369,19.672,...,0.927,0.0,25.816,66.386,0.0,1.1,0.176,0.0,19.523,98.57
2003,34354000.0,184236800000.0,99.777,101.644,3.956,51.329,-13.211,-42.431,122.945,21.72,...,3.136,0.004,29.315,64.644,0.0,1.121,0.433,0.013,21.666,99.711
2004,34682000.0,188937300000.0,96.196,71.106,-3.579,47.829,-13.162,-35.62,125.64,20.659,...,2.348,0.004,31.309,59.949,0.0,1.102,0.327,0.013,23.269,100.932
2005,35001000.0,204523100000.0,102.562,51.136,6.365,51.508,-14.755,-16.788,131.105,21.071,...,1.502,0.004,35.57,62.862,0.0,1.073,0.225,0.013,26.244,103.549
2006,35308000.0,225964500000.0,110.025,94.238,7.466,49.167,-23.911,-31.955,136.584,17.925,...,1.975,0.004,40.762,64.114,0.0,1.179,0.254,0.013,29.586,105.552
2007,35602000.0,241426300000.0,112.905,110.614,2.883,57.976,-17.959,-2.24,143.249,22.77,...,2.103,0.0,43.287,64.251,0.0,1.165,0.259,0.0,30.957,110.869
2008,35891000.0,251481000000.0,114.638,39.286,1.728,59.447,-15.696,1.376,142.678,22.934,...,2.334,0.004,41.887,67.122,0.0,1.157,0.288,0.013,29.865,111.356
2009,36187000.0,248240500000.0,110.816,-10.926,-3.822,61.695,-10.272,-3.705,137.193,27.905,...,2.291,0.004,42.331,63.067,0.0,1.028,0.281,0.013,30.007,105.862


In [29]:
#convert index to column
car_df.reset_index()

Unnamed: 0,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
0,2000,33323000.0,171378000000.0,88.943,250.91,5.686,43.775,-10.324,-54.541,110.13,...,0.525,0.0,20.742,65.188,0.0,1.031,0.121,0.0,15.706,93.274
1,2001,33674000.0,177302300000.0,91.242,79.519,2.302,49.03,-7.711,-25.37,111.688,...,0.748,0.0,23.72,64.282,0.0,0.982,0.133,0.0,17.936,92.635
2,2002,34015000.0,180198900000.0,95.824,136.627,4.579,48.8,-11.455,-51.079,119.369,...,0.927,0.0,25.816,66.386,0.0,1.1,0.176,0.0,19.523,98.57
3,2003,34354000.0,184236800000.0,99.777,101.644,3.956,51.329,-13.211,-42.431,122.945,...,3.136,0.004,29.315,64.644,0.0,1.121,0.433,0.013,21.666,99.711
4,2004,34682000.0,188937300000.0,96.196,71.106,-3.579,47.829,-13.162,-35.62,125.64,...,2.348,0.004,31.309,59.949,0.0,1.102,0.327,0.013,23.269,100.932
5,2005,35001000.0,204523100000.0,102.562,51.136,6.365,51.508,-14.755,-16.788,131.105,...,1.502,0.004,35.57,62.862,0.0,1.073,0.225,0.013,26.244,103.549
6,2006,35308000.0,225964500000.0,110.025,94.238,7.466,49.167,-23.911,-31.955,136.584,...,1.975,0.004,40.762,64.114,0.0,1.179,0.254,0.013,29.586,105.552
7,2007,35602000.0,241426300000.0,112.905,110.614,2.883,57.976,-17.959,-2.24,143.249,...,2.103,0.0,43.287,64.251,0.0,1.165,0.259,0.0,30.957,110.869
8,2008,35891000.0,251481000000.0,114.638,39.286,1.728,59.447,-15.696,1.376,142.678,...,2.334,0.004,41.887,67.122,0.0,1.157,0.288,0.013,29.865,111.356
9,2009,36187000.0,248240500000.0,110.816,-10.926,-3.822,61.695,-10.272,-3.705,137.193,...,2.291,0.004,42.331,63.067,0.0,1.028,0.281,0.013,30.007,105.862


In [30]:
#add new columns
#add country/region to Carribean
#pd.Dataframe[new_column] = value 
car_df['country'] = "Caribbean"
car_df['iso_code']=" "
#add year
car_df['year']=['2000','2001','2002','2003', '2004','2005', '2006','2007','2008','2009','2010','2011','2012','2013','2014', '2015','2016','2017','2018', '2019']

In [31]:
#check columns
car_df.columns

Index(['population', 'gdp', 'co2', 'co2_growth_prct', 'co2_growth_abs',
       'consumption_co2', 'trade_co2', 'trade_co2_share', 'co2_per_capita',
       'consumption_co2_per_capita', 'share_global_co2', 'cumulative_co2',
       'co2_per_gdp', 'consumption_co2_per_gdp', 'cement_co2', 'coal_co2',
       'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'country', 'iso_code', 'year'],
      dtype='object')

In [32]:
car_df.shape

(20, 28)

In [33]:
#reorder columns
car_df=car_df[['iso_code', 'country', 'year', 'population', 'gdp', 'co2',
       'co2_growth_prct', 'co2_growth_abs', 'consumption_co2', 'trade_co2',
       'trade_co2_share', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'cement_co2', 'coal_co2', 'flaring_co2',
       'gas_co2', 'oil_co2', 'other_industry_co2', 'cement_co2_per_capita',
       'coal_co2_per_capita', 'flaring_co2_per_capita', 'gas_co2_per_capita',
       'oil_co2_per_capita']]

In [34]:
car_df.head()

Unnamed: 0_level_0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,,Caribbean,2000,33323000.0,171378000000.0,88.943,250.91,5.686,43.775,-10.324,...,0.525,0.0,20.742,65.188,0.0,1.031,0.121,0.0,15.706,93.274
2001,,Caribbean,2001,33674000.0,177302300000.0,91.242,79.519,2.302,49.03,-7.711,...,0.748,0.0,23.72,64.282,0.0,0.982,0.133,0.0,17.936,92.635
2002,,Caribbean,2002,34015000.0,180198900000.0,95.824,136.627,4.579,48.8,-11.455,...,0.927,0.0,25.816,66.386,0.0,1.1,0.176,0.0,19.523,98.57
2003,,Caribbean,2003,34354000.0,184236800000.0,99.777,101.644,3.956,51.329,-13.211,...,3.136,0.004,29.315,64.644,0.0,1.121,0.433,0.013,21.666,99.711
2004,,Caribbean,2004,34682000.0,188937300000.0,96.196,71.106,-3.579,47.829,-13.162,...,2.348,0.004,31.309,59.949,0.0,1.102,0.327,0.013,23.269,100.932


In [35]:
# # drop index
car_df.reset_index(drop=True, inplace=True)

In [36]:
#check_dataframe
car_df.shape

(20, 28)

#### Central America

In [37]:
#create a dataframe for countries in Central America
#filter for Central American Countries
ca=['Belize', 'Costa Rica', 'El Salvador', 'Guatemala', 'Honduras', 'Nicaragua', 'Panama']
central_america_df=decade_ems_df[decade_ems_df.country.isin(ca)]

In [38]:
central_america_df.columns

Index(['iso_code', 'country', 'year', 'population', 'gdp', 'co2',
       'co2_growth_prct', 'co2_growth_abs', 'consumption_co2', 'trade_co2',
       'trade_co2_share', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'cement_co2', 'coal_co2', 'flaring_co2',
       'gas_co2', 'oil_co2', 'other_industry_co2', 'cement_co2_per_capita',
       'coal_co2_per_capita', 'flaring_co2_per_capita', 'gas_co2_per_capita',
       'oil_co2_per_capita'],
      dtype='object')

In [39]:
#check dataframe
central_america_df.shape

(140, 28)

In [40]:
central_america_df.head()

Unnamed: 0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
2694,BLZ,Belize,2000,247000.0,0.0,0.396,13.684,0.048,0.0,0.0,...,0.0,0.0,0.0,0.396,0.0,0.0,0.0,0.0,0.0,1.602
2695,BLZ,Belize,2001,255000.0,0.0,0.447,12.963,0.051,0.0,0.0,...,0.0,0.0,0.0,0.447,0.0,0.0,0.0,0.0,0.0,1.753
2696,BLZ,Belize,2002,262000.0,0.0,0.432,-3.279,-0.015,0.0,0.0,...,0.0,0.0,0.0,0.432,0.0,0.0,0.0,0.0,0.0,1.65
2697,BLZ,Belize,2003,269000.0,0.0,0.432,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.432,0.0,0.0,0.0,0.0,0.0,1.607
2698,BLZ,Belize,2004,277000.0,0.0,0.392,-9.322,-0.04,0.0,0.0,...,0.0,0.0,0.0,0.392,0.0,0.0,0.0,0.0,0.0,1.415


In [41]:
# add the sum of c02 and co2_per_capita, groupby year, create new dataframe
cen_df=central_america_df.groupby('year').sum()[['population', 'gdp', 'co2',
       'co2_growth_prct', 'co2_growth_abs', 'consumption_co2', 'trade_co2',
       'trade_co2_share', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'cement_co2', 'coal_co2', 'flaring_co2',
       'gas_co2', 'oil_co2', 'other_industry_co2', 'cement_co2_per_capita',
       'coal_co2_per_capita', 'flaring_co2_per_capita', 'gas_co2_per_capita',
       'oil_co2_per_capita']]
cen_df

Unnamed: 0_level_0,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,consumption_co2_per_capita,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,36422000.0,181905700000.0,35.655,37.724,1.609,48.561,13.303,231.118,8.146,8.856,...,1.092,0.0,0.0,31.646,0.0,0.523,0.157,0.0,0.0,7.467
2001,37128000.0,188005000000.0,38.734,66.438,3.078,51.931,13.646,222.036,8.854,9.263,...,1.147,0.0,0.0,34.611,0.0,0.519,0.178,0.0,0.0,8.159
2002,37831000.0,195669400000.0,39.188,5.302,0.453,60.697,21.942,332.548,8.533,10.8,...,1.701,0.0,0.0,34.528,0.0,0.503,0.222,0.0,0.0,7.808
2003,38534000.0,206326100000.0,40.986,34.949,1.799,49.709,9.155,124.82,8.752,7.867,...,1.623,0.0,0.0,36.21,0.0,0.537,0.189,0.0,0.0,8.027
2004,39240000.0,215031600000.0,42.005,2.32,1.016,51.088,9.475,118.161,8.504,8.035,...,1.764,0.0,0.0,36.966,0.0,0.556,0.191,0.0,0.0,7.757
2005,39946000.0,234125100000.0,44.138,34.854,2.133,52.724,9.008,118.663,8.844,7.83,...,3.016,0.0,0.0,37.637,0.0,0.59,0.449,0.0,0.0,7.805
2006,40652000.0,248127600000.0,44.989,19.957,0.85,51.077,6.532,86.426,9.022,7.276,...,2.573,0.0,0.0,39.022,0.0,0.546,0.332,0.0,0.0,8.146
2007,41363000.0,260778600000.0,47.772,48.716,2.784,56.894,9.595,109.72,9.405,8.45,...,2.107,0.0,0.0,41.873,0.0,0.618,0.297,0.0,0.0,8.492
2008,42072000.0,270220500000.0,45.922,-28.211,-1.851,60.43,14.943,188.03,8.951,9.516,...,1.707,0.0,0.0,40.216,0.0,0.676,0.162,0.0,0.0,8.111
2009,42786000.0,276311600000.0,46.958,30.771,1.037,67.766,21.325,265.115,9.294,11.976,...,1.472,0.0,0.0,41.998,0.0,0.611,0.165,0.0,0.0,8.518


In [42]:
#convert index to column
cen_df.reset_index()

Unnamed: 0,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
0,2000,36422000.0,181905700000.0,35.655,37.724,1.609,48.561,13.303,231.118,8.146,...,1.092,0.0,0.0,31.646,0.0,0.523,0.157,0.0,0.0,7.467
1,2001,37128000.0,188005000000.0,38.734,66.438,3.078,51.931,13.646,222.036,8.854,...,1.147,0.0,0.0,34.611,0.0,0.519,0.178,0.0,0.0,8.159
2,2002,37831000.0,195669400000.0,39.188,5.302,0.453,60.697,21.942,332.548,8.533,...,1.701,0.0,0.0,34.528,0.0,0.503,0.222,0.0,0.0,7.808
3,2003,38534000.0,206326100000.0,40.986,34.949,1.799,49.709,9.155,124.82,8.752,...,1.623,0.0,0.0,36.21,0.0,0.537,0.189,0.0,0.0,8.027
4,2004,39240000.0,215031600000.0,42.005,2.32,1.016,51.088,9.475,118.161,8.504,...,1.764,0.0,0.0,36.966,0.0,0.556,0.191,0.0,0.0,7.757
5,2005,39946000.0,234125100000.0,44.138,34.854,2.133,52.724,9.008,118.663,8.844,...,3.016,0.0,0.0,37.637,0.0,0.59,0.449,0.0,0.0,7.805
6,2006,40652000.0,248127600000.0,44.989,19.957,0.85,51.077,6.532,86.426,9.022,...,2.573,0.0,0.0,39.022,0.0,0.546,0.332,0.0,0.0,8.146
7,2007,41363000.0,260778600000.0,47.772,48.716,2.784,56.894,9.595,109.72,9.405,...,2.107,0.0,0.0,41.873,0.0,0.618,0.297,0.0,0.0,8.492
8,2008,42072000.0,270220500000.0,45.922,-28.211,-1.851,60.43,14.943,188.03,8.951,...,1.707,0.0,0.0,40.216,0.0,0.676,0.162,0.0,0.0,8.111
9,2009,42786000.0,276311600000.0,46.958,30.771,1.037,67.766,21.325,265.115,9.294,...,1.472,0.0,0.0,41.998,0.0,0.611,0.165,0.0,0.0,8.518


In [43]:
#add new columns
#add country/region to Central America
#pd.Dataframe[new_column] = value 
cen_df['country'] = "Central America"
cen_df['iso_code']=" "
#add year
cen_df['year']=['2000','2001','2002','2003', '2004','2005', '2006','2007','2008','2009','2010','2011','2012','2013','2014', '2015','2016','2017','2018', '2019']

In [44]:
#check columns
cen_df.columns

Index(['population', 'gdp', 'co2', 'co2_growth_prct', 'co2_growth_abs',
       'consumption_co2', 'trade_co2', 'trade_co2_share', 'co2_per_capita',
       'consumption_co2_per_capita', 'share_global_co2', 'cumulative_co2',
       'co2_per_gdp', 'consumption_co2_per_gdp', 'cement_co2', 'coal_co2',
       'flaring_co2', 'gas_co2', 'oil_co2', 'other_industry_co2',
       'cement_co2_per_capita', 'coal_co2_per_capita',
       'flaring_co2_per_capita', 'gas_co2_per_capita', 'oil_co2_per_capita',
       'country', 'iso_code', 'year'],
      dtype='object')

In [45]:
cen_df.shape

(20, 28)

In [46]:
#reorder columns
cen_df=cen_df[['iso_code', 'country', 'year', 'population', 'gdp', 'co2',
       'co2_growth_prct', 'co2_growth_abs', 'consumption_co2', 'trade_co2',
       'trade_co2_share', 'co2_per_capita', 'consumption_co2_per_capita',
       'share_global_co2', 'cumulative_co2', 'co2_per_gdp',
       'consumption_co2_per_gdp', 'cement_co2', 'coal_co2', 'flaring_co2',
       'gas_co2', 'oil_co2', 'other_industry_co2', 'cement_co2_per_capita',
       'coal_co2_per_capita', 'flaring_co2_per_capita', 'gas_co2_per_capita',
       'oil_co2_per_capita']]

In [47]:
cen_df.head()

Unnamed: 0_level_0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,,Central America,2000,36422000.0,181905700000.0,35.655,37.724,1.609,48.561,13.303,...,1.092,0.0,0.0,31.646,0.0,0.523,0.157,0.0,0.0,7.467
2001,,Central America,2001,37128000.0,188005000000.0,38.734,66.438,3.078,51.931,13.646,...,1.147,0.0,0.0,34.611,0.0,0.519,0.178,0.0,0.0,8.159
2002,,Central America,2002,37831000.0,195669400000.0,39.188,5.302,0.453,60.697,21.942,...,1.701,0.0,0.0,34.528,0.0,0.503,0.222,0.0,0.0,7.808
2003,,Central America,2003,38534000.0,206326100000.0,40.986,34.949,1.799,49.709,9.155,...,1.623,0.0,0.0,36.21,0.0,0.537,0.189,0.0,0.0,8.027
2004,,Central America,2004,39240000.0,215031600000.0,42.005,2.32,1.016,51.088,9.475,...,1.764,0.0,0.0,36.966,0.0,0.556,0.191,0.0,0.0,7.757


In [48]:
# # drop index
cen_df.reset_index(drop=True, inplace=True)

In [49]:
#check_dataframe
cen_df.shape

(20, 28)

#### Add "new" regions to "decade_ems_df" --dataframe

In [50]:
#add both dataframes to co_type dataframes,
# append to end with #continous index value, ignore_index=True
#Central America
decade_ems_df=decade_ems_df.append(cen_df, ignore_index=True)

In [51]:
#Caribbean
decade_ems_df=decade_ems_df.append(car_df, ignore_index=True)

In [52]:
decade_ems_df.shape

(4563, 28)

#### Additional Clean-up...

In [53]:
## alphabetical sort 'decade_ems_df' dataframe, a-z
decade_ems_df=decade_ems_df.sort_values('country', ascending=True)

In [54]:
# Covert population column to int
decade_ems_df['population'] = decade_ems_df['population'].astype(int)

In [None]:
#add placeholder text to 'iso_code' for missing "country/region codes"
#df.replace()
decade_ems_df['iso_code']=decade_ems_df['iso_code'].str.replace(' ','notfound') 

In [70]:
#df.fillna()
decade_ems_df['iso_code'].fillna('notfound', inplace = True)

In [81]:
decade_ems_df.loc[decade_ems_df['country'] =='Africa']

Unnamed: 0,iso_code,country,year,population,gdp,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,...,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2,cement_co2_per_capita,coal_co2_per_capita,flaring_co2_per_capita,gas_co2_per_capita,oil_co2_per_capita
32,notfound,Africa,2012,1094343040,0.0,1240.355,0.466,5.75,0.0,0.0,...,426.07,57.001,231.541,458.2,0.0,0.063,0.398,0.053,0.216,0.428
33,notfound,Africa,2013,1123044992,0.0,1256.838,1.329,16.483,0.0,0.0,...,424.514,53.9,232.067,478.012,0.0,0.062,0.387,0.049,0.211,0.435
34,notfound,Africa,2014,1152434048,0.0,1348.065,7.258,91.227,0.0,0.0,...,453.111,53.981,234.538,532.612,0.0,0.065,0.402,0.048,0.208,0.473
35,notfound,Africa,2015,1182439040,0.0,1302.266,-3.397,-45.799,0.0,0.0,...,426.486,51.043,241.575,505.787,0.0,0.067,0.369,0.044,0.209,0.437
39,notfound,Africa,2019,1308064000,0.0,1434.873,1.603,22.641,0.0,0.0,...,463.038,46.038,289.131,558.911,0.0,0.061,0.362,0.036,0.226,0.437
37,notfound,Africa,2017,1244221952,0.0,1377.21,4.011,53.106,0.0,0.0,...,443.656,46.038,269.421,539.637,0.0,0.064,0.365,0.038,0.221,0.443
38,notfound,Africa,2018,1275921024,0.0,1412.232,2.543,35.021,0.0,0.0,...,455.371,46.038,286.483,545.074,0.0,0.064,0.365,0.037,0.23,0.437
31,notfound,Africa,2011,1066409984,0.0,1234.606,2.306,27.825,0.0,0.0,...,429.485,55.351,220.902,469.831,0.0,0.057,0.412,0.053,0.212,0.451
36,notfound,Africa,2016,1213041024,0.0,1324.104,1.677,21.838,0.0,0.0,...,438.846,44.469,251.799,509.723,0.0,0.067,0.37,0.037,0.212,0.43
30,notfound,Africa,2010,1039304000,0.0,1206.78,1.757,20.834,0.0,0.0,...,439.305,62.855,200.428,444.973,0.0,0.058,0.432,0.062,0.197,0.438


In [80]:
#final check for missing values
# check for missing data?
# Find null values , isnull() method
# decade_ems_df.isnull().sum()['iso_code']
decade_ems_df.isnull().sum()

iso_code                      0
country                       0
year                          0
population                    0
gdp                           0
co2                           0
co2_growth_prct               0
co2_growth_abs                0
consumption_co2               0
trade_co2                     0
trade_co2_share               0
co2_per_capita                0
consumption_co2_per_capita    0
share_global_co2              0
cumulative_co2                0
co2_per_gdp                   0
consumption_co2_per_gdp       0
cement_co2                    0
coal_co2                      0
flaring_co2                   0
gas_co2                       0
oil_co2                       0
other_industry_co2            0
cement_co2_per_capita         0
coal_co2_per_capita           0
flaring_co2_per_capita        0
gas_co2_per_capita            0
oil_co2_per_capita            0
dtype: int64

#### Write to "final" dataset

In [56]:
#write final dataset to csv file for furture use
# file_path ="../../resources/data/decade_co_emissions.csv"