# Title: Carbon Emissions Observations
By: Laura Suchomska, Ravi Malde, Augustin Goudet

# I. Package Imports

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
import warnings
warnings.filterwarnings("ignore")

# II. Data Collection - CIA WorldFactbook

In [3]:
# Base names (no extension) of every candidate CIA World Factbook data file.
# Each entry is used verbatim to build a "<name>.csv" path when the data is
# loaded, so spelling and casing must stay exactly as written here.
files = [
    "AIRPORTS",
    "AREA",
    "BUDGET",
    "continent",
    "CURACCT",
    "DEBTFOREIGNERS",
    "EDUCATIONSPEND",
    "electricconsupmtion",
    "emissions",
    "EXPORTS",
    "fossilelectricity",
    "GDP",
    "GDPGROWTH",
    "GDPPERCAP",
    "hydroelectricity",
    "IMPORTS",
    "INDUSTPRODGR",
    "internetusers",
    "LIFEXP",
    "MARMERCHANT",
    "MIGRATION",
    "MILIEXPENDITURE",
    "naturalgasconsumption",
    "nuclearelectricity",
    "otherrenewableelectricity",
    "petroleumconsumption",
    "POPGROWTH",
    "POPULATION",
    "PUBLICDEBT",
    "RAILWAYS",
    "ROADWAYS",
    "UNEMPLOYMENT",
    "WATERWAYS",
    "WORKFORCE",
]

In [4]:
# Read every listed CSV (looked up relative to the working directory) into a
# DataFrame and keep them in a dictionary keyed by the base file name.
pd_dict = {name: pd.read_csv('{}.csv'.format(name)) for name in files}

In [5]:
# Bind short aliases to the tables used later in the notebook.
# Each alias refers to the same DataFrame object stored in pd_dict (no copy is
# made), so in-place changes through an alias are visible through pd_dict too.
base_df = pd_dict['continent']
area_df = pd_dict['AREA']
co2_df = pd_dict['emissions']

petrol_df = pd_dict['petroleumconsumption']
hydro_df = pd_dict['hydroelectricity']
nuclear_df = pd_dict['nuclearelectricity']
otherrenew_df = pd_dict['otherrenewableelectricity']

users_df = pd_dict['internetusers']

mili_df = pd_dict['MILIEXPENDITURE']
# Replace the '% OF GDP' header with an identifier-friendly column name.
mili_df.rename(columns={'% OF GDP': 'mili_exp_gdp'}, inplace=True)

# III. Data Transformation 

In [6]:
# Standardise country names so they can serve as a merge key: keep only the
# text that precedes the first comma in each name.
#
# The column is assigned directly instead of writing to the rows yielded by
# DataFrame.iterrows(): those rows may be copies, so writes to them are not
# guaranteed to reach base_df. The .str accessor also leaves missing values as
# NaN rather than raising on them.
base_df['country_name'] = base_df['country_name'].str.split(",").str[0]

In [7]:
# Rename the key column to 'Country', the name the merge below joins on.
base_df.rename(columns={'country_name': 'Country'}, inplace=True)

# NOTE(review): merge() returns a new DataFrame and the result is not assigned,
# so base_df itself does not gain the columns of area_df here — confirm intent.
base_df.merge(area_df, on='Country', how='left')

# NOTE(review): rename() is called without inplace=True and its result is not
# assigned; as the last expression of the cell the returned frame is only
# displayed, and mili_df itself is left unchanged.
mili_df.rename(columns={"mili_exp_%_gdp": "mili_exp_gdp"})


Unnamed: 0.1,Unnamed: 0,COUNTRY,mili_exp_gdp,YEAR,Unnamed: 4
0,1,Saudi Arabia,8.78,2018,
1,2,Oman,8.17,2018,
2,3,United Arab Emirates,5.70,2016,
3,4,Algeria,5.27,2018,
4,5,Kuwait,5.06,2018,
...,...,...,...,...,...
153,154,Iceland,0.30,2018,
154,155,Papua New Guinea,0.27,2018,
155,156,Laos,0.19,2013,
156,157,Equatorial Guinea,0.18,2016,
