In [1]:
import warnings
import numpy as np
warnings.filterwarnings('ignore')
import pandas as pd
from plotnine import *
import statsmodels.api as sm
import statsmodels.formula.api as smf
import os
import sys
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML

In [2]:
#read the data
data = pd.read_csv("f7664209-d97b-4f9b-a7bd-e1fef352177c_Data.csv")

print(data.head())

   Time Time Code    Country Name Country Code  \
0  1992    YR1992     Afghanistan          AFG   
1  1992    YR1992         Albania          ALB   
2  1992    YR1992         Algeria          DZA   
3  1992    YR1992  American Samoa          ASM   
4  1992    YR1992         Andorra          AND   

  GDP per capita, PPP (constant 2017 international $) [NY.GDP.PCAP.PP.KD]  \
0                                                 ..                        
1                                   3275.44433583801                        
2                                   8383.77024779121                        
3                                                 ..                        
4                                                 ..                        

  CO2 emissions (metric tons per capita) [EN.ATM.CO2E.PC]  
0                                 0.0961965810608727       
1                                  0.774724910911141       
2                                   2.96498636282543    

In [3]:
data.columns

Index(['Time', 'Time Code', 'Country Name', 'Country Code',
       'GDP per capita, PPP (constant 2017 international $) [NY.GDP.PCAP.PP.KD]',
       'CO2 emissions (metric tons per capita) [EN.ATM.CO2E.PC]'],
      dtype='object')

In [4]:
data.drop(['Time Code','Country Code'],axis=1,inplace=True)

In [5]:
cols = {'GDP per capita, PPP (constant 2017 international $) [NY.GDP.PCAP.PP.KD]':'gdppc',
          'CO2 emissions (metric tons per capita) [EN.ATM.CO2E.PC]':'co2pc',
        'Country Name':'country',
       'Time':'year'}
data.rename(columns=cols, inplace=True)

In [6]:
data.columns

Index(['year', 'country', 'gdppc', 'co2pc'], dtype='object')

In [7]:
data.country=data.country.astype("category")
data.year=data.year.astype("category")

In [8]:
data.head()

Unnamed: 0,year,country,gdppc,co2pc
0,1992,Afghanistan,..,0.0961965810608727
1,1992,Albania,3275.44433583801,0.774724910911141
2,1992,Algeria,8383.77024779121,2.96498636282543
3,1992,American Samoa,..,..
4,1992,Andorra,..,6.91205338948512


In [9]:
data.replace('..',np.NaN,inplace=True)

In [10]:
data.isnull().sum()

year         3
country      5
gdppc      848
co2pc      848
dtype: int64

In [11]:
data.groupby('country').agg(lambda x: x.isnull().sum()).sort_values(['gdppc']+['co2pc'],ascending=False).head(60)

Unnamed: 0_level_0,year,gdppc,co2pc
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
American Samoa,0,27,27
Channel Islands,0,27,27
Guam,0,27,27
Isle of Man,0,27,27
Monaco,0,27,27
Northern Mariana Islands,0,27,27
St. Martin (French part),0,27,27
Virgin Islands (U.S.),0,27,27
South Sudan,0,27,22
Liechtenstein,0,27,17


In [12]:
temp=data.dropna(axis=0,thresh=4)

In [13]:
temp.isnull().sum()

year       0
country    0
gdppc      0
co2pc      0
dtype: int64

In [14]:
temp

Unnamed: 0,year,country,gdppc,co2pc
1,1992,Albania,3275.44433583801,0.774724910911141
2,1992,Algeria,8383.77024779121,2.96498636282543
5,1992,Angola,5148.25282361678,0.410522931864339
6,1992,Antigua and Barbuda,15497.4647441974,4.0952366180143
7,1992,Argentina,16209.3230774449,3.61928035773818
...,...,...,...,...
5417,2016,Vanuatu,3061.78723983667,0.527000323357166
5419,2016,Vietnam,6767.90249535442,2.05756591685295
5421,2016,West Bank and Gaza,6438.93364028868,0.740606555214825
5423,2016,Zambia,3467.87515599636,0.314182894901441


In [15]:
temp.groupby('country').agg(lambda x: x.isnull().sum()).sort_values(['gdppc']+['co2pc'],ascending=False).head(60)

Unnamed: 0_level_0,year,gdppc,co2pc
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,0,0.0,0.0
Albania,0,0.0,0.0
Algeria,0,0.0,0.0
Angola,0,0.0,0.0
Antigua and Barbuda,0,0.0,0.0
Argentina,0,0.0,0.0
Armenia,0,0.0,0.0
Aruba,0,0.0,0.0
Australia,0,0.0,0.0
Austria,0,0.0,0.0


In [16]:
temp.head()

Unnamed: 0,year,country,gdppc,co2pc
1,1992,Albania,3275.44433583801,0.774724910911141
2,1992,Algeria,8383.77024779121,2.96498636282543
5,1992,Angola,5148.25282361678,0.410522931864339
6,1992,Antigua and Barbuda,15497.4647441974,4.0952366180143
7,1992,Argentina,16209.3230774449,3.61928035773818


In [17]:
temp.columns

Index(['year', 'country', 'gdppc', 'co2pc'], dtype='object')

In [18]:
print(type(temp.gdppc[1]))
print(type(temp.co2pc[1]))

<class 'str'>
<class 'str'>


In [19]:
temp.gdppc=pd.to_numeric(temp.gdppc)
temp.co2pc=pd.to_numeric(temp.co2pc)
temp.year=pd.to_numeric(temp.year)

In [20]:
countries_grouped = temp.groupby("country")

In [21]:
temp["lngdppc"] = countries_grouped["gdppc"].transform(np.log)
temp["lnco2pc"] = countries_grouped["co2pc"].transform(np.log)
temp["d_lngdppc"] = countries_grouped["lngdppc"].transform("diff")
temp["d_lnco2pc"] = countries_grouped["lnco2pc"].transform("diff")

In [22]:
temp=temp.dropna(axis=0,thresh=7)

In [23]:
temp.isnull().sum()

year         0
country      0
gdppc        0
co2pc        0
lngdppc      0
lnco2pc      0
d_lngdppc    0
d_lnco2pc    0
dtype: int64

In [24]:
temp.groupby('country').size().sort_values().head(20)

country
Eritrea                      0
Somalia                      0
San Marino                   0
Faroe Islands                0
French Polynesia             0
Puerto Rico                  0
Gibraltar                    0
Syrian Arab Republic         0
Greenland                    0
Guam                         0
Northern Mariana Islands     0
Cuba                         0
Isle of Man                  0
South Sudan                  0
Korea, Dem. People’s Rep.    0
Channel Islands              0
St. Martin (French part)     0
Venezuela, RB                0
Monaco                       0
Virgin Islands (U.S.)        0
dtype: int64

In [25]:
###Dropping contries wiht poor coverage (<16 observations)
countries_to_drop=['Sao Tome and Principe','Sudan','Timor-Leste','Djibouti','Curacao','Cayman Islands',
                   'Sint Maarten (Dutch part)','Kosovo','Montenegro','Nauru','Serbia',
                   'Afghanistan','Turks and Caicos Islands',]
temp=temp.set_index('country').drop(index=countries_to_drop,axis=0)

In [26]:
temp.groupby('country').size().sort_values().head(20)

country
Qatar                     16
Liberia                   16
Palau                     16
Libya                     17
Lithuania                 21
Maldives                  21
Moldova                   21
Croatia                   21
Kuwait                    21
Latvia                    21
France                    22
West Bank and Gaza        22
Bosnia and Herzegovina    22
Italy                     22
Estonia                   23
Cambodia                  23
Madagascar                24
Namibia                   24
Nepal                     24
Netherlands               24
dtype: int64

In [27]:
temp.reset_index(inplace=True)


In [28]:
temp.query("year == 2000").head(20)

Unnamed: 0,country,year,gdppc,co2pc,lngdppc,lnco2pc,d_lngdppc,d_lnco2pc
1194,Albania,2000,5911.956097,0.978175,8.684732,-0.022067,0.073565,0.018584
1195,Algeria,2000,8710.455991,2.83038,9.072279,1.040411,0.023712,-0.060106
1196,Angola,2000,4727.967467,0.581961,8.461251,-0.541351,-0.002683,0.008418
1197,Antigua and Barbuda,2000,18311.013715,4.534545,9.815258,1.511725,0.042374,0.025673
1198,Argentina,2000,18625.288101,3.854992,9.832276,1.349369,-0.018927,-0.044544
1199,Armenia,2000,4048.257817,1.128918,8.306042,0.121259,0.063635,0.145755
1200,Aruba,2000,41022.321554,26.194875,10.621872,3.265564,0.052854,1.05672
1201,Australia,2000,38462.015363,17.20061,10.557426,2.844945,0.026655,0.0006
1202,Austria,2000,46551.459787,7.771971,10.748314,2.050524,0.030795,0.001608
1203,Azerbaijan,2000,4063.471641,3.666271,8.309793,1.299175,0.097045,0.023858


In [29]:
formula="d_lnco2pc ~ d_lngdppc"

###Pooled regression 2000
ols_pooled2000 = smf.ols("d_lnco2pc ~ d_lngdppc + country", temp.query("year == 2000")).fit(cov_type="HC0")

In [33]:
models = [ols_pooled2000]
names = ["Pooled regression 2000"]
stargazer = Stargazer(models)
stargazer.rename_covariates(
    {
        "Intercept": "Constant",
        "d_lngdppc": "GDP per capita log change, cumulative coeff.",
        "d_lnco2pc": "CO2 emissions per capita log change, cumulative coeff."
    }
)

stargazer.covariate_order(
    [
        "d_lngdppc",
        "Intercept"
    ]
)

stargazer.custom_columns(names, [1])
HTML(stargazer.render_html())

0,1
,
,Dependent variable:d_lnco2pc
,
,Pooled regression 2000
,(1)
,
"GDP per capita log change, cumulative coeff.",0.132***
,(0.000)
Constant,0.017***
,(0.000)


The per capita CO2 emissions were 0.132% higher, on average, in 2000 for every 1% increase in GDP per capita, controlling for a country.

In [34]:
temp.query("year == 2015")

Unnamed: 0,country,year,gdppc,co2pc,lngdppc,lnco2pc,d_lngdppc,d_lnco2pc
3861,Albania,2015,11916.422315,1.602648,9.385673,0.471657,0.024857,-0.170233
3862,Algeria,2015,11696.963757,3.854557,9.367085,1.349256,0.015878,0.031293
3863,Angola,2015,8036.410610,1.240245,8.991738,0.215309,-0.024997,-0.294360
3864,Antigua and Barbuda,2015,18595.084904,5.839546,9.830653,1.764653,0.026751,0.023345
3865,Argentina,2015,23933.886613,4.664011,10.083051,1.539876,0.016165,0.016382
...,...,...,...,...,...,...,...,...
4032,Vanuatu,2015,3037.784742,0.486896,8.018884,-0.719706,-0.025446,-0.181224
4033,Vietnam,2015,6438.260271,2.032108,8.770014,0.709073,0.054217,0.107750
4034,West Bank and Gaza,2015,6048.976597,0.704186,8.707644,-0.350712,0.013633,0.034828
4035,Zambia,2015,3443.555206,0.285428,8.144260,-1.253766,-0.001883,-0.024175


In [35]:
formula="d_lnco2pc ~ d_lngdppc"

###Pooled regression 2015
ols_pooled2015 = smf.ols("d_lnco2pc ~ d_lngdppc + country", temp.query("year == 2015")).fit(cov_type="HC0")

In [38]:
models = [ols_pooled2015]
names = ["Pooled regression 2015"]
stargazer = Stargazer(models)
stargazer.rename_covariates(
    {
        "Intercept": "Constant",
        "d_lngdppc": "GDP per capita log change, cumulative coeff.",
        "d_lnco2pc": "CO2 emissions per capita log change, cumulative coeff."
    }
)

stargazer.covariate_order(
    [
        "d_lngdppc",
        "Intercept"
    ]
)

stargazer.custom_columns(names, [1])
HTML(stargazer.render_html())

0,1
,
,Dependent variable:d_lnco2pc
,
,Pooled regression 2015
,(1)
,
"GDP per capita log change, cumulative coeff.",0.149***
,(0.000)
Constant,-0.003***
,(0.000)


The per capita CO2 emissions were 0.149% higher, on average, in 2015 for every 1% increase in GDP per capita, controlling for a country.

In [40]:
# Fd, time trend, no lags

model = smf.wls("d_lnco2pc ~ d_lngdppc + year", temp)
fd_lm = model.fit(
    cov_type="cluster",
    cov_kwds={"groups": temp.loc[model.data.row_labels, "country"]},
)
fd_lm.summary()

0,1,2,3
Dep. Variable:,d_lnco2pc,R-squared:,0.075
Model:,WLS,Adj. R-squared:,0.074
Method:,Least Squares,F-statistic:,18.55
Date:,"Mon, 29 Mar 2021",Prob (F-statistic):,4.87e-08
Time:,09:58:51,Log-Likelihood:,2858.4
No. Observations:,4213,AIC:,-5711.0
Df Residuals:,4210,BIC:,-5692.0
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1407,0.489,0.288,0.773,-0.817,1.098
d_lngdppc,0.6142,0.102,6.046,0.000,0.415,0.813
year,-7.012e-05,0.000,-0.288,0.774,-0.001,0.000

0,1,2,3
Omnibus:,1844.627,Durbin-Watson:,1.975
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166438.455
Skew:,1.162,Prob(JB):,0.0
Kurtosis:,33.704,Cond. No.,584000.0


A 1% increase in GDP per capita tend to be followed by 0.61% increase in CO2 emissions per capita, on average, in the data, relative to the country trend.

In [42]:
# FD, time trend, 2 year lags

fd_lm_5_cumul_trend_formula = "d_lnco2pc ~ d_lngdppc.shift(0) + d_lngdppc.shift(1) + \
d_lngdppc.shift(2)+ year"

model = smf.wls(
    fd_lm_5_cumul_trend_formula,
    temp
)

fd_lm_5_cumul_trend = model.fit(
    cov_type="cluster",
    cov_kwds={"groups":  temp.loc[model.data.row_labels, "country"]},
)

fd_lm_5_cumul_trend.summary()

0,1,2,3
Dep. Variable:,d_lnco2pc,R-squared:,0.075
Model:,WLS,Adj. R-squared:,0.074
Method:,Least Squares,F-statistic:,9.375
Date:,"Mon, 29 Mar 2021",Prob (F-statistic):,6.77e-07
Time:,10:01:35,Log-Likelihood:,2857.1
No. Observations:,4211,AIC:,-5704.0
Df Residuals:,4206,BIC:,-5672.0
Df Model:,4,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1525,0.490,0.312,0.755,-0.807,1.112
d_lngdppc.shift(0),0.6156,0.102,6.046,0.000,0.416,0.815
d_lngdppc.shift(1),-0.0222,0.023,-0.966,0.334,-0.067,0.023
d_lngdppc.shift(2),0.0238,0.027,0.879,0.380,-0.029,0.077
year,-7.603e-05,0.000,-0.311,0.756,-0.001,0.000

0,1,2,3
Omnibus:,1842.843,Durbin-Watson:,1.974
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166181.82
Skew:,1.161,Prob(JB):,0.0
Kurtosis:,33.688,Cond. No.,585000.0


A 1% increase in GDP per capita tend to be followed by 0.024% increase in CO2 emissions per capita, on average, within two years in the data, relative to the country trend.

In [None]:
# FD, time trend, 6 year lags

fd_lm_5_cumul_trend_formula = "d_lnco2pc ~ d_lngdppc.shift(0) + d_lngdppc.shift(1) + \
d_lngdppc.shift(2)+d_lngdppc.shift(3)+d_lngdppc.shift(4)+d_lngdppc.shift(5)+d_lngdppc.shift(6)+year"

model = smf.wls(
    fd_lm_5_cumul_trend_formula,
    temp
)

fd_lm_5_cumul_trend = model.fit(
    cov_type="cluster",
    cov_kwds={"groups":  temp.loc[model.data.row_labels, "country"]},
)

fd_lm_5_cumul_trend.summary()

A 1% increase in GDP per capita tend to be followed by 0.024% increase in CO2 emissions per capita, on average, within six years in the data, relative to the country trend.

In [None]:
###Panel regression
from linearmodels import PanelOLS

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212],
           dtype='int64', length=4213)

In [None]:
temp.head()

In [None]:
lm = PanelOLS.from_formula(
    "surv ~ imm + TimeEffects + EntityEffects",
    data_balanced,
    weights=data_balanced["avgpop"],
).fit(cov_type="clustered", cluster_entity=True)