In [331]:
import warnings
import numpy as np
warnings.filterwarnings('ignore')
import pandas as pd
from plotnine import *
import statsmodels.api as sm
import statsmodels.formula.api as smf
import os
import sys
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML

In [332]:
#read the data
data = pd.read_csv("f7664209-d97b-4f9b-a7bd-e1fef352177c_Data.csv")

print(data.head())

   Time Time Code    Country Name Country Code  \
0  1992    YR1992     Afghanistan          AFG   
1  1992    YR1992         Albania          ALB   
2  1992    YR1992         Algeria          DZA   
3  1992    YR1992  American Samoa          ASM   
4  1992    YR1992         Andorra          AND   

  GDP per capita, PPP (constant 2017 international $) [NY.GDP.PCAP.PP.KD]  \
0                                                 ..                        
1                                   3275.44433583801                        
2                                   8383.77024779121                        
3                                                 ..                        
4                                                 ..                        

  CO2 emissions (metric tons per capita) [EN.ATM.CO2E.PC]  
0                                 0.0961965810608727       
1                                  0.774724910911141       
2                                   2.96498636282543    

In [333]:
data.columns

Index(['Time', 'Time Code', 'Country Name', 'Country Code',
       'GDP per capita, PPP (constant 2017 international $) [NY.GDP.PCAP.PP.KD]',
       'CO2 emissions (metric tons per capita) [EN.ATM.CO2E.PC]'],
      dtype='object')

In [334]:
data.drop(['Time Code','Country Code'],axis=1,inplace=True)

In [335]:
cols = {'GDP per capita, PPP (constant 2017 international $) [NY.GDP.PCAP.PP.KD]':'gdppc',
          'CO2 emissions (metric tons per capita) [EN.ATM.CO2E.PC]':'co2pc',
        'Country Name':'country',
       'Time':'year'}
data.rename(columns=cols, inplace=True)

In [336]:
data.columns

Index(['year', 'country', 'gdppc', 'co2pc'], dtype='object')

In [337]:
data.country=data.country.astype("category")
data.year=data.year.astype("category")

In [338]:
data.head()

Unnamed: 0,year,country,gdppc,co2pc
0,1992,Afghanistan,..,0.0961965810608727
1,1992,Albania,3275.44433583801,0.774724910911141
2,1992,Algeria,8383.77024779121,2.96498636282543
3,1992,American Samoa,..,..
4,1992,Andorra,..,6.91205338948512


In [339]:
data.replace('..',np.NaN,inplace=True)

In [340]:
data.isnull().sum()

year         3
country      5
gdppc      848
co2pc      848
dtype: int64

In [341]:
data.groupby('country').agg(lambda x: x.isnull().sum()).sort_values(['gdppc']+['co2pc'],ascending=False).head(60)

Unnamed: 0_level_0,year,gdppc,co2pc
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
American Samoa,0,27,27
Channel Islands,0,27,27
Guam,0,27,27
Isle of Man,0,27,27
Monaco,0,27,27
Northern Mariana Islands,0,27,27
St. Martin (French part),0,27,27
Virgin Islands (U.S.),0,27,27
South Sudan,0,27,22
Liechtenstein,0,27,17


In [342]:
temp=data.dropna(axis=0,thresh=4)

In [343]:
temp.isnull().sum()

year       0
country    0
gdppc      0
co2pc      0
dtype: int64

In [344]:
temp

Unnamed: 0,year,country,gdppc,co2pc
1,1992,Albania,3275.44433583801,0.774724910911141
2,1992,Algeria,8383.77024779121,2.96498636282543
5,1992,Angola,5148.25282361678,0.410522931864339
6,1992,Antigua and Barbuda,15497.4647441974,4.0952366180143
7,1992,Argentina,16209.3230774449,3.61928035773818
...,...,...,...,...
5417,2016,Vanuatu,3061.78723983667,0.527000323357166
5419,2016,Vietnam,6767.90249535442,2.05756591685295
5421,2016,West Bank and Gaza,6438.93364028868,0.740606555214825
5423,2016,Zambia,3467.87515599636,0.314182894901441


In [345]:
temp.groupby('country').agg(lambda x: x.isnull().sum()).sort_values(['gdppc']+['co2pc'],ascending=False).head(60)

Unnamed: 0_level_0,year,gdppc,co2pc
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,0,0.0,0.0
Albania,0,0.0,0.0
Algeria,0,0.0,0.0
Angola,0,0.0,0.0
Antigua and Barbuda,0,0.0,0.0
Argentina,0,0.0,0.0
Armenia,0,0.0,0.0
Aruba,0,0.0,0.0
Australia,0,0.0,0.0
Austria,0,0.0,0.0


In [346]:
temp.head()

Unnamed: 0,year,country,gdppc,co2pc
1,1992,Albania,3275.44433583801,0.774724910911141
2,1992,Algeria,8383.77024779121,2.96498636282543
5,1992,Angola,5148.25282361678,0.410522931864339
6,1992,Antigua and Barbuda,15497.4647441974,4.0952366180143
7,1992,Argentina,16209.3230774449,3.61928035773818


In [347]:
temp.columns

Index(['year', 'country', 'gdppc', 'co2pc'], dtype='object')

In [348]:
print(type(temp.gdppc[1]))
print(type(temp.co2pc[1]))

<class 'str'>
<class 'str'>


In [349]:
temp.gdppc=pd.to_numeric(temp.gdppc)
temp.co2pc=pd.to_numeric(temp.co2pc)
temp.year=pd.to_numeric(temp.year)

In [350]:
countries_grouped = temp.groupby("country")

In [351]:
temp["lngdppc"] = countries_grouped["gdppc"].transform(np.log)
temp["lnco2pc"] = countries_grouped["co2pc"].transform(np.log)
temp["d_lngdppc"] = countries_grouped["lngdppc"].transform("diff")
temp["d_lnco2pc"] = countries_grouped["lnco2pc"].transform("diff")

In [352]:
temp=temp.dropna(axis=0,thresh=7)

In [353]:
temp.isnull().sum()

year         0
country      0
gdppc        0
co2pc        0
lngdppc      0
lnco2pc      0
d_lngdppc    0
d_lnco2pc    0
dtype: int64

In [354]:
temp.groupby('country').size().sort_values().head(20)

country
Eritrea                      0
Somalia                      0
San Marino                   0
Faroe Islands                0
French Polynesia             0
Puerto Rico                  0
Gibraltar                    0
Syrian Arab Republic         0
Greenland                    0
Guam                         0
Northern Mariana Islands     0
Cuba                         0
Isle of Man                  0
South Sudan                  0
Korea, Dem. People’s Rep.    0
Channel Islands              0
St. Martin (French part)     0
Venezuela, RB                0
Monaco                       0
Virgin Islands (U.S.)        0
dtype: int64

In [355]:
###Dropping contries wiht poor coverage (<16 observations)
countries_to_drop=['Sao Tome and Principe','Sudan','Timor-Leste','Djibouti','Curacao','Cayman Islands',
                   'Sint Maarten (Dutch part)','Kosovo','Montenegro','Nauru','Serbia',
                   'Afghanistan','Turks and Caicos Islands',]
temp=temp.set_index('country').drop(index=countries_to_drop,axis=0)

In [356]:
temp.groupby('country').size().sort_values().head(20)

country
Qatar                     16
Liberia                   16
Palau                     16
Libya                     17
Lithuania                 21
Maldives                  21
Moldova                   21
Croatia                   21
Kuwait                    21
Latvia                    21
France                    22
West Bank and Gaza        22
Bosnia and Herzegovina    22
Italy                     22
Estonia                   23
Cambodia                  23
Madagascar                24
Namibia                   24
Nepal                     24
Netherlands               24
dtype: int64

In [357]:
temp.reset_index(inplace=True)


In [358]:
temp.query("year == 1999 | year == 2000").head(20)

Unnamed: 0,country,year,gdppc,co2pc,lngdppc,lnco2pc,d_lngdppc,d_lnco2pc
1020,Albania,1999,5492.654212,0.960164,8.611167,-0.040651,0.127576,0.538683
1021,Algeria,1999,8506.345679,3.005719,9.048568,1.100517,0.017336,-0.164704
1022,Angola,1999,4740.669833,0.577083,8.463934,-0.549769,-0.010912,0.192956
1023,Antigua and Barbuda,1999,17551.305968,4.419611,9.772884,1.486052,0.016797,0.014293
1024,Argentina,1999,18981.168384,4.030589,9.851203,1.393912,-0.045574,0.03887
1025,Armenia,1999,3798.671291,0.975802,8.242407,-0.024496,0.038813,-0.103021
1026,Aruba,1999,38910.416818,9.105185,10.569017,2.208844,-0.007301,-0.01507
1027,Australia,1999,37450.358069,17.190298,10.530772,2.844345,0.038066,0.014922
1028,Austria,1999,45139.742923,7.759483,10.717518,2.048916,0.033,-0.030392
1029,Azerbaijan,1999,3687.661015,3.579835,8.212748,1.275317,0.062614,-0.111721


In [359]:
formula="d_lnco2pc ~ d_lngdppc"

###Pooled regression 2000
ols_pooled2000 = smf.ols("d_lnco2pc ~ d_lngdppc + country", temp.query("year == 2000")).fit(cov_type="HC0")

In [360]:
models = [ols_pooled]
names = ["Pooled regression 2000"]
stargazer = Stargazer(models)
stargazer.rename_covariates(
    {
        "Intercept": "Constant",
        "d_lngdppc": "GDP per capita log change, cumulative coeff.",
        "d_lnco2pc": "CO2 emissions per capita log change, cumulative coeff."
    }
)

stargazer.covariate_order(
    [
        "d_lngdppc",
        "Intercept"
    ]
)

stargazer.custom_columns(names, [1])
HTML(stargazer.render_html())

0,1
,
,Dependent variable:d_lnco2pc
,
,Pooled regression 2000
,(1)
,
"GDP per capita log change, cumulative coeff.",-0.097***
,(0.000)
Constant,-0.168***
,(0.000)


The per capita CO2 emissions were 0.091% lower, on average, in 2000 for every 1% increase in GDP per capita, controlling for a country.

In [361]:
temp.query("year == 2015")

Unnamed: 0,country,year,gdppc,co2pc,lngdppc,lnco2pc,d_lngdppc,d_lnco2pc
3861,Albania,2015,11916.422315,1.602648,9.385673,0.471657,0.024857,-0.170233
3862,Algeria,2015,11696.963757,3.854557,9.367085,1.349256,0.015878,0.031293
3863,Angola,2015,8036.410610,1.240245,8.991738,0.215309,-0.024997,-0.294360
3864,Antigua and Barbuda,2015,18595.084904,5.839546,9.830653,1.764653,0.026751,0.023345
3865,Argentina,2015,23933.886613,4.664011,10.083051,1.539876,0.016165,0.016382
...,...,...,...,...,...,...,...,...
4032,Vanuatu,2015,3037.784742,0.486896,8.018884,-0.719706,-0.025446,-0.181224
4033,Vietnam,2015,6438.260271,2.032108,8.770014,0.709073,0.054217,0.107750
4034,West Bank and Gaza,2015,6048.976597,0.704186,8.707644,-0.350712,0.013633,0.034828
4035,Zambia,2015,3443.555206,0.285428,8.144260,-1.253766,-0.001883,-0.024175


In [362]:
formula="d_lnco2pc ~ d_lngdppc"

###Pooled regression 2015
ols_pooled2015 = smf.ols("d_lnco2pc ~ d_lngdppc + country", temp.query("year == 2015")).fit(cov_type="HC0")

In [363]:
models = [ols_pooled]
names = ["Pooled regression 2015"]
stargazer = Stargazer(models)
stargazer.rename_covariates(
    {
        "Intercept": "Constant",
        "d_lngdppc": "GDP per capita log change, cumulative coeff.",
        "d_lnco2pc": "CO2 emissions per capita log change, cumulative coeff."
    }
)

stargazer.covariate_order(
    [
        "d_lngdppc",
        "Intercept"
    ]
)

stargazer.custom_columns(names, [1])
HTML(stargazer.render_html())

0,1
,
,Dependent variable:d_lnco2pc
,
,Pooled regression 2015
,(1)
,
"GDP per capita log change, cumulative coeff.",-0.097***
,(0.000)
Constant,-0.168***
,(0.000)


The per capita CO2 emissions were 0.097% lower, on average, in 2015 for every 1% increase in GDP per capita, controlling for a country.

In [367]:
# Fd, no lags

model = smf.wls("d_lnco2pc ~ d_lngdppc", temp)
fd_lm = model.fit(
    cov_type="cluster",
    cov_kwds={"groups": temp.loc[model.data.row_labels, "country"]},
)
fd_lm.summary()


0,1,2,3
Dep. Variable:,d_lnco2pc,R-squared:,0.074
Model:,WLS,Adj. R-squared:,0.074
Method:,Least Squares,F-statistic:,36.52
Date:,"Sun, 28 Mar 2021",Prob (F-statistic):,8.7e-09
Time:,21:35:54,Log-Likelihood:,2858.4
No. Observations:,4213,AIC:,-5713.0
Df Residuals:,4211,BIC:,-5700.0
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0002,0.003,0.064,0.949,-0.005,0.005
d_lngdppc,0.6141,0.102,6.043,0.000,0.415,0.813

0,1,2,3
Omnibus:,1846.319,Durbin-Watson:,1.975
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166502.385
Skew:,1.164,Prob(JB):,0.0
Kurtosis:,33.71,Cond. No.,17.6


In [31]:
###Panel regression
from linearmodels import PanelOLS

In [33]:
temp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,gdppc,co2pc
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Albania,1992,3275.44433583801,0.774724910911141
Algeria,1992,8383.77024779121,2.96498636282543
Angola,1992,5148.25282361678,0.410522931864339
Antigua and Barbuda,1992,15497.4647441974,4.0952366180143
Argentina,1992,16209.3230774449,3.61928035773818


In [None]:
lm = PanelOLS.from_formula(
    "surv ~ imm + TimeEffects + EntityEffects",
    data_balanced,
    weights=data_balanced["avgpop"],
).fit(cov_type="clustered", cluster_entity=True)