In [1]:
# Libraries Required
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from linearmodels import PanelOLS

# Read in Data
conflict = pd.read_csv('./../20_intermediate/confounding_factors.csv')
# Discard the stray 'Unnamed: ...' columns (presumably index columns left over
# from earlier CSV round-trips -- TODO confirm). errors='ignore' keeps this
# cell working if the intermediate file is regenerated without them.
conflict = conflict.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

# Create indicator variables for Difference in Difference
CONFLICT_START_YEAR = 2014  # years >= this value are flagged as post-conflict
INTENSITY_THRESHOLD = 1     # intensity strictly above this value is flagged as treated

# Vectorized comparisons yield the same 0/1 integers as a row-wise apply;
# missing values compare False and therefore map to 0.
conflict['PostConflict'] = (conflict['Year'] >= CONFLICT_START_YEAR).astype(int)
# NOTE(review): Treated is evaluated per row, so a county's flag can differ
# between years if `intensity` does -- confirm this is the intended
# definition of the treatment group.
conflict['Treated'] = (conflict['intensity'] > INTENSITY_THRESHOLD).astype(int)

In [2]:
# Turn the current row labels into an `index` column, then preview one row
# to check the resulting column layout.
conflict = conflict.reset_index(drop=False)
conflict.head(n=1)

Unnamed: 0,index,County,State,Year,Population,Pop_percent_change,Fatalities,intensity,Hospitals,estimated_yield,Population_Percent_Child,Population_Percent_Female,Poverty_Rate,Airport,PostConflict,Treated
0,0,Abiemnhom,Unity,2011,19393,0.0,0,0.0,0,15.0,0.5277,0.5003,0.53,0,0,0


In [3]:
# Baseline difference-in-differences regression.
# `Treated * PostConflict` expands to both main effects plus their
# interaction; the interaction coefficient is the DiD estimate.
# NOTE(review): fit() is called with defaults, so the summary reports
# nonrobust standard errors -- confirm that is acceptable for county-year data.
BaseModel = smf.ols(
    formula="Pop_percent_change ~ Treated * PostConflict ",
    data=conflict,
).fit()
print(BaseModel.summary())

                            OLS Regression Results                            
Dep. Variable:     Pop_percent_change   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     2.829
Date:                Sat, 23 May 2020   Prob (F-statistic):             0.0383
Time:                        16:19:37   Log-Likelihood:                 363.21
No. Observations:                 405   AIC:                            -718.4
Df Residuals:                     401   BIC:                            -702.4
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
Intercept                0.0481 

In [4]:
# Difference-in-differences with additional covariates.
# Same Treated/PostConflict terms as the baseline, plus hospital, demographic,
# poverty and airport columns entered additively as controls.
# NOTE(review): fit() is called with defaults, so the summary reports
# nonrobust standard errors -- confirm that is acceptable for county-year data.
CFModel = smf.ols(
    formula="Pop_percent_change ~ Treated * PostConflict + Hospitals + Population_Percent_Child + Population_Percent_Female + Poverty_Rate + Airport",
    data=conflict,
).fit()
print(CFModel.summary())

                            OLS Regression Results                            
Dep. Variable:     Pop_percent_change   R-squared:                       0.034
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     1.401
Date:                Sat, 23 May 2020   Prob (F-statistic):              0.178
Time:                        16:19:38   Log-Likelihood:                 366.04
No. Observations:                 405   AIC:                            -710.1
Df Residuals:                     394   BIC:                            -666.0
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept             

In [5]:
# Difference-in-differences with county dummies.
# C(County) tells the formula parser to treat County as categorical, adding
# one indicator per county level (minus the reference level) next to the
# Treated/PostConflict terms.
# NOTE(review): fit() is called with defaults, so the summary reports
# nonrobust standard errors -- confirm that is acceptable for county-year data.
CountyModel = smf.ols(
    formula="Pop_percent_change ~ C(County) + Treated * PostConflict",
    data=conflict,
).fit()
print(CountyModel.summary())

                            OLS Regression Results                            
Dep. Variable:     Pop_percent_change   R-squared:                       0.171
Model:                            OLS   Adj. R-squared:                 -0.034
Method:                 Least Squares   F-statistic:                    0.8337
Date:                Sat, 23 May 2020   Prob (F-statistic):              0.835
Time:                        16:19:39   Log-Likelihood:                 396.88
No. Observations:                 405   AIC:                            -631.8
Df Residuals:                     324   BIC:                            -307.4
Df Model:                          80                                         
Covariance Type:            nonrobust                                         
                                  coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [6]:
# Panel OLS
# Index the frame by (County, Year) so linearmodels sees County as the entity
# and Year as the time dimension. The guard makes the cell safe to re-run:
# a second set_index would raise KeyError because the columns have already
# been moved into the index.
PANEL_INDEX = ['County', 'Year']
if list(conflict.index.names) != PANEL_INDEX:
    conflict = conflict.set_index(PANEL_INDEX)

# EntityEffects adds county fixed effects; drop_absorbed=True drops any
# regressor that those effects fully absorb instead of raising an error.
PanelModel = PanelOLS.from_formula(
    'Pop_percent_change ~ Treated * PostConflict + EntityEffects',
    data=conflict,
    drop_absorbed=True,
)

# Keep the fitted results under a name so later cells can reference them;
# standard errors are clustered by entity (county).
PanelResults = PanelModel.fit(cov_type='clustered', cluster_entity=True)
PanelResults

0,1,2,3
Dep. Variable:,Pop_percent_change,R-squared:,0.0212
Estimator:,PanelOLS,R-squared (Between):,-0.2857
No. Observations:,405,R-squared (Within):,0.0212
Date:,"Sat, May 23 2020",R-squared (Overall):,-0.0646
Time:,16:19:39,Log-likelihood,396.88
Cov. Estimator:,Clustered,,
,,F-statistic:,2.3359
Entities:,78,P-value,0.0737
Avg Obs:,5.1923,Distribution:,"F(3,324)"
Min Obs:,5.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Treated,-0.0002,0.0123,-0.0156,0.9876,-0.0245,0.0241
PostConflict,-0.0163,0.0144,-1.1283,0.2600,-0.0446,0.0121
Treated:PostConflict,-0.0174,0.0195,-0.8966,0.3706,-0.0557,0.0208
