In [13]:
# Libraries Required
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from linearmodels import PanelOLS

# Load the intermediate confounding-factors table and discard the two stray
# 'Unnamed' columns in a single chained step.
# NOTE(review): these look like index columns written out by earlier to_csv
# calls -- confirm against the script that produces the CSV.
conflict = pd.read_csv('./../20_intermediate/confounding_factors.csv').drop(
    columns=['Unnamed: 0', 'Unnamed: 0.1']
)

In [14]:
# Preview the first rows of the freshly loaded table.
conflict.head()

Unnamed: 0,County,State,Year,Population,Pop_percent_change,Fatalities,intensity,Hospitals,estimated_yield,Population_Percent_Child,Population_Percent_Female,Poverty_Rate,Airport
0,Abiemnhom,Unity,2011,19393,0.0,0,0.0,0,15.0,0.5277,0.5003,0.53,0
1,Akobo,Jonglei,2011,157474,0.0,1,0.063503,0,39500.0,0.5063,0.5114,0.52,small_airport
2,Aweil Centre,Northern Bahr el Ghazal,2011,49174,0.0,0,0.0,6,143810.86,0.5897,0.4962,0.76,small_airport
3,Aweil East,Northern Bahr el Ghazal,2011,353806,0.0,0,0.0,21,89617.614,0.5946,0.5288,0.75,0
4,Aweil North,Northern Bahr el Ghazal,2011,146303,0.0,0,0.0,22,57690.84,0.5856,0.5275,0.77,0


In [15]:
# Build the two 0/1 dummies used by the difference-in-differences models:
#   PostConflict -> 1 for rows whose Year is 2014 or later, else 0
#   Treated      -> 1 for rows whose intensity is strictly greater than 1, else 0
# NOTE(review): Treated is evaluated row by row, so a county's treatment
# status may differ between years -- confirm this matches the intended design.
conflict["PostConflict"] = conflict['Year'].ge(2014).astype('int64')
conflict['Treated'] = conflict['intensity'].gt(1).astype('int64')

In [16]:
# Baseline difference-in-differences: regress population percent change on
# Treated, PostConflict and their interaction (no controls, default
# non-robust covariance), then print the text summary.
BaseModel = smf.ols(
    formula="Pop_percent_change ~ Treated * PostConflict ",
    data=conflict,
).fit()
print(BaseModel.summary())

                            OLS Regression Results                            
Dep. Variable:     Pop_percent_change   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     2.829
Date:                Mon, 27 Apr 2020   Prob (F-statistic):             0.0383
Time:                        10:02:52   Log-Likelihood:                 363.21
No. Observations:                 405   AIC:                            -718.4
Df Residuals:                     401   BIC:                            -702.4
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
Intercept                0.0481 

In [10]:
# Sanity check: (rows, columns) of the working table.
# NOTE(review): the recorded output for this cell carries an earlier execution
# count than its neighbours and reports 13 columns, i.e. it predates the
# PostConflict/Treated columns -- re-run to refresh it.
conflict.shape

(405, 13)

In [17]:
# Re-inspect the table to confirm the PostConflict and Treated columns were added.
conflict.head()

Unnamed: 0,County,State,Year,Population,Pop_percent_change,Fatalities,intensity,Hospitals,estimated_yield,Population_Percent_Child,Population_Percent_Female,Poverty_Rate,Airport,PostConflict,Treated
0,Abiemnhom,Unity,2011,19393,0.0,0,0.0,0,15.0,0.5277,0.5003,0.53,0,0,0
1,Akobo,Jonglei,2011,157474,0.0,1,0.063503,0,39500.0,0.5063,0.5114,0.52,small_airport,0,0
2,Aweil Centre,Northern Bahr el Ghazal,2011,49174,0.0,0,0.0,6,143810.86,0.5897,0.4962,0.76,small_airport,0,0
3,Aweil East,Northern Bahr el Ghazal,2011,353806,0.0,0,0.0,21,89617.614,0.5946,0.5288,0.75,0,0,0
4,Aweil North,Northern Bahr el Ghazal,2011,146303,0.0,0,0.0,22,57690.84,0.5856,0.5275,0.77,0,0,0


In [18]:
# Difference-in-differences with confounders: the Treated x PostConflict
# specification plus county dummies (C(County)) and the county-level
# covariates, fitted by OLS with the default non-robust covariance.
# NOTE(review): county dummies absorb any covariate that is constant within a
# county -- confirm the added covariates actually vary over time.
CFModel = smf.ols(
    formula=(
        "Pop_percent_change ~ Treated * PostConflict + C(County)"
        " + Hospitals + Population_Percent_Child + Population_Percent_Female"
        " + Poverty_Rate + Airport"
    ),
    data=conflict,
).fit()
print(CFModel.summary())

                            OLS Regression Results                            
Dep. Variable:     Pop_percent_change   R-squared:                       0.171
Model:                            OLS   Adj. R-squared:                 -0.037
Method:                 Least Squares   F-statistic:                    0.8209
Date:                Mon, 27 Apr 2020   Prob (F-statistic):              0.856
Time:                        10:03:03   Log-Likelihood:                 396.88
No. Observations:                 405   AIC:                            -629.8
Df Residuals:                     323   BIC:                            -301.4
Df Model:                          81                                         
Covariance Type:            nonrobust                                         
                                  coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------
Intercept         

In [20]:
# Difference in Difference by County (disabled cell: county dummies only, no other covariates)
# CountyModel = smf.ols("Pop_percent_change ~ C(County) + Treated * PostConflict", data = conflict).fit()
# print(CountyModel.summary())

In [6]:
# Panel OLS: the Treated x PostConflict specification estimated with county
# (entity) fixed effects and standard errors clustered by entity.
#
# PanelOLS needs an (entity, time) MultiIndex. Set it only while County and
# Year are still ordinary columns, so re-running this cell no longer raises
# KeyError on the second set_index call.
# NOTE: after this cell County/Year live in the index, so the formula cells
# that reference C(County) need the data reloaded before they can be re-run.
if {'County', 'Year'}.issubset(conflict.columns):
    conflict = conflict.set_index(['County', 'Year'])
PanelModel = PanelOLS.from_formula('Pop_percent_change ~ Treated * PostConflict + EntityEffects', data = conflict,
                           drop_absorbed=True)
# Keep the fitted results instead of discarding them; ending the cell on the
# results object still renders the summary as before.
PanelResults = PanelModel.fit(cov_type = 'clustered', cluster_entity = True)
PanelResults

0,1,2,3
Dep. Variable:,Pop_percent_change,R-squared:,0.0212
Estimator:,PanelOLS,R-squared (Between):,-0.2857
No. Observations:,405,R-squared (Within):,0.0212
Date:,"Mon, Apr 27 2020",R-squared (Overall):,-0.0646
Time:,09:58:02,Log-likelihood,396.88
Cov. Estimator:,Clustered,,
,,F-statistic:,2.3359
Entities:,78,P-value,0.0737
Avg Obs:,5.1923,Distribution:,"F(3,324)"
Min Obs:,5.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Treated,-0.0002,0.0123,-0.0156,0.9876,-0.0245,0.0241
PostConflict,-0.0163,0.0144,-1.1283,0.2600,-0.0446,0.0121
Treated:PostConflict,-0.0174,0.0195,-0.8966,0.3706,-0.0557,0.0208
