# Using Instrumental Variables for Treatment Effects in Quasi-Experiments

In [17]:
import pandas as pd
import math
import statsmodels.formula.api as smf  # for doing statistical regression
import statsmodels.api as sm      # access to the wider statsmodels library, including R datasets

In [7]:
# Import data set
voucher_df = pd.read_sas('colvoucher.sas7bdat')
voucher_df.head()

## Summary Statistics

In [9]:
print("==============================================================================")
print("                              OVERALL SUMMARY"                                 )
print("==============================================================================")

print(voucher_df.describe())

for i in range(2):
    print("==============================================================================")
    print("                         USE FINANCIAL AID = %(i)d" % {"i":i}                  )
    print("==============================================================================")
    print(voucher_df[voucher_df['use_fin_aid']==i].describe())

                              OVERALL SUMMARY
                id   won_lottry         male     base_age    finish8th  \
count  1171.000000  1171.000000  1171.000000  1171.000000  1171.000000   
mean   1357.010248     0.505551     0.504697    12.004270     0.681469   
std     890.711584     0.500183     0.500192     1.347038     0.466106   
min       3.000000     0.000000     0.000000     7.000000     0.000000   
25%     616.000000     0.000000     0.000000    11.000000     0.000000   
50%    1280.000000     1.000000     1.000000    12.000000     1.000000   
75%    1982.500000     1.000000     1.000000    13.000000     1.000000   
max    4030.000000     1.000000     1.000000    17.000000     1.000000   

       use_fin_aid  
count  1171.000000  
mean      0.581554  
std       0.493515  
min       0.000000  
25%       0.000000  
50%       1.000000  
75%       1.000000  
max       1.000000  
                         USE FINANCIAL AID = 0
                id  won_lottry        male    base_

## Two Stage Least Squares Estimation

In [13]:
print("==============================================================================")
print("                                  FIRST STAGE"                                 )
print("==============================================================================")
result = smf.glm(formula = "use_fin_aid ~ won_lottry + male + base_age", 
                 data=voucher_df,
                 family=sm.families.Binomial()).fit()
voucher_df['use_fin_aid_fitted']= result.predict()
print(result.summary())

print()
print()
print("==============================================================================")
print("                                  SECOND STAGE"                                )
print("==============================================================================")
result = smf.glm(formula = " finish8th ~ use_fin_aid_fitted + male + base_age", 
                 data=voucher_df,
                 family=sm.families.Binomial()).fit()
print(result.summary())

                                  FIRST STAGE
                 Generalized Linear Model Regression Results                  
Dep. Variable:            use_fin_aid   No. Observations:                 1171
Model:                            GLM   Df Residuals:                     1167
Model Family:                Binomial   Df Model:                            3
Link Function:                  logit   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -488.00
Date:                Wed, 27 Jun 2018   Deviance:                       975.99
Time:                        09:34:51   Pearson chi2:                 1.16e+03
No. Iterations:                     5                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.3455      0.731      0.472      0.637      -1.088       1.779
won_lo

In [22]:
# Transforming the log-odds units into odds ratio
print("Those using financial aid are {:2f} times more likely to finish the 8th grade!".format(math.exp(0.7743)))

Those using financial aid are 2.169073 times more likely to finish the 8th grade!
