In [2]:
#######################
#   John Henderson    #
#    UC-Berkeley      #            
#       &             #
#   Sara Chatfield    #
#    UC-Berkeley      #       
#                     # 
#    Who Matches:     #
#   Replication Code  #
#                     #
#   Released          #
#     Version 1.0     #
#                     #
#   Nov 17, 2010      #
#######################


# USAGE
#########################################  
# This is replication code for Henderson, John, and Sara Chatfield. 2011. "Who Matches? Propensity Scores and Bias in the Causal Effects
# of Education on Participation," Journal of Politics.  The software, code, and data may be used, distributed, and modified freely 
# with proper attribution to both Henderson and Chatfield (2011) and Kam and Palmer (2008). 


# DESCRPITION
######################################### 
# This is the code for the replication of Kam and Palmers (2008) propensity score matching analysis. This code is 
# slightly modified and expanded from the original code provided by Kam and Palmer (2008), and correspondes to their article 
# 'Reconsidering the Effects of Education on Political Participation'.  
# 
# In addition to the authors propensity score, included below are MatchBalance statistics, placbeo tests, and alternative 
# matching models.
#  
# Tables 1 - 4 in the paper "Who Matches?", are produced below.  These are the models 'mout_r1', 'mout_r1D', 'mout_r3', 
# and 'mout_r3D'.  Balance statistisc are found in 'matbal_r1' and 'matbal_r3'.
#
# Additionally, diagnostic statistics are also estimated below.  These includes the moments of the Kam and Palmer (2008) 
# propesity score, as well as its degree of overlap, among other things.
 

library(MASS)
library(Matching)
 
source('funcs.R')   
load("WhoMatches.Rdata")

In [4]:
#source('objects.R')
attach(data_rep)
attach(factors)

In [5]:
# KP Pscore
#  Note the the Z suffix indicates that the covariate is factorized following Kam and Palmer (2008)

model=formula(college ~ yPubAffZ + yNewspaperZ + yRadioZ + yMagazineZ + yFamTalkZ + yFrTalkZ + yAdultTalkZ + ySPIDZ + yGovtOpinionZ + yGovtCrookZ + yGovtWasteZ + yTrGovtZ + yGovtSmartZ + yGovt4AllZ + yLifeWishZ + yGLuckZ + yFPlansZ + yWinArgZ + yStrOpinionZ + 
	yMChangeZ + yTrOthersZ + yOthHelpZ + yOthFairZ + yKnowledgeZ + yNextSchZ + yGPAZ + ySchOfficerZ + ySchPublishZ + yHobbyZ + ySchClubZ + yOccClubZ + yNeighClubZ + yRelClubZ + yYouthOrgZ + yClubLevZ + yPhoneZ + yGenZ + yRaceZ + pNewspaperZ + pRadioZ + pTVZ + 
	pMagazineZ + pLifeWishZ + pGLuckZ + pFPlansZ + pWinArgZ + pStrOpinionZ + pMChangeZ + pTrOthersZ + pOthHelpZ + pOthFairZ + pSPIDZ + pVoteZ + pPersuadeZ + pRallyZ + pOthActZ + pPolClubZ + pButtonZ + pMoneyZ + pGovtOpinionZ + pGovtCrookZ + pGovtWasteZ + pTrGovtZ + 
	pGovtSmartZ + pGovt4AllZ + pEmployZ + pEducHHZ + pChurchOrgZ + pFratOrgZ + pProOrgZ + pCivicOrgZ + pCLOrgZ + pNeighClubZ + pSportClubZ + pInfClubZ + pFarmGrZ + pWomenClubZ + pClubLevZ + pHHIncZ + pOwnHomeZ + pKnowledgeZ)
pscore=glm(model,family=binomial(link=logit))
etahat=pscore$fitted.values


# Propensity Score Matching
#  Note that 'out' is a n x 355 matrix, which is produced by dichotomizing 109 (86) covariates.  

#1973: 1 to 3

mout_r1 = Match(Y = yppnscal, Tr = college, X = etahat, estimand="ATT", M = 3) 
summary(mout_r1)

matbal_r1 = MatchBalance(college ~ etahat + yGPA + yGen + yBlack + yRep+ yKnowledge + yNextSch + pVote + pPersuade + pParticipate2 + pEmploy + pEducHH + pEducW + pHHInc + pOwnHome + pRep  + pKnowledge, match.out = mout_r1,nboots=1000)
matbal_all1=MatchBalance(college ~ out, match.out = mout_r1)
mb1=percent.bal(matbal_all1)
mb1[[1]] # 0.4084507 :: 0.2535211

# Outliers Dropped: 723, 676, 1061, 337, 595

mout_r1D = Match(Y= yppnscal[-c(723,676,1061,337,595)], Tr = college[-c(723,676,1061,337,595)], X = etahat[-c(723,676,1061,337,595)], estimand="ATT", M = 3) 
summary(mout_r1D)


#1973: 1 to 1

mout_r2 = Match(Y=yppnscal, Tr=college, X=etahat, estimand="ATT", M=1)   
summary(mout_r2)

matbal_r2 = MatchBalance(college ~ etahat + yGPA + yGen + yBlack + yRep+ yKnowledge + yNextSch + pVote + pPersuade + pParticipate2 + pEmploy + pEducHH + pEducW + pHHInc + pOwnHome + pRep  + pKnowledge, match.out = mout_r2,nboots=1000)
matbal_all2=MatchBalance(college ~ out, match.out = mout_r2)
mb2=percent.bal(matbal_all2)
mb2[[1]] # 0.4084507 :: 0.2366197

# Outliers Dropped: 723, 676, 1061, 337, 595

mout_r2D = Match(Y=yppnscal[-c(723,676,1061,337,595)], Tr=college[-c(723,676,1061,337,595)], X=etahat[-c(723,676,1061,337,595)], estimand="ATT", M=1)   
summary(mout_r2D)


#1982: 1 to 3

mout_r3 = Match(Y = y1982yppnscal[!is.na(y1982yppnscal)], Tr = college[!is.na(y1982yppnscal)], X = etahat[!is.na(y1982yppnscal)], estimand="ATT", M = 3) 
summary(mout_r3)

matbal_r3 = MatchBalance(college ~ etahat + yGPA + yGen + yBlack + yRep+ yKnowledge + yNextSch + pVote + pPersuade + pParticipate2 + pEmploy + pEducHH + pEducW + pHHInc + pOwnHome + pRep  + pKnowledge, match.out = mout_r3,nboots=1000)
matbal_all3=MatchBalance(college ~ out, match.out = mout_r3)
mb3=percent.bal(matbal_all3)
mb3[[1]] # 0.4084507 :: 0.2394366

# Outliers Dropped [after dropping missings in 1982 and re-indexing]:  613, 572, 897, 337, 507 

mout_r3D = Match(Y = y1982yppnscal[!is.na(y1982yppnscal)][-c(572,897,296,613,507)], Tr = college[!is.na(y1982yppnscal)][-c(572,897,296,613,507)], X = etahat[!is.na(y1982yppnscal)][-c(572,897,296,613,507)], estimand="ATT", M = 3) 
summary(mout_r3D)


#1982: 1 to 1

mout_r4 = Match(Y = y1982yppnscal[!is.na(y1982yppnscal)], Tr = college[!is.na(y1982yppnscal)], X = etahat[!is.na(y1982yppnscal)], estimand="ATT", M = 1) 
summary(mout_r4)

matbal_r4 = MatchBalance(college ~ etahat + yGPA + yGen + yBlack + yRep+ yKnowledge + yNextSch + pVote + pPersuade + pParticipate2 + pEmploy + pEducHH + pEducW + pHHInc + pOwnHome + pRep  + pKnowledge, match.out = mout_r4,nboots=1000)
matbal_all4=MatchBalance(college ~ out, match.out = mout_r4)
mb4=percent.bal(matbal_all4)
mb4[[1]] # 0.4084507 :: 0.2732394

# Outliers Dropped [after dropping missings in 1982 and re-indexing]:  613, 572, 897, 337, 507 

mout_r4D = Match(Y = y1982yppnscal[!is.na(y1982yppnscal)][-c(572,897,296,613,507)], Tr = college[!is.na(y1982yppnscal)][-c(572,897,296,613,507)], X = etahat[!is.na(y1982yppnscal)][-c(572,897,296,613,507)], estimand="ATT", M = 1) 
summary(mout_r4D)


# Pscore Stats/Diagnostics

# Control Outlers Index (1973): 723, 676, 1061, 337, 595
# Control Outlers Index (1982): 613, 572, 897, 337, 507 

# Moments 

caliper = .25
mean(etahat)                                                                # 0.6403509
mean(etahat[college==1])                                                    # 0.8338374
mean(etahat[college==0])                                                    # 0.2958504
sd(etahat)                                                                  # 0.3512628
sd(etahat)*caliper                                                          # 0.0878157

# Max/Min/95th Percentile

sort(etahat[college==0])[round(.95*length(etahat[college==0]))+1]           # 0.8280431 
sort(etahat[college==1])[round(.95*length(etahat[college==1]))+1]           # 0.9998174 
max(etahat[college==1])                                                     # 0.9999998
min(etahat[college==1])                                                     # 0.02330369
max(etahat[college==0])                                                     # 0.9889442
min(etahat[college==0])                                                     # 1.196065e-10

# No Overlap at the Endpoints

length(which(etahat[college==0]>.9))/length(etahat[college==0])             # 0.03104213
length(which(etahat[college==0]>.95))/length(etahat[college==0])            # 0.01108647
length(which(etahat[college==1]>.9))/length(etahat[college==0])             # 0.5728518
length(which(etahat[college==1]>.95))/length(etahat[college==0])            # 0.4458281

length(which(etahat[college==1] > max(etahat[college==0])))/nrow(data_rep)  # 0.1714514
length(which(etahat[college==0] < min(etahat[college==1])))/nrow(data_rep)  # 0.05582137

# Outliers Index (1973)

which(etahat[college==0]>.95) 
sort(etahat[c(337,595,676,723,1061)])
index_r1=cbind(mout_r1$index.treated,mout_r1$index.control)

# Corresponding Matches of Outliers to College Attenders (Treatment)

length(which(index_r1[,2]==337))                                            # 60
length(which(index_r1[,2]==595))                                            # 58
length(which(index_r1[,2]==676))                                            # 316
length(which(index_r1[,2]==723))                                            # 287
length(which(index_r1[,2]==1061))                                           # 335

length(which(index_r1[,2]==337))/nrow(index_r1)                             # 0.02451982
length(which(index_r1[,2]==595))/nrow(index_r1)                             # 0.02370249
length(which(index_r1[,2]==676))/nrow(index_r1)                             # 0.1291377
length(which(index_r1[,2]==723))/nrow(index_r1)                             # 0.1172865
length(which(index_r1[,2]==1061))/nrow(index_r1)                            # 0.1369023

(length(which(index_r1[,2]==337))+length(which(index_r1[,2]==595))+length(which(index_r1[,2]==676))+length(which(index_r1[,2]==723))+length(which(index_r1[,2]==1061)))/nrow(index_r1) # 0.4315488

# Participatory Outliers: 1973 & 1982

mean(yppnscal[college==1 & etahat>.95])                                     # 3.26257
mean(yppnscal[college==0 & etahat>.95])                                     # 4
mean(yppnscal[college==1])                                                  # 2.793275
mean(yppnscal[college==0])                                                  # 1.427938

mean(data_rep$y1982yppnscal[college==1 & etahat>.95],na.rm=T)               # 3.319355
mean(data_rep$y1982yppnscal[college==0 & etahat>.95],na.rm=T)               # 4.4
mean(data_rep$y1982yppnscal[college==1],na.rm=T)                            # 3.041543
mean(data_rep$y1982yppnscal[college==0],na.rm=T)                            # 1.976064



# END


Estimate...  0.022748 
AI SE......  0.46899 
T-stat.....  0.048504 
p.val......  0.96131 

Original number of observations..............  1254 
Original number of treated obs...............  803 
Matched number of observations...............  803 
Matched number of observations  (unweighted).  2447 


***** (V1) etahat *****
                       Before Matching 	 	 After Matching
mean treatment........    0.83384 	 	    0.83384 
mean control..........    0.29585 	 	    0.82957 
std mean diff.........     248.21 	 	     1.9674 

mean raw eQQ diff.....    0.53703 	 	  0.0058215 
med  raw eQQ diff.....    0.60628 	 	  0.0037115 
max  raw eQQ diff.....    0.74688 	 	   0.017799 

mean eCDF diff........     0.4235 	 	   0.039755 
med  eCDF diff........    0.46206 	 	   0.003678 
max  eCDF diff........    0.69313 	 	    0.26359 

var ratio (Tr/Co).....    0.63414 	 	     1.0284 
T-test p-value........ < 2.22e-16 	 	 < 2.22e-16 
KS Bootstrap p-value.. < 2.22e-16 	 	 < 2.22e-16 
KS Naive p-


***** (V1) outout *****
                       Before Matching 	 	 After Matching
mean treatment........    0.94645 	 	    0.94645 
mean control..........      0.949 	 	    0.98163 
std mean diff.........    -1.1326 	 	    -15.617 

mean raw eQQ diff.....  0.0044346 	 	   0.035145 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........  0.0012757 	 	   0.017573 
med  eCDF diff........  0.0012757 	 	   0.017573 
max  eCDF diff........  0.0025514 	 	   0.035145 

var ratio (Tr/Co).....     1.0462 	 	     2.8108 
T-test p-value........    0.84523 	 	 0.00014846 


***** (V2) out *****
                       Before Matching 	 	 After Matching
mean treatment........    0.50311 	 	    0.50311 
mean control..........    0.37916 	 	    0.55882 
std mean diff.........     24.776 	 	    -11.135 

mean raw eQQ diff.....    0.12417 	 	   0.053535 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          

bf.bal,af.bal
0.4084507,0.2535211



Estimate...  1.3966 
AI SE......  0.51891 
T-stat.....  2.6913 
p.val......  0.0071168 

Original number of observations..............  1249 
Original number of treated obs...............  803 
Matched number of observations...............  803 
Matched number of observations  (unweighted).  2445 


Estimate...  0.86115 
AI SE......  0.59042 
T-stat.....  1.4585 
p.val......  0.14469 

Original number of observations..............  1254 
Original number of treated obs...............  803 
Matched number of observations...............  803 
Matched number of observations  (unweighted).  923 


***** (V1) etahat *****
                       Before Matching 	 	 After Matching
mean treatment........    0.83384 	 	    0.83384 
mean control..........    0.29585 	 	    0.83169 
std mean diff.........     248.21 	 	    0.98898 

mean raw eQQ diff.....    0.53703 	 	  0.0033153 
med  raw eQQ diff.....    0.60628 	 	  0.0021141 
max  raw eQQ diff.....    0.74688 	 	   0.011056 

mean eCDF diff.


***** (V1) outout *****
                       Before Matching 	 	 After Matching
mean treatment........    0.94645 	 	    0.94645 
mean control..........      0.949 	 	    0.98381 
std mean diff.........    -1.1326 	 	    -16.585 

mean raw eQQ diff.....  0.0044346 	 	    0.03467 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........  0.0012757 	 	   0.017335 
med  eCDF diff........  0.0012757 	 	   0.017335 
max  eCDF diff........  0.0025514 	 	    0.03467 

var ratio (Tr/Co).....     1.0462 	 	     3.1821 
T-test p-value........    0.84523 	 	 3.4498e-05 


***** (V2) out *****
                       Before Matching 	 	 After Matching
mean treatment........    0.50311 	 	    0.50311 
mean control..........    0.37916 	 	    0.38491 
std mean diff.........     24.776 	 	     23.626 

mean raw eQQ diff.....    0.12417 	 	    0.10293 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          

bf.bal,af.bal
0.4084507,0.2366197



Estimate...  1.0349 
AI SE......  0.7588 
T-stat.....  1.3638 
p.val......  0.17262 

Original number of observations..............  1249 
Original number of treated obs...............  803 
Matched number of observations...............  803 
Matched number of observations  (unweighted).  919 


Estimate...  0.071217 
AI SE......  0.4519 
T-stat.....  0.15759 
p.val......  0.87478 

Original number of observations..............  1050 
Original number of treated obs...............  674 
Matched number of observations...............  674 
Matched number of observations  (unweighted).  2057 


***** (V1) etahat *****
                       Before Matching 	 	 After Matching
mean treatment........    0.83384 	 	    0.63729 
mean control..........    0.29585 	 	    0.58467 
std mean diff.........     248.21 	 	     14.926 

mean raw eQQ diff.....    0.53703 	 	   0.064626 
med  raw eQQ diff.....    0.60628 	 	   0.049431 
max  raw eQQ diff.....    0.74688 	 	    0.20304 

mean eCDF diff...


***** (V1) outout *****
                       Before Matching 	 	 After Matching
mean treatment........    0.94645 	 	    0.95401 
mean control..........      0.949 	 	    0.94735 
std mean diff.........    -1.1326 	 	     3.1732 

mean raw eQQ diff.....  0.0044346 	 	  0.0077783 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........  0.0012757 	 	  0.0038892 
med  eCDF diff........  0.0012757 	 	  0.0038892 
max  eCDF diff........  0.0025514 	 	  0.0077783 

var ratio (Tr/Co).....     1.0462 	 	    0.87978 
T-test p-value........    0.84523 	 	    0.57273 


***** (V2) out *****
                       Before Matching 	 	 After Matching
mean treatment........    0.50311 	 	    0.45549 
mean control..........    0.37916 	 	    0.40012 
std mean diff.........     24.776 	 	     11.109 

mean raw eQQ diff.....    0.12417 	 	   0.050559 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          

bf.bal,af.bal
0.4084507,0.2394366



Estimate...  1.1046 
AI SE......  0.56482 
T-stat.....  1.9557 
p.val......  0.050506 

Original number of observations..............  1045 
Original number of treated obs...............  674 
Matched number of observations...............  674 
Matched number of observations  (unweighted).  2055 


Estimate...  -0.37302 
AI SE......  0.60533 
T-stat.....  -0.61623 
p.val......  0.53774 

Original number of observations..............  1050 
Original number of treated obs...............  674 
Matched number of observations...............  674 
Matched number of observations  (unweighted).  746 


***** (V1) etahat *****
                       Before Matching 	 	 After Matching
mean treatment........    0.83384 	 	    0.63729 
mean control..........    0.29585 	 	    0.65994 
std mean diff.........     248.21 	 	    -6.4257 

mean raw eQQ diff.....    0.53703 	 	   0.045147 
med  raw eQQ diff.....    0.60628 	 	   0.034378 
max  raw eQQ diff.....    0.74688 	 	    0.14498 

mean eCDF dif


***** (V1) outout *****
                       Before Matching 	 	 After Matching
mean treatment........    0.94645 	 	    0.95401 
mean control..........      0.949 	 	     0.9409 
std mean diff.........    -1.1326 	 	      6.252 

mean raw eQQ diff.....  0.0044346 	 	   0.013405 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........  0.0012757 	 	  0.0067024 
med  eCDF diff........  0.0012757 	 	  0.0067024 
max  eCDF diff........  0.0025514 	 	   0.013405 

var ratio (Tr/Co).....     1.0462 	 	    0.78908 
T-test p-value........    0.84523 	 	    0.27989 


***** (V2) out *****
                       Before Matching 	 	 After Matching
mean treatment........    0.50311 	 	    0.45549 
mean control..........    0.37916 	 	    0.32033 
std mean diff.........     24.776 	 	      27.12 

mean raw eQQ diff.....    0.12417 	 	    0.12466 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          

bf.bal,af.bal
0.4084507,0.2732394



Estimate...  0.37475 
AI SE......  0.79043 
T-stat.....  0.47411 
p.val......  0.63542 

Original number of observations..............  1045 
Original number of treated obs...............  674 
Matched number of observations...............  674 
Matched number of observations  (unweighted).  742 

