Skip to content

Commit

Permalink
Added Poster.
Browse files Browse the repository at this point in the history
Fixed bug with combining balance statistics with weighting.
  • Loading branch information
jbryer committed May 16, 2023
1 parent 8ac769d commit 496083d
Show file tree
Hide file tree
Showing 18 changed files with 139 additions and 31 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ PSAboot Poster.graffle
build.R
^CRAN-SUBMISSION$
README.Rmd
Poster/
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Type: Package
Package: PSAboot
Title: Bootstrapping for Propensity Score Analysis
Version: 1.3.6
Date: 2023-03-21
Version: 1.3.7
Date: 2023-04-15
Authors@R:
person("Jason", "Bryer", , "jason@bryer.org", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2454-0402"))
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# PSAboot 1.3.7

* Fixed a bug where the balance table wasn't combined correctly if the covariates were not specified in the correct order.

# PSAboot 1.3.6

* Update to re-release to CRAN.
Expand Down
Binary file added Poster/PSAboot_Poster.graffle
Binary file not shown.
Binary file added Poster/PSAboot_Poster.pdf
Binary file not shown.
88 changes: 88 additions & 0 deletions Poster/Poster.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# This script creates the figures and output used for the PSAboot poster located
# at https://github.com/jbryer/PSAboot/Poster

library(PSAboot) # install.packages('PSAboot')
library(psa) # remotes::install_github('jbryer/psa')

data(tutoring, package = 'TriMatch')

#' Propensity score weighting implementation for bootstrapping.
#'
#' @inherit boot.strata return params
#' @param estimand which treatment effect to estimate. Values can be ATE, ATT,
#' ATC, or ATM.
#' @export
boot.weighting <- function(Tr, Y, X, X.trans, formu, estimand = 'ATE', ...) {
formu.treat <- update.formula(formu, 'treat ~ .')
df <- cbind(treat = Tr, X)
ps <- fitted(glm(formu.treat,
data = df,
family = binomial(link = 'logit')))

weights <- psa::calculate_ps_weights(treatment = Tr,
ps = ps,
estimand = estimand)

X.trans.scaled <- lapply(X.trans, scale) |> as.data.frame()
X.trans.scaled$treat <- Tr
lm_balance <- glm(treat ~ .,
data = X.trans.scaled,
family = quasibinomial(link = 'logit'),
weights = weights) |> summary()

te_lm <- lm(formula = Y ~ treat,
data = data.frame(Y = Y, treat = Tr),
weights = weights) |> summary()

se <- te_lm$coefficients[2,]['Std. Error'] |> unname()
te <- te_lm$coefficients[2,]['Estimate'] |> unname()

return(list(
summary = c(estimate = te,
ci.min = te - 1.96 * se,
ci.max = te + 1.96 * se,
se.wtd = se,
approx.t = unname(te_lm$coefficients[2,]['t value'])),
details = te_lm,
balance = lm_balance$coefficients[,'Estimate'][-1]
))
}

tutoring.formu <- treat2 ~ Gender + Ethnicity + Military + ESL +
EdMother + EdFather + Age + Employment + Income + Transfer + GPA

tutoring$treat2 <- tutoring$treat != "Control"
covariates <- tutoring[,c("Gender", "Ethnicity", "Military", "ESL",
"EdMother", "EdFather", "Age", "Employment",
"Income", "Transfer", "GPA")]
tutoring.boot <- PSAboot(Tr = tutoring$treat2,
Y = tutoring$Grade,
X = covariates,
# formu = tutoring.formu,
methods = c(getPSAbootMethods(), 'weighting' = boot.weighting),
seed = 2112,
parallel = FALSE)
ls(tutoring.boot)

summary(tutoring.boot)

plot(tutoring.boot) + xlab('Average Treatment Effect')# + theme_minimal()
ggsave('Poster/treatment_effects.pdf', width = 12, height = 8, units = 'in', dpi = 100)

pdf('Poster/matrixplot.pdf', width = 12, height = 12)
matrixplot(tutoring.boot)
dev.off()

tutoring.balance <- balance(tutoring.boot)
summary(tutoring.balance)

plot(tutoring.balance) + theme(legend.position = 'bottom')
ggsave('Poster/balance.pdf', width = 12, height = 8, units = 'in', dpi = 100)

boxplot(tutoring.balance)
ggsave('Poster/balance_boxplot.pdf', width = 12, height = 8, units = 'in', dpi = 100)

boxplot(tutoring.boot)
ggsave('Poster/boxplot.pdf', width = 12, height = 4, units = 'in', dpi = 100)


Binary file added Poster/balance.pdf
Binary file not shown.
Binary file added Poster/balance_boxplot.pdf
Binary file not shown.
Binary file added Poster/boxplot.pdf
Binary file not shown.
Binary file added Poster/matrixplot.pdf
Binary file not shown.
Binary file added Poster/qr_code.pdf
Binary file not shown.
Binary file added Poster/qr_code.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Poster/treatment_effects.pdf
Binary file not shown.
40 changes: 20 additions & 20 deletions R/PSAboot.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ getPSAbootMethods <- function() {
PSAboot <- function(Tr, Y, X, M = 100,
formu = as.formula(paste0('treat ~ ', paste0(names(X), collapse=' + '))),
control.ratio = 5,
control.sample.size = min(control.ratio*min(table(Tr)), max(table(Tr))),
control.sample.size = min(control.ratio * min(table(Tr)), max(table(Tr))),
control.replace = TRUE,
treated.sample.size = min(table(Tr)),
treated.replace = TRUE,
Expand Down Expand Up @@ -141,11 +141,11 @@ PSAboot <- function(Tr, Y, X, M = 100,
complete.details[[paste0('details.', n)]] <- r$details
complete.details[[paste0('balance.', n)]] <- r$balance
complete.summary <- rbind(complete.summary, data.frame(
method=n,
estimate=unname(r$summary['estimate']),
ci.min=unname(r$summary['ci.min']),
ci.max=unname(r$summary['ci.max']),
stringsAsFactors=FALSE))
method = n,
estimate = unname(r$summary['estimate']),
ci.min = unname(r$summary['ci.min']),
ci.max = unname(r$summary['ci.max']),
stringsAsFactors = FALSE))
}

pb <- txtProgressBar(1,M,style=3)
Expand All @@ -171,11 +171,11 @@ PSAboot <- function(Tr, Y, X, M = 100,
result[[paste0('details.', n)]] <- r$details
result[[paste0('balance.', n)]] <- r$balance
result[['summary']] <- rbind(result[['summary']], data.frame(
Method=n,
estimate=unname(r$summary['estimate']),
ci.min=unname(r$summary['ci.min']),
ci.max=unname(r$summary['ci.max']),
stringsAsFactors=FALSE))
Method = n,
estimate = unname(r$summary['estimate']),
ci.min = unname(r$summary['ci.min']),
ci.max = unname(r$summary['ci.max']),
stringsAsFactors = FALSE))
}, error=function(e) {
warning(paste0('Error occurred during iteration ', i,
' for ', n, ' method: ', e))
Expand Down Expand Up @@ -207,15 +207,15 @@ PSAboot <- function(Tr, Y, X, M = 100,
cols] <- as.numeric((sum[j,cols]))
}
}
r <- list(pooled.summary=summary,
pooled.details=tmp,
complete.summary=complete.summary,
complete.details=complete.details,
Y=Y, Tr=Tr, X=X, M=M, seed=seed,
control.sample.size=control.sample.size,
treated.sample.size=treated.sample.size,
control.replace=control.replace,
treated.replace=treated.replace)
r <- list(pooled.summary = summary,
pooled.details = tmp,
complete.summary = complete.summary,
complete.details = complete.details,
Y = Y, Tr = Tr, X = X, M = M, seed = seed,
control.sample.size = control.sample.size,
treated.sample.size = treated.sample.size,
control.replace = control.replace,
treated.replace = treated.replace)
class(r) <- "PSAboot"
return(r)
}
4 changes: 2 additions & 2 deletions R/balance.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ balance <- function(psaboot, na.rm = TRUE, pool.fun = mean) {
index.balance <- which(substr(names(psaboot$complete.details), 1, 8) == 'balance.')
index.names <- substr(names(psaboot$complete.details)[index.balance], 9,
max(nchar(names(psaboot$complete.details))))
bal <- psaboot$complete.details[[ index.balance[1] ]]
bal_names <- names(psaboot$complete.details[[ index.balance[1] ]])
bal <- c()
for(i in index.balance) {
bal <- rbind(bal, psaboot$complete.details[[ i ]])
bal <- rbind(bal, psaboot$complete.details[[ i ]][bal_names])
}
bal <- abs(bal)
dimnames(bal)[[1]] <- index.names
Expand Down
10 changes: 9 additions & 1 deletion R/balance.boxplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,15 @@ boxplot.PSAboot.balance <- function(x,
method <- names(x$balances)[i]
tmp <- as.data.frame(x$balances[[i]])
tmp$Method <- method
combined <- rbind(combined, tmp)
if(nrow(combined) == 0) {
combined <- tmp
} else {
missing_vars <- names(combined)[!names(combined) %in% names(tmp)]
for(i in missing_vars) {
tmp[,i] <- NA
}
combined <- rbind(combined, tmp[,names(combined)])
}
}
tmp <- reshape2::melt(combined, id='Method')
tmp2 <- as.data.frame(x$unadjusted)
Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ editor_options:
<!-- badges: end -->


As the popularity of propensity score methods for estimating causal effects in observational studies increase, the choices researchers have for which methods to use has also increased. Rosenbaum (2012) suggested that there are benefits for testing the null hypothesis more than once in observational studies. With the wide availability of high power computers resampling methods such as bootstrapping (Efron, 1979) have become popular for providing more stable estimates of the sampling distribution. This paper introduces the `PSAboot` package for R that provides functions for bootstrapping propensity score methods. It deviates from traditional bootstrapping methods by allowing for different sampling specifications for treatment and control groups, mainly to ensure the ratio of treatment-to-control observations are maintained. Additionally, this framework will provide estimates using multiple methods for each bootstrap sample. Two examples are discussed: the classic National Work Demonstration and PSID (Lalonde, 1986) study and a study on tutoring effects on student grades.
As the popularity of propensity score methods for estimating causal effects in observational studies increase, the choices researchers have for which methods to use has also increased. Estimated treatment effects may be sensitive to choice of method. One approach to test the sensitivity of method choice is to test the null hypothesis more than once using more than one method (Rosenbaum, 2012). With the wide availability of high power computers resampling methods such as bootstrapping (Efron, 1979) have become popular for providing more estimates of the sampling distribution. This paper introduces the `PSAboot` R package that provides functions for bootstrapping propensity score methods. It deviates from traditional bootstrapping methods by allowing for different sampling specifications for treatment and control groups, mainly to ensure the ratio of treatment-to-control observations are consistent. This approach can also be used in situations where there is imbalance between the number of treatment and control observations by allowing for up and/or down sampling. Lastly, by estimating balance statistics and treatment effects for each bootstrap sample we can compare the distributions across multiple propensity score methods to examine the relative performance of these methods.

## Installation

Expand Down
17 changes: 12 additions & 5 deletions vignettes/PSAboot.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@ vignette: >

```{r setup, echo=FALSE, results='hide', message=FALSE, warning=FALSE}
require(knitr)
opts_chunk$set(comment = '')
opts_chunk$set(fig.width = 12,
fig.height = 8,
fig.align = 'center',
out.width = '100%',
echo = TRUE,
warning = FALSE,
message = FALSE)
require(PSAboot)
```

Expand Down Expand Up @@ -118,15 +124,16 @@ The `PSAboot` function returns an object of class `PSAboot`. The following S3 me

The `lalonde` (Lalonde, 1986) has become the *de defacto* teaching dataset in PSA since Dehejia and Wahba's (1999) re-examination of the National Supported Work Demonstration (NSW) and the Current Population Survey (CPS).

The `lalonde` data set is included in the `MatchIt` package. The crosstab shows that there are 429 control units and 185 treatment units.
The `lalonde` data set is included in the `Matching` package. The contingency table shows that there are 429 control units and 185 treatment units.

```{r lalonde.load}
data(lalonde, package='MatchIt')
data(lalonde, package='Matching')
table(lalonde$treat)
```
```{r lalonde.boot, cache=FALSE}
lalonde.formu <- treat ~ age + I(age^2) + educ + I(educ^2) + race +
married + nodegree + re74 + I(re74^2) + re75 + I(re75^2)
lalonde.formu <- treat~age + I(age^2) + educ + I(educ^2) + black +
hisp + married + nodegr + re74 + I(re74^2) + re75 + I(re75^2) +
u74 + u75
boot.lalonde <- PSAboot(Tr = lalonde$treat,
Y = lalonde$re78,
X = lalonde,
Expand Down

0 comments on commit 496083d

Please sign in to comment.