/
Estimator.R
276 lines (240 loc) · 9.59 KB
/
Estimator.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#' Estimate Causal Effects of Population Treatment Policies Assuming Clustered
#' Interference
#'
#' This function implements the estimators from Barkley et al. (2017) for
#' estimating causal effects \emph{("FX")} of treatment policies from an
#' observational study when \strong{clustered interference} is assumed.
#' Clustered interference is also often known as "partial" interference. For the
#' manuscript introducing the methods in \pkg{clusteredinterference}, see: URL
#' \url{https://arxiv.org/abs/1711.04834}
#'
#'
#' @param data A \code{data.frame} (not a \code{tibble}). Columns of
#' \code{factor} types are not recommended and will sometimes throw
#' (defensive) errors.
#' @param formula The \code{formula} defines the different components of the
#' method. The components are specified by \code{outcome | treatment ~
#' f(covariates) + (1|cluster_id) | cluster_id}. The middle component is
#' passed to \code{\link[lme4]{glmer}}, so \code{treatment ~ f(covariates) +
#' (1|cluster_id)} specifies the model form for the propensity score (i.e.,
#' treatment) model. See Details.
#' @param alphas A numeric vector for the probabilities corresponding to the
#' policies of interest. Each entry must be between 0 and 1.
#' @param k_samps The maximum number of vectors to evaluate to estimate the
#' counterfactual probabilities (i.e., \eqn{\omega(A,N,\alpha)}). Setting to 0
#' avoids approximation at the cost of increased computation time. Recommended
#' to set <= 5.
#' @param ... The dots argument. The user may supply their own
#' \code{target_grid} through the dots argument. The \code{target_grid} can be
#' made through exported function \code{\link{makeTargetGrid}}
#' @param nAGQ This is the number of Adaptive Gaussian Quadrature points used in
#' the \code{\link[lme4]{glmer}} model fitting computation. Defaults to 2. It
#' is recommended to use more than 1.
#' @param root_options These are passed to \code{\link[rootSolve]{multiroot}}
#' function.
#' @param return_matrices A Boolean on whether to return the "bread" and "meat"
#' matrices in the sandwich variance. Defaults to \code{FALSE}.
#' @param verbose A Boolean on whether to print output to \code{stderr}.
#' Defaults to FALSE.
#'
#' @details These estimators are based on inverse probability-weighting by the
#' propensity score for treatment (IPW) to estimate causal effects of
#' counterfactual policies of interest (i.e., \emph{"policy effects"}) when
#' clustered interference is assumed. The policies of interest correspond to
#' counterfactual scenarios in which treatment may be correlated within
#' clusters.
#'
#' This method estimates causal contrasts of these policies by estimating the
#' counterfactual treatment probabilities; taking the correlation structures
#' into account requires heavy computational resources, so the user should be
#' patient.
#'
#' The modeling formula for the propensity score (i.e., treatment) model is
#' specified via the \code{formula} formal argument. An example of a model
#' logit-linear fixed effects would be \code{Y | A ~ X1 + X2 + (1 |
#' cluster_ID) | cluster_ID}. A similar model that also includes an
#' interaction term is \code{Y | A ~ X1 + X2 + X1:X2 + (1 | cluster_ID) |
#' cluster_ID}.
#'
#' @author Brian G. Barkley, \email{BarkleyBG@@unc.edu}.
#'
#' Some of the plumbing functions for estimating the sandwich variance matrix
#' were adapted from Bradley Saul's
#' \href{https://cran.r-project.org/package=geex}{\pkg{geex}} package.
#'
#' Some of the plumbing functions for the logistic mixed model likelihood were
#' adapted from Bradley Saul's
#' \href{https://cran.r-project.org/package=inferference}{\pkg{inferference}}
#' package.
#'
#' @return A \code{list} object including: \enumerate{ \item \code{estimates}: A
#' tidy \code{data.frame} with columns \code{estimand}, \code{estimate},
#' \code{var}, \code{se}, \code{LCI} and \code{UCI} for 95\% CI's, and more
#' information. \item \code{parameters}: An untidy \code{list} of the point
#' estimates of all (target and nuisance) parameters. \item
#' \code{variance_matrices}: When \code{return_matrices} is \code{TRUE} this
#' is a \code{list} object for the "bread" and "meat" matrices in the sandwich
#' variance calculations for each estimand. Otherwise, it is a \code{list}
#' object with length 0. \item \code{propensity_scores}: The estimated
#' propensity scores for each cluster. \item \code{model}: The treatment model
#' object. \item \code{formula}: The full formula argument provided, after
#' coercion to a \code{\link[Formula]{Formula}} object}
#'
#' @seealso Please see the main package vignette at
#' \code{vignette("estimate-policyFX")}. It describes the necessary arguments,
#' as well as some extra functionality.
#'
#' @references Barkley, B. G., Hudgens, M. G., Clemens, J. D., Ali, M., and
#' Emch, M. E. (2017). Causal Inference from Observational Studies with
#' Clustered Interference. \emph{arXiv preprint arXiv:1711.04834}. (URL:
#' \url{https://arxiv.org/abs/1711.04834}.)
#'
#' Bradley C Saul and Michael G Hudgens (2017). A Recipe for
#' \code{inferference}: Start with Causal Inference. Add Interference. Mix
#' Well with R. \emph{Journal of Statistical Software} \strong{82}(2), pp.
#' 1-21. doi: <10.18637/jss.v082.i02> (URL:
#' \url{http://doi.org/10.18637/jss.v082.i02}).
#' \url{https://cran.r-project.org/package=inferference}.
#' \url{https://github.com/bsaul/inferference}.
#'
#' Bradley Saul (2017). \code{geex}: An API for M-Estimation.
#' \url{https://cran.r-project.org/package=geex}.
#' \url{https://github.com/bsaul/geex}.
#'
#'
#' @author Brian G. Barkley
#' @examples
#' \dontrun{
#' toy_data <- clusteredinterference::toy_data
#' causal_fx <- policyFX(
#' data = toy_data,
#' formula = Outcome | Treatment ~ Age + Distance + (1 | Cluster_ID) | Cluster_ID,
#' alphas = c(.3, .5),
#' k_samps = 1,
#' verbose = FALSE
#' )}
#' @export
policyFX <- function(
data,
formula,
alphas,
k_samps=NULL,
...,
verbose = FALSE,
root_options = NULL,
nAGQ=2,
return_matrices = FALSE
){
if (is.null(k_samps)) {
stop("Please specify a numeric for `k_samps` argument. 0 is for full sample, but <=5 is recommended")
}
max_k_evals <- k_samps
dots <- list(...)
if ("target_grid" %in% names(dots)) {
target_grid <- dots$target_grid
alphas_in_grid <- sort(unique(target_grid$alpha1))
if (missing("alphas")){
alphas <- alphas_in_grid
}
if (!all(alphas == alphas_in_grid)){
warning("target_grid has different alphas than those specified; the alphas from the target_grid argument will be used")
alphas <- alphas_in_grid
}
} else {
target_grid <- makeTargetGrid(alphas = alphas)
}
target_grid$k <- max_k_evals
formule <- Formula::Formula(formula)
arguments <- argTests(
data = data,
formule = formule,
alphas = alphas,
max_k_evals = max_k_evals
)
data <- arguments$data ##sorted
alphas <- arguments$alphas ##sorted
modeling_formula <- arguments$modeling_formula
grouping_var_name<- arguments$grouping_var_name
## verbose
if (verbose){print("I. Estimate GLMER model")}
## verbose
glmer_fit <- lme4::glmer(
data = data,
formula = modeling_formula,
family = stats::binomial,
nAGQ = nAGQ
)
model_parms <- unlist(lme4::getME(glmer_fit, c("beta","theta")))
split_data_list <- splitData(
data = data,
formule = formule,
glmer_fit = glmer_fit,
grouping_var_name = grouping_var_name
)
## verbose
if (verbose){print("IB. get MCFP vecs")}
## verbose
vec_dfm_no_alphas <- sampleVecs4Trt(
split_data_list = split_data_list,
max_k_evals = max_k_evals
)
index_references <- makeIndexReferences(
model_parms = model_parms,vec_dfm_no_alphas = vec_dfm_no_alphas)
typical_args <- list(
split_data_list = split_data_list,
model_parms = model_parms,
verbose = verbose
)
CFBI_args <- append(typical_args, list(
alphas = alphas,
glmer_fit = glmer_fit,
root_options = root_options,
vec_dfm_no_alphas = vec_dfm_no_alphas
))
CFBI_MCFP_list <- do.call(what=estimateNuisance,args=CFBI_args)
CFBI_estimates <- CFBI_MCFP_list$CFBI_estimates
MCFP_tidy_estimates <- CFBI_MCFP_list$MCFP_tidy_estimates
target_args <- append(typical_args, list(
CFBI_estimates = CFBI_estimates,
MCFP_tidy_estimates = MCFP_tidy_estimates,
alphas = alphas,
target_grid = target_grid
))
## to output
targets_and_cps <- do.call(what=estimateTargets,args=target_args)
target_estimates <- targets_and_cps$target_grid
var_args <- target_args
var_args$target_grid <- NULL
var_args$target_grid_estimates <- target_estimates
## Diagnostics
MCFP_tidier_estimates <- tidyOutMCFP(MCFP_tidy_estimates)
## to output
parameters <- list(
model_parms = model_parms,
CFBI_estimates = CFBI_estimates,
MCFP_estimates = MCFP_tidier_estimates,
target_estimates = target_estimates
)
if (verbose){print('Beginning variance calculations. This may take some time.')}
var_args$index_references <- index_references
var_args$return_matrices <- return_matrices
var_ests_list <- do.call(estimateVariance, args = var_args)
## to ouptut
tidy_estimates <- tidyEstimates(var_ests_list$tidy_grid)
## to output
if (return_matrices== TRUE){
variance_matrices <- var_ests_list$var_matrices
} else{
variance_matrices <- list()
}
out_list <- list(
estimates = tidy_estimates,
parameters = parameters,
variance_matrices = variance_matrices,
propensity_scores = targets_and_cps$prop_scores,
model = glmer_fit,
formula = formule
)
class(out_list) <- "policyFX"
out_list
}