/
main.R
353 lines (297 loc) · 15.6 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
#' @title Estimate a Robust Bayesian T-Test
#'
#' @description \code{RoBTT} is used to estimate a robust Bayesian
#' t-test or truncated Bayesian t-test (if \code{truncation} is used).
#' The input either requires the vector of observations for
#' each group, \code{x1, x2}, or the summary statistics (only if the normal
#' likelihood models are used).
#'
#' @param x1 vector of observations of the first group
#' @param x2 vector of observations of the second group
#' @param mean1 mean of the first group
#' @param mean2 mean of the first group
#' @param sd1 standard deviation of the first group
#' @param sd2 standard deviation of the first group
#' @param N1 sample size of the first group
#' @param N2 sample size of the first group
#' @param truncation an optional list specifying truncation applied to the data.
#' Defaults to \code{NULL}, i.e., no truncation was applied and the full likelihood is
#' applied. Alternative the truncation can be specified via a named list with:
#' \describe{
#' \item{\code{"x"}}{where \code{x} is a vector of two values specifying the lower
#' and upper truncation points common across the groups}
#' \item{\code{"x1"} and \code{"x2"}}{where \code{x1} is a vector of two values specifying
#' the lower and upper truncation points for the first group and \code{x2} is a vector of
#' two values specifying the lower and upper truncation points for the second group.}
#' \item{\code{"sigma"}}{where \code{sigma} corresponds to the number of standard deviations
#' from the common mean where the truncation points should be set.}
#' \item{\code{"sigma1"} and \code{"sigma2"}}{where \code{sigma1} corresponds to the number of
#' standard deviations from the mean of the first group where the truncation points should be set
#' and \code{sigma2} corresponds to the number of standard deviations from the mean of the second
#' group where the truncation points should be set.}
#' }
#' @param prior_delta prior distributions for the effect size \code{delta} parameter
#' that will be treated as belonging to the alternative hypothesis. Defaults to \code{
#' prior(distribution = "Cauchy", parameters = list(location = 0, scale = sqrt(2)/2))}.
#' @param prior_rho prior distributions for the precision allocation \code{rho} parameter
#' that will be treated as belonging to the alternative hypothesis. Defaults to \code{
#' prior(distribution = "beta", parameters = list(alpha = 1, beta = 1))}.
#' @param prior_nu prior distribution for the degrees of freedom + 2 \code{nu}
#' parameter that will be treated as belonging to the alternative hypothesis.
#' Defaults to \code{prior(distribution = "exp", parameters = list(rate = 1))} if no
#' \code{truncation} is specified. If \code{truncation} is specified, the default is
#' \code{NULL} (i.e., use only normal likelihood).
#' @param prior_delta_null prior distribution for the \code{delta} parameter that
#' will be treated as belonging to the null hypothesis. Defaults to point distribution
#' with location at 0 (
#' \code{prior(distribution = "point", parameters = list(location = 0))}).
#' @param prior_rho_null prior distribution for the \code{rho} parameter that
#' will be treated as belonging to the null hypothesis. Defaults to point distribution
#' with location at 0.5 (
#' \code{prior(distribution = "point", parameters = list(location = 0.5))}).
#' @param prior_nu_null prior distribution for the \code{nu} parameter
#' that will be treated as belonging to the null hypothesis. Defaults to \code{prior_none} (
#' (i.e., normal likelihood)).
#' @param prior_mu prior distribution for the grand mean parameter. Defaults to \code{NULL}
#' which sets Jeffreys prior for the grand mean in case of no truncation or an unit Cauchy
#' prior distributions for the grand mean in case of truncation (which greatly improves
#' sampling efficiency).
#' @param prior_sigma2 prior distribution for the grand variance parameter. Defaults to \code{NULL}
#' which sets Jeffreys prior for the variance in case of no truncation or an exponential prior
#' distribution for the variance in case of truncation (which greatly improves sampling efficiency).
#' @param chains a number of chains of the MCMC algorithm.
#' @param iter a number of sampling iterations of the MCMC algorithm.
#' Defaults to \code{10000}, with a minimum of \code{4000}.
#' @param warmup a number of warmup iterations of the MCMC algorithm.
#' Defaults to \code{5000}.
#' @param thin a thinning of the chains of the MCMC algorithm. Defaults to
#' \code{1}.
#' @param parallel whether the individual models should be fitted in parallel.
#' Defaults to \code{FALSE}. The implementation is not completely stable
#' and might cause a connection error.
#' @param control allows to pass control settings with the
#' [set_control()] function. See \code{?set_control} for
#' options and default settings.
#' @param convergence_checks automatic convergence checks to assess the fitted
#' models, passed with [set_convergence_checks()] function. See
#' \code{?set_convergence_checks} for options and default settings.
##' @param save whether all models posterior distributions should be kept
#' after obtaining a model-averaged result. Defaults to \code{"all"} which
#' does not remove anything. Set to \code{"min"} to significantly reduce
#' the size of final object, however, some model diagnostics and further
#' manipulation with the object will not be possible.
#' @param seed a seed to be set before model fitting, marginal likelihood
#' computation, and posterior mixing for reproducibility of results. Defaults
#' to \code{NULL} - no seed is set.
#' @param silent whether all print messages regarding the fitting process
#' should be suppressed. Defaults to \code{TRUE}. Note that \code{parallel = TRUE}
#' also suppresses all messages.
#' @param ... additional arguments.
#'
#' @details
#' See \insertCite{maier2022bayesian;textual}{RoBTT} for more details
#' regarding the robust Bayesian t-test methodology and the corresponding
#' vignette (\href{../doc/Introduction_to_RoBTT.html}{\code{vignette("Introduction_to_RoBTT", package = "RoBTT")}}).
#'
#' See \insertCite{godmann2024how;textual}{RoBTT} for more details
#' regarding the truncated Bayesian t-test methodology and the corresponding
#' vignette (\href{../doc/Truncated_t_test.html}{\code{vignette("Truncated_t_test", package = "RoBTT")}}).
#'
#' Generic [summary.RoBTT()], [print.RoBTT()], and [plot.RoBTT()]
#' functions are provided to facilitate manipulation with the ensemble.
#'
#'
#' @return \code{RoBTT} returns an object of \link[base]{class} \code{"RoBTT"}.
#'
#' @examples \dontrun{
#' # using the example data from Darwin
#' data("fertilization", package = "RoBTT")
#' fit <- RoBTT(
#' x1 = fertilization$Self,
#' x2 = fertilization$Crossed,
#' prior_delta = prior("cauchy", list(0, 1/sqrt(2))),
#' prior_rho = prior("beta", list(3, 3)),
#' seed = 1,
#' chains = 1,
#' warmup = 1000,
#' iter = 2000,
#' control = set_control(adapt_delta = 0.95)
#' )
#'
#' # summary can provide many details about the model
#' summary(fit)
#' }
#'
#' @references
#' \insertAllCited{}
#' @export RoBTT
#' @seealso [summary.RoBTT()], [prior()]
RoBTT <- function(
x1 = NULL, x2 = NULL,
mean1 = NULL, mean2 = NULL, sd1 = NULL, sd2 = NULL, N1 = NULL, N2 = NULL,
truncation = NULL,
prior_delta = prior(distribution = "cauchy", parameters = list(location = 0, scale = sqrt(2)/2)),
prior_rho = prior(distribution = "beta", parameters = list(alpha = 1, beta = 1)),
prior_nu = if(is.null(truncation)) prior(distribution = "exp", parameters = list(rate = 1)),
prior_delta_null = prior(distribution = "spike", parameters = list(location = 0)),
prior_rho_null = prior(distribution = "spike", parameters = list(location = 0.5)),
prior_nu_null = prior_none(),
prior_mu = NULL, prior_sigma2 = NULL,
chains = 4, iter = 10000, warmup = 5000, thin = 1, parallel = FALSE,
control = set_control(), convergence_checks = set_convergence_checks(),
save = "all", seed = NULL, silent = TRUE, ...){
dots <- .RoBTT_collect_dots(...)
object <- NULL
object <- NULL
object$call <- match.call()
object$data <- .check_data(x1 = x1, x2 = x2, mean1 = mean1, mean2 = mean2, sd1 = sd1, sd2 = sd2, N1 = N1, N2 = N2, truncation = truncation)
### check MCMC settings
object$control <- .stan_check_and_list_fit_settings(chains = chains, warmup = warmup, iter = iter, thin = thin,
parallel = parallel, cores = chains, silent = silent, seed = seed, control = control)
object$convergence_checks <- .check_and_list_convergence_checks(convergence_checks)
### prepare and check the settings
object$priors <- .set_priors(prior_delta, prior_rho, prior_nu, prior_delta_null, prior_rho_null, prior_nu_null, prior_mu, prior_sigma2, !is.null(truncation))
object$models <- .get_models(object$priors)
object$add_info <- list(
warnings = NULL,
seed = seed,
save = save
)
### fit the models and compute marginal likelihoods
if(!object$control[["parallel"]]){
if(dots[["is_JASP"]]){
.JASP_progress_bar_start(length(object[["models"]]))
}
for(i in seq_along(object[["models"]])){
object$models[[i]] <- .fit_RoBTT(object, i)
if(dots[["is_JASP"]]){
.JASP_progress_bar_tick()
}
}
}else{
fitting_order <- .fitting_priority(object[["models"]])
cl <- parallel::makePSOCKcluster(floor(RoBTT.get_option("max_cores") / object$control[["chains"]]))
parallel::clusterEvalQ(cl, {library("RoBTT")})
parallel::clusterExport(cl, "object", envir = environment())
object$models <- parallel::parLapplyLB(cl, fitting_order, .fit_RoBTT, object = object)[order(fitting_order)]
parallel::stopCluster(cl)
}
# deal with non-converged the converged models
object$add_info$converged <- .get_model_convergence(object)
# create ensemble only if all models converged
if(all(object$add_info$converged)){
### compute the model-space results
object$models <- BayesTools::models_inference(object[["models"]])
object$RoBTT <- .ensemble_inference(object)
object$coefficients <- .compute_coeficients(object[["RoBTT"]])
}else{
stop(paste0("The following models failed the converge: ", paste(which(!object$add_info$converged), collapse = ", "), "."))
}
### collect and print errors and warnings
object$add_info[["errors"]] <- c(object$add_info[["errors"]], .get_model_errors(object))
object$add_info[["warnings"]] <- c(object$add_info[["warnings"]], .get_model_warnings(object))
.print_errors_and_warnings(object)
### remove model posteriors if asked to
if(save == "min"){
object <- .remove_model_posteriors(object)
object <- .remove_model_margliks(object)
}
class(object) <- "RoBTT"
return(object)
}
#' @title Updates a fitted RoBTT object
#'
#' @description \code{update.RoBTT} can be used to
#' \enumerate{
#' \item{change the prior odds of fitted models by specifying a vector
#' \code{prior_weights} of the same length as the fitted models,}
#' \item{refitting models that failed to converge with updated settings
#' of control parameters,}
#' \item{or changing the convergence criteria and recalculating the ensemble
#' results by specifying new \code{control} argument and setting
#' \code{refit_failed == FALSE}.}
#' }
#'
#' @param object a fitted RoBTT object
#' @param prior_weights either a single value specifying prior model weight
#' of a newly specified model using priors argument, or a vector of the
#' same length as already fitted models to update their prior weights.
#' @param refit_failed whether failed models should be refitted. Relevant only
#' \code{prior_weights} are not supplied. Defaults to \code{TRUE}.
#' @inheritParams RoBTT
#' @param ... additional arguments.
#'
#' @details See [RoBTT()] for more details.
#'
#' @return \code{RoBTT} returns an object of class 'RoBTT'.
#'
#' @seealso [RoBTT()], [summary.RoBTT()], [prior()], [check_setup()]
#' @export
update.RoBTT <- function(object, refit_failed = TRUE, prior_weights = NULL,
chains = NULL, iter = NULL, warmup = NULL, thin = NULL, parallel = NULL,
control = NULL, convergence_checks = NULL,
save = "all", seed = NULL, silent = TRUE, ...){
BayesTools::check_bool(refit_failed, "refit_failed")
dots <- .RoBTT_collect_dots(...)
if(object$add_info$save == "min")
stop("Models cannot be updated because individual model posteriors were not save during the fitting process. Set 'save' parameter to 'all' in while fitting the model (see ?RoBMA for more details).")
if(!is.null(prior_weights)){
what_to_do <- "update_prior_weights"
if(length(prior_weights) != length(object$models))
stop("The number of newly specified prior odds does not match the number of models. See '?update.RoBTT' for more details.")
for(i in 1:length(object$models)){
object$models[[i]]$prior_weights <- prior_weights[i]
object$models[[i]]$prior_weights_set <- prior_weights[i]
}
}else if(refit_failed & any(!.get_model_convergence(object, include_warning = TRUE))){
what_to_do <- "refit_failed_models"
}else{
what_to_do <- "update_settings"
}
### update control settings if any change is specified
object[["control"]] <- .update_fit_control(object[["control"]], chains = chains, warmup = warmup, iter = iter, thin = thin, parallel = parallel, cores = chains, silent = silent, seed = seed, control = control)
object[["convergence_checks"]] <- .update_convergence_checks(object[["convergence_checks"]], convergence_checks)
### clean errors and warnings
object$add_info[["errors"]] <- NULL
object$add_info[["warnings"]] <- NULL
### do the stuff
if(what_to_do == "refit_failed_models"){
models_to_update <- seq_along(object$models)[!.get_model_convergence(object, include_warning = TRUE)]
if(!object$control[["parallel"]]){
if(dots[["is_JASP"]]){
.JASP_progress_bar_start(length(models_to_update))
}
for(i in models_to_update){
object$models[[i]] <- .fit_RoBTT(object, i)
if(dots[["is_JASP"]]){
.JASP_progress_bar_tick()
}
}
}else{
cl <- parallel::makePSOCKcluster(floor(RoBTT.get_option("max_cores") / object$control[["chains"]]))
parallel::clusterEvalQ(cl, {library("RoBTT")})
parallel::clusterExport(cl, "object", envir = environment())
object$models[models_to_update] <- parallel::parLapplyLB(cl, models_to_update, .fit_RoBTT, object = object)
parallel::stopCluster(cl)
}
}
# create ensemble only if at least one model converged
if(all(object$add_info$converged)){
### compute the model-space results
object$models <- BayesTools::models_inference(object[["models"]])
object$RoBTT <- .ensemble_inference(object)
object$coefficients <- .compute_coeficients(object[["RoBTT"]])
}else{
stop(paste0("The following models failed the converge: ", paste(which(!object$add_info$converged), collapse = ", "), "."))
}
### collect and print errors and warnings
object$add_info[["errors"]] <- c(object$add_info[["errors"]], .get_model_errors(object))
object$add_info[["warnings"]] <- c(object$add_info[["warnings"]], .get_model_warnings(object))
.print_errors_and_warnings(object)
### remove model posteriors if asked to
if(save == "min"){
object <- .remove_model_posteriors(object)
object <- .remove_model_margliks(object)
}
return(object)
}