From de42beacb8cef2008e4c46a18db85af6f3ea8f59 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Tue, 28 Apr 2020 18:06:48 -0700 Subject: [PATCH 1/9] v1 --- NAMESPACE | 1 + R/declare_estimator.R | 145 +++------ R/tidy_filter.R | 78 +++++ man/declare_estimator.Rd | 9 +- man/reexports.Rd | 2 +- man/tidy_filter.Rd | 19 ++ tests/testthat/test-post-estimation.R | 404 ++++++++++++++++++++++++++ 7 files changed, 550 insertions(+), 108 deletions(-) create mode 100644 R/tidy_filter.R create mode 100644 man/tidy_filter.Rd diff --git a/NAMESPACE b/NAMESPACE index 11eab5c0..608c2712 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -58,6 +58,7 @@ export(simulate_design) export(simulate_designs) export(tidy) export(tidy_estimator) +export(tidy_filter) importFrom(estimatr,difference_in_means) importFrom(fabricatr,add_level) importFrom(fabricatr,fabricate) diff --git a/R/declare_estimator.R b/R/declare_estimator.R index aa5e0276..173bcf43 100644 --- a/R/declare_estimator.R +++ b/R/declare_estimator.R @@ -250,6 +250,7 @@ model_handler <- function(data, ..., model = estimatr::difference_in_means, + post_estimation = tidy_filter, term = FALSE) { coefficient_names <- enquo(term) # forces evaluation of quosure @@ -260,10 +261,45 @@ model_handler <- # todo special case weights offsets for glm etc? 
results <- eval_tidy(quo(model(!!!args, data = data))) - - results <- fit2tidy(results, coefficient_names) - + + # following copied from dplyr:::as_inlined_function and dplyr:::as_fun_list + + if(rlang::is_formula(post_estimation)) { + + # if you have used our built-in tidy filter function, replace term with that provided to model_handler + # this is a temporary solution for backward compatibility, it will be removed in future versions + if(call_name(post_estimation) == "tidy_filter" && !"term" %in% rlang::call_args_names(post_estimation)){ + post_estimation <- call_modify(.call = post_estimation, term = coefficient_names) + } + + f <- rlang::expr_interp(post_estimation) + fn <- rlang::as_function(f, env = empty_env()) + body(fn) <- rlang::expr({ + base::pairlist(...) + `_quo` <- rlang::quo(!!body(fn)) + rlang::eval_bare(`_quo`, base::parent.frame()) + }) + + results <- rlang::eval_tidy(fn(results)) + + } else { + + if (rlang::is_character(post_estimation)) { + post_estimation <- get(post_estimation, envir = parent.frame(), mode = "function") + } else if (!rlang::is_function(post_estimation)) { + stop("Please provide one sided formula, a function, or a function name to post_estimation.") + } + + if("term" %in% names(formals(post_estimation))) { + results <- post_estimation(results, term = coefficient_names) + } else { + results <- post_estimation(results) + } + + } + results + } validation_fn(model_handler) <- function(ret, dots, label) { @@ -288,109 +324,6 @@ validation_fn(model_handler) <- function(ret, dots, label) { #' @rdname declare_estimator estimator_handler <- tidy_estimator(model_handler) -#' @importFrom generics tidy -#' @export -generics::tidy - -tidy_default <- function(x, conf.int = TRUE) { - # TODO: error checking -- are column names named as we expect - - val <- try({ - summ <- coef(summary(x)) - - if(conf.int == TRUE) { - ci <- suppressMessages(as.data.frame(confint(x))) - tidy_df <- - data.frame( - term = rownames(summ), - summ, - ci, - 
stringsAsFactors = FALSE, - row.names = NULL - ) - colnames(tidy_df) <- - c( - "term", - "estimate", - "std.error", - "statistic", - "p.value", - "conf.low", - "conf.high" - ) - } else { - tidy_df <- - data.frame( - term = rownames(summ), - summ, - ci, - stringsAsFactors = FALSE, - row.names = NULL - ) - colnames(tidy_df) <- - c( - "term", - "estimate", - "std.error", - "statistic", - "p.value" - ) - } - - }, silent = TRUE) - - if(class(val) == "try-error"){ - stop("The default tidy method for the model fit of class ", class(x), " failed. You may try installing and loading the broom package, or you can write your own tidy.", class(x), " method.", call. = FALSE) - } - - tidy_df -} - -#' @importFrom utils getS3method -hasS3Method <- function(f, obj) { - for(i in class(obj)) { - get_function <- try(getS3method(f, i), silent = TRUE) - if(class(get_function) != "try-error" && is.function(get_function)) return(TRUE) - } - FALSE -} - -# called by model_handler, resets columns names !!! -fit2tidy <- function(fit, term = FALSE) { - - # browser() - if (hasS3Method("tidy", fit)) { - tidy_df <- tidy(fit, conf.int = TRUE) - } else { - tidy_df <- try(tidy_default(fit, conf.int = TRUE), silent = TRUE) - - if(class(tidy_df) == "try-error"){ - stop("We were unable to tidy the output of the function provided to 'model'. - It is possible that the broom package has a tidier for that object type. - If not, you can use a custom estimator to 'estimator_function'. - See examples in ?declare_estimator") - } - } - - if (is.character(term)) { - coefs_in_output <- term %in% tidy_df$term - if (!all(coefs_in_output)) { - stop( - "Not all of the terms declared in your estimator are present in the model output, including ", - paste(term[!coefs_in_output], collapse = ", "), - ".", - call. 
= FALSE - ) - } - tidy_df <- tidy_df[tidy_df$term %in% term, , drop = FALSE] - } else if (is.logical(term) && !term) { - tidy_df <- - tidy_df[which.max(tidy_df$term != "(Intercept)"), , drop = FALSE] - } - - tidy_df -} - # helper methods for estimand=my_estimand arguments to estimator_handler # get_estimand_label <- function(estimand) { diff --git a/R/tidy_filter.R b/R/tidy_filter.R new file mode 100644 index 00000000..55fc3652 --- /dev/null +++ b/R/tidy_filter.R @@ -0,0 +1,78 @@ +#' Tidy Model Results and Filter to Relevant Coefficients +#' +#' @param fit +#' @param term +#' +#' @return +#' @export +#' +#' @examples +tidy_filter <- function(fit, term = FALSE) { + + if (hasS3Method("tidy", fit)) { + tidy_df <- tidy(fit, conf.int = TRUE) + } else { + tidy_df <- try(tidy_try(fit, conf.int = TRUE), silent = TRUE) + + if(inherits(tidy_df, "try-error")){ + stop("We were unable to tidy the output of the function provided to 'model'. + It is possible that the broom package has a tidier for that object type. + If not, you can use a custom estimator to 'estimator_function'. + See examples in ?declare_estimator") + } + } + + if (is.character(term)) { + coefs_in_output <- term %in% tidy_df$term + if (!all(coefs_in_output)) { + stop( + "Not all of the terms declared in your estimator are present in the model output, including ", + paste(term[!coefs_in_output], collapse = ", "), + ".", + call. 
= FALSE + ) + } + tidy_df <- tidy_df[tidy_df$term %in% term, , drop = FALSE] + } else if (is.logical(term) && !term) { + tidy_df <- tidy_df[which.max(tidy_df$term != "(Intercept)"), , drop = FALSE] + } + + tidy_df +} + +tidy_try <- function(x, conf.int = TRUE) { + # TODO: error checking -- are column names named as we expect + + val <- try({ + summ <- coef(summary(x)) + + if(conf.int == TRUE) { + ci <- suppressMessages(as.data.frame(confint(x))) + tidy_df <- data.frame(term = rownames(summ), summ, ci, stringsAsFactors = FALSE, row.names = NULL) + colnames(tidy_df) <- c("term", "estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high") + } else { + tidy_df <- data.frame(term = rownames(summ), summ, stringsAsFactors = FALSE, row.names = NULL) # confint() is not computed in this branch, so there is no ci to bind + colnames(tidy_df) <- c("term", "estimate", "std.error", "statistic", "p.value") + } + + }, silent = TRUE) + + if(inherits(val, "try-error")){ + stop("The default tidy method for the model fit of class ", class(x), " failed. You may try installing and loading the broom package, or you can write your own tidy.", class(x), " method.", call. 
= FALSE) + } + + tidy_df +} + +#' @importFrom generics tidy +#' @export +generics::tidy + +#' @importFrom utils getS3method +hasS3Method <- function(f, obj) { + for(i in class(obj)) { + get_function <- try(getS3method(f, i), silent = TRUE) + if(!inherits(get_function, "try-error") && is.function(get_function)) return(TRUE) + } + FALSE +} diff --git a/man/declare_estimator.Rd b/man/declare_estimator.Rd index 0c275480..76afaa2f 100644 --- a/man/declare_estimator.Rd +++ b/man/declare_estimator.Rd @@ -14,12 +14,19 @@ declare_estimators(..., handler = estimator_handler, label = "estimator") tidy_estimator(estimator_function) -model_handler(data, ..., model = estimatr::difference_in_means, term = FALSE) +model_handler( + data, + ..., + model = estimatr::difference_in_means, + post_estimation = tidy_filter, + term = FALSE +) estimator_handler( data, ..., model = estimatr::difference_in_means, + post_estimation = tidy_filter, term = FALSE, estimand = NULL, label diff --git a/man/reexports.Rd b/man/reexports.Rd index cbbda97a..279415e0 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/declare_estimator.R +% Please edit documentation in R/tidy_filter.R \docType{import} \name{reexports} \alias{reexports} diff --git a/man/tidy_filter.Rd b/man/tidy_filter.Rd new file mode 100644 index 00000000..602ee935 --- /dev/null +++ b/man/tidy_filter.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidy_filter.R +\name{tidy_filter} +\alias{tidy_filter} +\title{Tidy Model Results and Filter to Relevant Coefficients} +\usage{ +tidy_filter(fit, term = FALSE) +} +\arguments{ +\item{fit}{} + +\item{term}{} +} +\value{ + +} +\description{ +Tidy Model Results and Filter to Relevant Coefficients +} diff --git a/tests/testthat/test-post-estimation.R b/tests/testthat/test-post-estimation.R index f59cbf58..c7549290 100644 --- 
a/tests/testthat/test-post-estimation.R +++ b/tests/testthat/test-post-estimation.R @@ -66,3 +66,407 @@ test_that("multiple design draw_estimates", { draw_estimates(my_designs) }) + +test_that("glance works", { + des <- + declare_population(data = sleep) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ glance(.), + label = "formula call" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = glance, + label = "bare function" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = "glance", + label = "string" + ) + + est <- draw_estimates(des) + + expect_equal(est, + structure( + list( + estimator_label = c("formula call", "bare function", + "string"), + r.squared = c(0.161332850791025, 0.161332850791025, + 0.161332850791025), + adj.r.squared = c(0.114740231390526, 0.114740231390526, + 0.114740231390526), + statistic = c(3.46262676078045, 3.46262676078045, + 3.46262676078045), + p.value = c(0.079186714215938, 0.079186714215938, + 0.079186714215938), + df.residual = c(18, 18, 18), + N = c(20L, 20L, + 20L), + se_type = c("HC2", "HC2", "HC2") + ), + row.names = c(NA, -3L), + class = "data.frame" + )) + +}) + +test_that("tidy works", { + # default term + des <- + declare_population(data = sleep) + + declare_estimator(extra ~ group, model = lm_robust, label = "formula") + est <- draw_estimates(des) + expect_equal(est, structure( + list( + estimator_label = "formula", + term = "group2", + estimate = 1.58, + std.error = 0.849091017238762, + statistic = 1.86081346748685, + p.value = 0.0791867142159381, + conf.low = -0.203874032287598, + conf.high = 3.3638740322876, + df = 18, + outcome = "extra" + ), + row.names = c(NA,-1L), + class = "data.frame" + )) + + # default term + des <- + declare_population(data = sleep) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(.), + label = "formula" + ) + + declare_estimator( + extra ~ group, + model 
= lm_robust, + post_estimation = tidy_filter, + label = "bare" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = "tidy_filter", + label = "string" + ) + + est <- draw_estimates(des) + expect_equal(est, structure( + list( + estimator_label = c("formula", "bare", "string"), + term = c("group2", "group2", "group2"), + estimate = c(1.58, + 1.58, 1.58), + std.error = c(0.849091017238762, 0.849091017238762, + 0.849091017238762), + statistic = c(1.86081346748685, 1.86081346748685, + 1.86081346748685), + p.value = c(0.0791867142159381, 0.0791867142159381, + 0.0791867142159381), + conf.low = c(-0.203874032287598,-0.203874032287598,-0.203874032287598), + conf.high = c(3.3638740322876, 3.3638740322876, + 3.3638740322876), + df = c(18, 18, 18), + outcome = c("extra", "extra", + "extra") + ), + row.names = c(NA,-3L), + class = "data.frame" + )) + + # another default + des <- + declare_population(data = sleep) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(.), + term = FALSE, + label = "formula1" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(., term = FALSE), + label = "formula2" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = tidy_filter, + term = FALSE, + label = "bare" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = "tidy_filter", + term = FALSE, + label = "string" + ) + + est <- draw_estimates(des) + expect_equal(est, structure( + list( + estimator_label = c("formula1", "formula2", "bare", + "string"), + term = c("group2", "group2", "group2", "group2"), + estimate = c(1.58, 1.58, 1.58, 1.58), + std.error = c( + 0.849091017238762, + 0.849091017238762, + 0.849091017238762, + 0.849091017238762 + ), + statistic = c( + 1.86081346748685, + 1.86081346748685, + 1.86081346748685, + 1.86081346748685 + ), + p.value = c( + 0.0791867142159381, + 0.0791867142159381, + 
0.0791867142159381, + 0.0791867142159381 + ), + conf.low = c( + -0.203874032287598,-0.203874032287598, + -0.203874032287598, + -0.203874032287598 + ), + conf.high = c( + 3.3638740322876, + 3.3638740322876, + 3.3638740322876, + 3.3638740322876 + ), + df = c(18, 18, 18, 18), + outcome = c("extra", + "extra", "extra", "extra") + ), + row.names = c(NA,-4L), + class = "data.frame" + )) + + # return all coefs + des <- + declare_population(data = sleep) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(., term = TRUE), + label = "formula1" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(.), + term = TRUE, + label = "formula2" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = tidy_filter, + term = TRUE, + label = "bare" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = "tidy_filter", + term = TRUE, + label = "string" + ) + + est <- draw_estimates(des) + expect_equal(est, structure( + list( + estimator_label = c( + "formula1", + "formula1", + "formula2", + "formula2", + "bare", + "bare", + "string", + "string" + ), + term = c( + "(Intercept)", + "group2", + "(Intercept)", + "group2", + "(Intercept)", + "group2", + "(Intercept)", + "group2" + ), + estimate = c(0.75, 1.58, 0.75, 1.58, 0.75, 1.58, 0.75, + 1.58), + std.error = c( + 0.565734527455728, + 0.849091017238762, + 0.565734527455728, + 0.849091017238762, + 0.565734527455728, + 0.849091017238762, + 0.565734527455728, + 0.849091017238762 + ), + statistic = c( + 1.32571014071382, + 1.86081346748685, + 1.32571014071382, + 1.86081346748685, + 1.32571014071382, + 1.86081346748685, + 1.32571014071382, + 1.86081346748685 + ), + p.value = c( + 0.201515544020674, + 0.0791867142159381, + 0.201515544020674, + 0.0791867142159381, + 0.201515544020674, + 0.0791867142159381, + 0.201515544020674, + 0.0791867142159381 + ), + conf.low = c( + 
-0.438564137657087,-0.203874032287598, + -0.438564137657087, + -0.203874032287598, + -0.438564137657087,-0.203874032287598, + -0.438564137657087, + -0.203874032287598 + ), + conf.high = c( + 1.93856413765709, + 3.3638740322876, + 1.93856413765709, + 3.3638740322876, + 1.93856413765709, + 3.3638740322876, + 1.93856413765709, + 3.3638740322876 + ), + df = c(18, 18, 18, 18, 18, 18, 18, 18), + outcome = c( + "extra", + "extra", + "extra", + "extra", + "extra", + "extra", + "extra", + "extra" + ) + ), + row.names = c(NA,-8L), + class = "data.frame" + )) + + # select them manually + des <- + declare_population(data = sleep) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(., term = "group2"), + label = "formula1" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(.), + term = "group2", + label = "formula2" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = tidy_filter, + term = "group2", + label = "bare" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = "tidy_filter", + term = "group2", + label = "string" + ) + + est <- draw_estimates(des) + expect_equal(est, structure(list(estimator_label = c("formula1", "formula2", "bare", + "string"), term = c("group2", "group2", "group2", "group2"), + estimate = c(1.58, 1.58, 1.58, 1.58), std.error = c(0.849091017238762, + 0.849091017238762, 0.849091017238762, 0.849091017238762), + statistic = c(1.86081346748685, 1.86081346748685, 1.86081346748685, + 1.86081346748685), p.value = c(0.0791867142159381, 0.0791867142159381, + 0.0791867142159381, 0.0791867142159381), conf.low = c(-0.203874032287598, + -0.203874032287598, -0.203874032287598, -0.203874032287598 + ), conf.high = c(3.3638740322876, 3.3638740322876, 3.3638740322876, + 3.3638740322876), df = c(18, 18, 18, 18), outcome = c("extra", + "extra", "extra", "extra")), row.names = c(NA, -4L), class = "data.frame")) + + + # 
select them manually + des <- + declare_population(data = sleep) + + # does not work (intentionally) + # declare_estimator( + # extra ~ group, + # model = lm_robust, + # post_estimation = ~ tidy_filter(., term = group2), + # label = "formula1" + # ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = ~ tidy_filter(.), + term = group2, + label = "formula2" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = tidy_filter, + term = group2, + label = "bare" + ) + + declare_estimator( + extra ~ group, + model = lm_robust, + post_estimation = "tidy_filter", + term = group2, + label = "string" + ) + + est <- draw_estimates(des) + + expect_equal(est, structure(list(estimator_label = c("formula2", "bare", "string" + ), term = c("group2", "group2", "group2"), estimate = c(1.58, + 1.58, 1.58), std.error = c(0.849091017238762, 0.849091017238762, + 0.849091017238762), statistic = c(1.86081346748685, 1.86081346748685, + 1.86081346748685), p.value = c(0.0791867142159381, 0.0791867142159381, + 0.0791867142159381), conf.low = c(-0.203874032287598, -0.203874032287598, + -0.203874032287598), conf.high = c(3.3638740322876, 3.3638740322876, + 3.3638740322876), df = c(18, 18, 18), outcome = c("extra", "extra", + "extra")), row.names = c(NA, -3L), class = "data.frame")) + +}) + From 78474af3f5b45bfdf9691868755df3efab1a3328 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Tue, 28 Apr 2020 18:07:14 -0700 Subject: [PATCH 2/9] rproj file --- DeclareDesign.Rproj | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/DeclareDesign.Rproj b/DeclareDesign.Rproj index 53807116..1650955c 100644 --- a/DeclareDesign.Rproj +++ b/DeclareDesign.Rproj @@ -1,8 +1,8 @@ Version: 1.0 -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: No EnableCodeIndexing: Yes UseSpacesForTab: Yes @@ -16,3 +16,6 @@ BuildType: Package PackageUseDevtools: Yes 
PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace + +QuitChildProcessesOnExit: Yes +DisableExecuteRprofile: Yes From b2bd3724fa9c6e8288bd11c1d9cbed4325abac95 Mon Sep 17 00:00:00 2001 From: Alexander Coppock Date: Tue, 28 Apr 2020 21:13:58 -0400 Subject: [PATCH 3/9] man updates --- R/tidy_filter.R | 4 ++-- man/tidy_filter.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/tidy_filter.R b/R/tidy_filter.R index 55fc3652..cb4337b5 100644 --- a/R/tidy_filter.R +++ b/R/tidy_filter.R @@ -1,7 +1,7 @@ #' Tidy Model Results and Filter to Relevant Coefficients #' -#' @param fit -#' @param term +#' @param fit A model fit, as returned by a modeling function like lm, glm, or estimatr::lm_robust. +#' @param term A character vector of the terms that represent quantities of interest, i.e., "Z". If FALSE, return the first non-intercept term; if TRUE return all terms. #' #' @return #' @export diff --git a/man/tidy_filter.Rd b/man/tidy_filter.Rd index 602ee935..324d1e06 100644 --- a/man/tidy_filter.Rd +++ b/man/tidy_filter.Rd @@ -7,9 +7,9 @@ tidy_filter(fit, term = FALSE) } \arguments{ -\item{fit}{} +\item{fit}{A model fit, as returned by a modeling function like lm, glm, or estimatr::lm_robust.} -\item{term}{} +\item{term}{A character vector of the terms that represent quantities of interest, i.e., "Z". 
If FALSE, return the first non-intercept term; if TRUE return all terms.} } \value{ From 3eeea89fe6d4dbed0092c9ffb5a08466705ce5aa Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Tue, 28 Apr 2020 18:33:05 -0700 Subject: [PATCH 4/9] R CMD CHECK --- NAMESPACE | 8 ++++++++ R/declare_estimator.R | 25 ++++++++++++++----------- R/tidy_filter.R | 13 ++++++++++++- man/declare_estimator.Rd | 2 ++ man/tidy_filter.Rd | 14 ++++++++++++-- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 608c2712..2c850202 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,17 +74,23 @@ importFrom(rlang,"!!!") importFrom(rlang,"!!") importFrom(rlang,"%||%") importFrom(rlang,":=") +importFrom(rlang,as_function) importFrom(rlang,as_list) importFrom(rlang,as_quosure) importFrom(rlang,call_args) +importFrom(rlang,call_args_names) importFrom(rlang,call_modify) importFrom(rlang,call_name) +importFrom(rlang,empty_env) importFrom(rlang,enexpr) importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,env_clone) +importFrom(rlang,eval_bare) importFrom(rlang,eval_tidy) +importFrom(rlang,expr) importFrom(rlang,expr_deparse) +importFrom(rlang,expr_interp) importFrom(rlang,expr_name) importFrom(rlang,expr_text) importFrom(rlang,f_env) @@ -92,8 +98,10 @@ importFrom(rlang,f_rhs) importFrom(rlang,f_text) importFrom(rlang,is_bare_integerish) importFrom(rlang,is_call) +importFrom(rlang,is_character) importFrom(rlang,is_empty) importFrom(rlang,is_formula) +importFrom(rlang,is_function) importFrom(rlang,is_list) importFrom(rlang,is_missing) importFrom(rlang,is_null) diff --git a/R/declare_estimator.R b/R/declare_estimator.R index 173bcf43..9e1e56ed 100644 --- a/R/declare_estimator.R +++ b/R/declare_estimator.R @@ -244,8 +244,10 @@ tidy_estimator <- function(estimator_function) { #' @param data a data.frame #' @param model A model function, e.g. lm or glm. By default, the model is the \code{\link{difference_in_means}} function from the \link{estimatr} package. 
+#' @param post_estimation A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} post-estimation function \code{\link{tidy_filter}} is used, which tidies data and optionally filters to relevant coefficients. #' @param term Symbols or literal character vector of term that represent quantities of interest, i.e. Z. If FALSE, return the first non-intercept term; if TRUE return all term. To escape non-standard-evaluation use \code{!!}. #' @rdname declare_estimator +#' @importFrom rlang is_formula call_modify call_args_names expr_interp as_function expr quo eval_bare eval_tidy is_character is_function empty_env model_handler <- function(data, ..., @@ -264,29 +266,30 @@ model_handler <- # following copied from dplyr:::as_inlined_function and dplyr:::as_fun_list - if(rlang::is_formula(post_estimation)) { + if(is_formula(post_estimation)) { # if you have used our built-in tidy filter function, replace term with that provided to model_handler # this is a temporary solution for backward compatibility, it will be removed in future versions - if(call_name(post_estimation) == "tidy_filter" && !"term" %in% rlang::call_args_names(post_estimation)){ + if(call_name(post_estimation) == "tidy_filter" && !"term" %in% call_args_names(post_estimation)){ post_estimation <- call_modify(.call = post_estimation, term = coefficient_names) } - f <- rlang::expr_interp(post_estimation) - fn <- rlang::as_function(f, env = empty_env()) - body(fn) <- rlang::expr({ - base::pairlist(...) - `_quo` <- rlang::quo(!!body(fn)) - rlang::eval_bare(`_quo`, base::parent.frame()) + f <- expr_interp(post_estimation) + # TODO: unsure of what env should be here! + fn <- as_function(f, env = parent.frame()) + body(fn) <- expr({ + pairlist(...) 
+ `_quo` <- quo(!!body(fn)) + eval_bare(`_quo`, parent.frame()) }) - results <- rlang::eval_tidy(fn(results)) + results <- eval_tidy(fn(results)) } else { - if (rlang::is_character(post_estimation)) { + if (is_character(post_estimation)) { post_estimation <- get(post_estimation, envir = parent.frame(), mode = "function") - } else if (!rlang::is_function(post_estimation)) { + } else if (!is_function(post_estimation)) { stop("Please provide one sided formula, a function, or a function name to post_estimation.") } diff --git a/R/tidy_filter.R b/R/tidy_filter.R index cb4337b5..a66fd1c3 100644 --- a/R/tidy_filter.R +++ b/R/tidy_filter.R @@ -1,12 +1,23 @@ #' Tidy Model Results and Filter to Relevant Coefficients +#' +#' Tidy function that returns a tidy data.frame of model results and allows filtering to relevant coefficients. The function will attempt to tidy model objects even when they do not have a tidy method available. For best results, first load the broom package via \code{library(broom)}. #' #' @param fit A model fit, as returned by a modeling function like lm, glm, or estimatr::lm_robust. #' @param term A character vector of the terms that represent quantities of interest, i.e., "Z". If FALSE, return the first non-intercept term; if TRUE return all terms. #' -#' @return +#' @return A data.frame with coefficient estimates and associated statistics. +#' #' @export #' #' @examples +#' +#' fit <- lm_robust(mpg ~ hp + disp + cyl, data = mtcars) +#' +#' tidy_filter(fit) +#' tidy_filter(fit, term = TRUE) +#' tidy_filter(fit, term = "hp") +#' tidy_filter(fit, term = c("hp", "cyl")) +#' tidy_filter <- function(fit, term = FALSE) { if (hasS3Method("tidy", fit)) { diff --git a/man/declare_estimator.Rd b/man/declare_estimator.Rd index 76afaa2f..e94c8073 100644 --- a/man/declare_estimator.Rd +++ b/man/declare_estimator.Rd @@ -45,6 +45,8 @@ estimator_handler( \item{model}{A model function, e.g. lm or glm. 
By default, the model is the \code{\link{difference_in_means}} function from the \link{estimatr} package.} +\item{post_estimation}{A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} post-estimation function \code{\link{tidy_filter}} is used, which tidies data and optionally filters to relevant coefficients.} + \item{term}{Symbols or literal character vector of term that represent quantities of interest, i.e. Z. If FALSE, return the first non-intercept term; if TRUE return all term. To escape non-standard-evaluation use \code{!!}.} \item{estimand}{a declare_estimand step object, or a character label, or a list of either} diff --git a/man/tidy_filter.Rd b/man/tidy_filter.Rd index 324d1e06..fd725776 100644 --- a/man/tidy_filter.Rd +++ b/man/tidy_filter.Rd @@ -12,8 +12,18 @@ tidy_filter(fit, term = FALSE) \item{term}{A character vector of the terms that represent quantities of interest, i.e., "Z". If FALSE, return the first non-intercept term; if TRUE return all terms.} } \value{ - +A data.frame with coefficient estimates and associated statistics. } \description{ -Tidy Model Results and Filter to Relevant Coefficients +Tidy function that returns a tidy data.frame of model results and allows filtering to relevant coefficients. The function will attempt to tidy model objects even when they do not have a tidy method available. For best results, first load the broom package via \code{library(broom)}. 
+} +\examples{ + +fit <- lm_robust(mpg ~ hp + disp + cyl, data = mtcars) + +tidy_filter(fit) +tidy_filter(fit, term = TRUE) +tidy_filter(fit, term = "hp") +tidy_filter(fit, term = c("hp", "cyl")) + } From 7f82ca082f672ed8b9ae73bc80fcaa8bd2ba2a84 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 29 Apr 2020 09:15:47 -0700 Subject: [PATCH 5/9] doc changes --- R/declare_estimator.R | 2 ++ R/declare_test.R | 4 +++- man/declare_estimator.Rd | 2 ++ man/declare_test.Rd | 7 +++++-- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/R/declare_estimator.R b/R/declare_estimator.R index f6dd9bba..6eabbb0b 100644 --- a/R/declare_estimator.R +++ b/R/declare_estimator.R @@ -1,6 +1,8 @@ #' Declare estimator #' #' @description Declares an estimator which generates estimates and associated statistics. +#' +#' Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. For example, \code{declare_test} could be used for a K-S test of distributional equality and \code{declare_estimator} for a difference-in-means estimate of an average treatment effect. #' #' @inheritParams declare_internal_inherit_params #' diff --git a/R/declare_test.R b/R/declare_test.R index d69cf43a..5ace9e7d 100644 --- a/R/declare_test.R +++ b/R/declare_test.R @@ -1,6 +1,8 @@ #' Declare test #' -#' @description Declares an test which generates a test statistic and associated inferential statistics. Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. +#' @description Declares a test which generates a test statistic and associated inferential statistics. 
+#' +#' Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. For example, \code{declare_test} could be used for a K-S test of distributional equality and \code{declare_estimator} for a difference-in-means estimate of an average treatment effect. #' #' @inheritParams declare_internal_inherit_params #' diff --git a/man/declare_estimator.Rd b/man/declare_estimator.Rd index 67ef21d8..e3d32769 100644 --- a/man/declare_estimator.Rd +++ b/man/declare_estimator.Rd @@ -56,6 +56,8 @@ A function that accepts a data.frame as an argument and returns a data.frame con } \description{ Declares an estimator which generates estimates and associated statistics. + +Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. For example, \code{declare_test} could be used for a K-S test of distributional equality and \code{declare_estimator} for a difference-in-means estimate of an average treatment effect. } \details{ \code{tidy_estimator} takes an untidy estimation function, and returns a tidy handler which accepts standard labeling options. diff --git a/man/declare_test.Rd b/man/declare_test.Rd index e7029563..255f6726 100644 --- a/man/declare_test.Rd +++ b/man/declare_test.Rd @@ -22,7 +22,9 @@ tidy_test(fn) A function that accepts a data.frame as an argument and returns a data.frame containing the value of the test statistic and other inferential statistics. } \description{ -Declares an test which generates a test statistic and associated inferential statistics +Declares a test which generates a test statistic and associated inferential statistics. 
+ +Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. For example, \code{declare_test} could be used for a K-S test of distributional equality and \code{declare_estimator} for a difference-in-means estimate of an average treatment effect. } \details{ \code{tidy_test} takes an untidy test function, and returns a tidy handler which accepts standard labeling options. @@ -41,8 +43,9 @@ Declares an test which generates a test statistic and associated inferential sta # Thanks to Jake Bowers for this example +library(coin) + our_ttest <- function(data) { - require(coin) res <- coin::oneway_test( outcome ~ factor(Xclus), data = data, From 22d866233e75433206cba39827f55036f461ab48 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 29 Apr 2020 15:01:08 -0700 Subject: [PATCH 6/9] rename and refactor per @nfultz --- NAMESPACE | 2 +- R/declare_estimator.R | 45 +-- R/{tidy_filter.R => tidy_try.R} | 22 +- man/declare_estimator.Rd | 6 +- man/reexports.Rd | 2 +- man/{tidy_filter.Rd => tidy_try.Rd} | 8 +- ...post-estimation.R => test-model-summary.R} | 290 ++++++++---------- 7 files changed, 163 insertions(+), 212 deletions(-) rename R/{tidy_filter.R => tidy_try.R} (79%) rename man/{tidy_filter.Rd => tidy_try.Rd} (89%) rename tests/testthat/{test-post-estimation.R => test-model-summary.R} (52%) diff --git a/NAMESPACE b/NAMESPACE index 8cf62c33..ea3e5772 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,8 +59,8 @@ export(simulate_design) export(simulate_designs) export(tidy) export(tidy_estimator) -export(tidy_filter) export(tidy_test) +export(tidy_try) importFrom(estimatr,difference_in_means) importFrom(fabricatr,add_level) importFrom(fabricatr,fabricate) diff --git a/R/declare_estimator.R b/R/declare_estimator.R index 6eabbb0b..e77e088d 100644 --- a/R/declare_estimator.R 
+++ b/R/declare_estimator.R @@ -246,7 +246,7 @@ tidy_estimator <- function(fn) { #' @param data a data.frame #' @param model A model function, e.g. lm or glm. By default, the model is the \code{\link{difference_in_means}} function from the \link{estimatr} package. -#' @param post_estimation A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} post-estimation function \code{\link{tidy_filter}} is used, which tidies data and optionally filters to relevant coefficients. +#' @param model_summary A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} model summary function \code{\link{tidy_try}} is used, which first attempts to use the available tidy method for the model object returned by \code{model}; if no tidy method is available, it attempts to summarize coefficients using the \code{coef(summary())} and \code{confint} methods. If these do not exist for the model object, it fails. #' @param term Symbols or literal character vector of term that represent quantities of interest, i.e. Z. If FALSE, return the first non-intercept term; if TRUE return all term. To escape non-standard-evaluation use \code{!!}. 
#' @rdname declare_estimator #' @importFrom rlang is_formula call_modify call_args_names expr_interp as_function expr quo eval_bare eval_tidy is_character is_function empty_env @@ -254,7 +254,7 @@ model_handler <- function(data, ..., model = estimatr::difference_in_means, - post_estimation = tidy_filter, + model_summary = tidy_try, term = FALSE) { coefficient_names <- enquo(term) # forces evaluation of quosure @@ -268,15 +268,9 @@ model_handler <- # following copied from dplyr:::as_inlined_function and dplyr:::as_fun_list - if(is_formula(post_estimation)) { + if(is_formula(model_summary)) { - # if you have used our built-in tidy filter function, replace term with that provided to model_handler - # this is a temporary solution for backward compatibility, it will be removed in future versions - if(call_name(post_estimation) == "tidy_filter" && !"term" %in% call_args_names(post_estimation)){ - post_estimation <- call_modify(.call = post_estimation, term = coefficient_names) - } - - f <- expr_interp(post_estimation) + f <- expr_interp(model_summary) # TODO: unsure of what env should be here! 
fn <- as_function(f, env = parent.frame()) body(fn) <- expr({ @@ -289,20 +283,33 @@ model_handler <- } else { - if (is_character(post_estimation)) { - post_estimation <- get(post_estimation, envir = parent.frame(), mode = "function") - } else if (!is_function(post_estimation)) { - stop("Please provide one sided formula, a function, or a function name to post_estimation.") + if (is_character(model_summary)) { + model_summary <- get(model_summary, envir = parent.frame(), mode = "function") + } else if (!is_function(model_summary)) { + stop("Please provide one sided formula, a function, or a function name to model_summary.") } - if("term" %in% names(formals(post_estimation))) { - results <- post_estimation(results, term = coefficient_names) - } else { - results <- post_estimation(results) - } + results <- model_summary(results) } + if("term" %in% colnames(results)) { + if (is.character(coefficient_names)) { + coefs_in_output <- coefficient_names %in% results$term + if (!all(coefs_in_output)) { + stop( + "Not all of the terms declared in your estimator are present in the model output, including ", + paste(coefficient_names[!coefs_in_output], collapse = ", "), + ".", + call. 
= FALSE + ) + } + results <- results[results$term %in% coefficient_names, , drop = FALSE] + } else if (is.logical(coefficient_names) && !coefficient_names) { + results <- results[which.max(results$term != "(Intercept)"), , drop = FALSE] + } + } + results } diff --git a/R/tidy_filter.R b/R/tidy_try.R similarity index 79% rename from R/tidy_filter.R rename to R/tidy_try.R index a66fd1c3..f2dd50b1 100644 --- a/R/tidy_filter.R +++ b/R/tidy_try.R @@ -18,12 +18,12 @@ #' tidy_filter(fit, term = "hp") #' tidy_filter(fit, term = c("hp", "cyl")) #' -tidy_filter <- function(fit, term = FALSE) { +tidy_try <- function(fit, term = FALSE) { if (hasS3Method("tidy", fit)) { tidy_df <- tidy(fit, conf.int = TRUE) } else { - tidy_df <- try(tidy_try(fit, conf.int = TRUE), silent = TRUE) + tidy_df <- try(tidy_try_internal(fit, conf.int = TRUE), silent = TRUE) if(inherits(tidy_df, "try-error")){ stop("We were unable to tidy the output of the function provided to 'model'. @@ -32,26 +32,10 @@ tidy_filter <- function(fit, term = FALSE) { See examples in ?declare_estimator") } } - - if (is.character(term)) { - coefs_in_output <- term %in% tidy_df$term - if (!all(coefs_in_output)) { - stop( - "Not all of the terms declared in your estimator are present in the model output, including ", - paste(term[!coefs_in_output], collapse = ", "), - ".", - call. 
= FALSE - ) - } - tidy_df <- tidy_df[tidy_df$term %in% term, , drop = FALSE] - } else if (is.logical(term) && !term) { - tidy_df <- tidy_df[which.max(tidy_df$term != "(Intercept)"), , drop = FALSE] - } - tidy_df } -tidy_try <- function(x, conf.int = TRUE) { +tidy_try_internal <- function(x, conf.int = TRUE) { # TODO: error checking -- are column names named as we expect val <- try({ diff --git a/man/declare_estimator.Rd b/man/declare_estimator.Rd index e3d32769..6cb1b66f 100644 --- a/man/declare_estimator.Rd +++ b/man/declare_estimator.Rd @@ -18,7 +18,7 @@ model_handler( data, ..., model = estimatr::difference_in_means, - post_estimation = tidy_filter, + model_summary = tidy_try, term = FALSE ) @@ -26,7 +26,7 @@ estimator_handler( data, ..., model = estimatr::difference_in_means, - post_estimation = tidy_filter, + model_summary = tidy_try, term = FALSE, estimand = NULL, label @@ -45,7 +45,7 @@ estimator_handler( \item{model}{A model function, e.g. lm or glm. By default, the model is the \code{\link{difference_in_means}} function from the \link{estimatr} package.} -\item{post_estimation}{A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} post-estimation function \code{\link{tidy_filter}} is used, which tidies data and optionally filters to relevant coefficients.} +\item{model_summary}{A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} model summary function \code{\link{tidy_try}} is used, which first attempts to use the available tidy method for the model object returned by \code{model}; if no tidy method is available, it attempts to summarize coefficients using the \code{coef(summary())} and \code{confint} methods. 
If these do not exist for the model object, it fails.} \item{term}{Symbols or literal character vector of term that represent quantities of interest, i.e. Z. If FALSE, return the first non-intercept term; if TRUE return all term. To escape non-standard-evaluation use \code{!!}.} diff --git a/man/reexports.Rd b/man/reexports.Rd index 279415e0..8bf7a636 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidy_filter.R +% Please edit documentation in R/tidy_try.R \docType{import} \name{reexports} \alias{reexports} diff --git a/man/tidy_filter.Rd b/man/tidy_try.Rd similarity index 89% rename from man/tidy_filter.Rd rename to man/tidy_try.Rd index fd725776..c71950d4 100644 --- a/man/tidy_filter.Rd +++ b/man/tidy_try.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidy_filter.R -\name{tidy_filter} -\alias{tidy_filter} +% Please edit documentation in R/tidy_try.R +\name{tidy_try} +\alias{tidy_try} \title{Tidy Model Results and Filter to Relevant Coefficients} \usage{ -tidy_filter(fit, term = FALSE) +tidy_try(fit, term = FALSE) } \arguments{ \item{fit}{A model fit, as returned by a modeling function like lm, glm, or estimatr::lm_robust.} diff --git a/tests/testthat/test-post-estimation.R b/tests/testthat/test-model-summary.R similarity index 52% rename from tests/testthat/test-post-estimation.R rename to tests/testthat/test-model-summary.R index c7549290..b4f91ca5 100644 --- a/tests/testthat/test-post-estimation.R +++ b/tests/testthat/test-model-summary.R @@ -3,17 +3,18 @@ context("post estimation") test_that("multiple design draw_estimates", { my_designer <- function(N = 50) { my_population <- declare_population(N = N, noise = rnorm(N)) - - my_potential_outcomes <- declare_potential_outcomes(Y_Z_0 = noise, Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2)) - + + my_potential_outcomes <- + declare_potential_outcomes(Y_Z_0 = noise, 
Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2)) + my_assignment <- declare_assignment(m = 25) - + my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) - + my_estimator <- declare_estimator(Y ~ Z, estimand = my_estimand) - + my_reveal <- declare_reveal() - + my_design <- my_population + my_potential_outcomes + @@ -21,49 +22,51 @@ test_that("multiple design draw_estimates", { my_assignment + my_reveal + my_estimator - + my_design } - + my_population <- declare_population(N = 100, noise = rnorm(N)) - - my_potential_outcomes <- declare_potential_outcomes(Y_Z_0 = noise, Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2)) - + + my_potential_outcomes <- + declare_potential_outcomes(Y_Z_0 = noise, Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2)) + my_assignment <- declare_assignment(m = 25) - + my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) - + my_estimator <- declare_estimator(Y ~ Z, estimand = my_estimand) - + my_reveal <- declare_reveal() - + design_1 <- my_population + my_potential_outcomes + my_estimand + my_assignment + my_reveal + my_estimator - + my_assignment_2 <- declare_assignment(m = 50) - + design_2 <- replace_step(design_1, my_assignment, my_assignment_2) - + my_designs <- expand_design(my_designer, N = c(50, 100)) - + draw_estimands(design_1) - + draw_estimands(design_2) - + draw_estimands(design_1, design_2) - + draw_estimands(my_designs) - + draw_estimates(design_1) - + draw_estimates(design_2) - - expect_equal(draw_estimates(design_1, design_2)$design_label, c("design_1", "design_2")) - + + expect_equal(draw_estimates(design_1, design_2)$design_label, + c("design_1", "design_2")) + draw_estimates(my_designs) }) @@ -73,19 +76,19 @@ test_that("glance works", { declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ glance(.), + model_summary = ~ glance(.), label = "formula call" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = glance, + model_summary = glance, label = "bare function" ) + declare_estimator( 
extra ~ group, model = lm_robust, - post_estimation = "glance", + model_summary = "glance", label = "string" ) @@ -109,7 +112,7 @@ test_that("glance works", { 20L), se_type = c("HC2", "HC2", "HC2") ), - row.names = c(NA, -3L), + row.names = c(NA,-3L), class = "data.frame" )) @@ -134,7 +137,7 @@ test_that("tidy works", { df = 18, outcome = "extra" ), - row.names = c(NA,-1L), + row.names = c(NA, -1L), class = "data.frame" )) @@ -144,19 +147,19 @@ test_that("tidy works", { declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ tidy_filter(.), + model_summary = ~ tidy_try(.), label = "formula" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = tidy_filter, + model_summary = tidy_try, label = "bare" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = "tidy_filter", + model_summary = "tidy_try", label = "string" ) @@ -173,14 +176,14 @@ test_that("tidy works", { 1.86081346748685), p.value = c(0.0791867142159381, 0.0791867142159381, 0.0791867142159381), - conf.low = c(-0.203874032287598,-0.203874032287598,-0.203874032287598), + conf.low = c(-0.203874032287598, -0.203874032287598, -0.203874032287598), conf.high = c(3.3638740322876, 3.3638740322876, 3.3638740322876), df = c(18, 18, 18), outcome = c("extra", "extra", "extra") ), - row.names = c(NA,-3L), + row.names = c(NA, -3L), class = "data.frame" )) @@ -190,27 +193,21 @@ test_that("tidy works", { declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ tidy_filter(.), + model_summary = ~ tidy_try(.), term = FALSE, label = "formula1" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ tidy_filter(., term = FALSE), - label = "formula2" - ) + - declare_estimator( - extra ~ group, - model = lm_robust, - post_estimation = tidy_filter, + model_summary = tidy_try, term = FALSE, label = "bare" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = "tidy_filter", + model_summary = "tidy_try", 
term = FALSE, label = "string" ) @@ -218,44 +215,24 @@ test_that("tidy works", { est <- draw_estimates(des) expect_equal(est, structure( list( - estimator_label = c("formula1", "formula2", "bare", - "string"), - term = c("group2", "group2", "group2", "group2"), - estimate = c(1.58, 1.58, 1.58, 1.58), - std.error = c( - 0.849091017238762, - 0.849091017238762, - 0.849091017238762, - 0.849091017238762 - ), - statistic = c( - 1.86081346748685, - 1.86081346748685, - 1.86081346748685, - 1.86081346748685 - ), - p.value = c( - 0.0791867142159381, - 0.0791867142159381, - 0.0791867142159381, - 0.0791867142159381 - ), - conf.low = c( - -0.203874032287598,-0.203874032287598, - -0.203874032287598, - -0.203874032287598 - ), - conf.high = c( - 3.3638740322876, - 3.3638740322876, - 3.3638740322876, - 3.3638740322876 - ), - df = c(18, 18, 18, 18), - outcome = c("extra", - "extra", "extra", "extra") + estimator_label = c("formula1", "bare", "string"), + term = c("group2", "group2", "group2"), + estimate = c(1.58, + 1.58, 1.58), + std.error = c(0.849091017238762, 0.849091017238762, + 0.849091017238762), + statistic = c(1.86081346748685, 1.86081346748685, + 1.86081346748685), + p.value = c(0.0791867142159381, 0.0791867142159381, + 0.0791867142159381), + conf.low = c(-0.203874032287598,-0.203874032287598,-0.203874032287598), + conf.high = c(3.3638740322876, 3.3638740322876, + 3.3638740322876), + df = c(18, 18, 18), + outcome = c("extra", "extra", + "extra") ), - row.names = c(NA,-4L), + row.names = c(NA,-3L), class = "data.frame" )) @@ -265,27 +242,21 @@ test_that("tidy works", { declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ tidy_filter(., term = TRUE), - label = "formula1" - ) + - declare_estimator( - extra ~ group, - model = lm_robust, - post_estimation = ~ tidy_filter(.), + model_summary = ~ tidy_try(.), term = TRUE, label = "formula2" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = tidy_filter, + model_summary = tidy_try, 
term = TRUE, label = "bare" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = "tidy_filter", + model_summary = "tidy_try", term = TRUE, label = "string" ) @@ -293,36 +264,24 @@ test_that("tidy works", { est <- draw_estimates(des) expect_equal(est, structure( list( - estimator_label = c( - "formula1", - "formula1", - "formula2", - "formula2", - "bare", - "bare", - "string", - "string" - ), + estimator_label = c("formula2", "formula2", "bare", + "bare", "string", "string"), term = c( "(Intercept)", "group2", "(Intercept)", "group2", "(Intercept)", - "group2", - "(Intercept)", "group2" ), - estimate = c(0.75, 1.58, 0.75, 1.58, 0.75, 1.58, 0.75, - 1.58), + estimate = c(0.75, + 1.58, 0.75, 1.58, 0.75, 1.58), std.error = c( 0.565734527455728, 0.849091017238762, 0.565734527455728, 0.849091017238762, 0.565734527455728, - 0.849091017238762, - 0.565734527455728, 0.849091017238762 ), statistic = c( @@ -331,8 +290,6 @@ test_that("tidy works", { 1.32571014071382, 1.86081346748685, 1.32571014071382, - 1.86081346748685, - 1.32571014071382, 1.86081346748685 ), p.value = c( @@ -341,17 +298,13 @@ test_that("tidy works", { 0.201515544020674, 0.0791867142159381, 0.201515544020674, - 0.0791867142159381, - 0.201515544020674, 0.0791867142159381 ), conf.low = c( -0.438564137657087,-0.203874032287598, -0.438564137657087, -0.203874032287598, - -0.438564137657087,-0.203874032287598, - -0.438564137657087, - -0.203874032287598 + -0.438564137657087,-0.203874032287598 ), conf.high = c( 1.93856413765709, @@ -359,23 +312,13 @@ test_that("tidy works", { 1.93856413765709, 3.3638740322876, 1.93856413765709, - 3.3638740322876, - 1.93856413765709, 3.3638740322876 ), - df = c(18, 18, 18, 18, 18, 18, 18, 18), - outcome = c( - "extra", - "extra", - "extra", - "extra", - "extra", - "extra", - "extra", - "extra" - ) + df = c(18, 18, 18, 18, 18, 18), + outcome = c("extra", "extra", + "extra", "extra", "extra", "extra") ), - row.names = c(NA,-8L), + row.names = c(NA,-6L), class = 
"data.frame" )) @@ -385,88 +328,105 @@ test_that("tidy works", { declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ tidy_filter(., term = "group2"), - label = "formula1" - ) + - declare_estimator( - extra ~ group, - model = lm_robust, - post_estimation = ~ tidy_filter(.), + model_summary = ~ tidy_try(.), term = "group2", label = "formula2" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = tidy_filter, + model_summary = tidy_try, term = "group2", label = "bare" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = "tidy_filter", + model_summary = "tidy_try", term = "group2", label = "string" ) est <- draw_estimates(des) - expect_equal(est, structure(list(estimator_label = c("formula1", "formula2", "bare", - "string"), term = c("group2", "group2", "group2", "group2"), - estimate = c(1.58, 1.58, 1.58, 1.58), std.error = c(0.849091017238762, - 0.849091017238762, 0.849091017238762, 0.849091017238762), - statistic = c(1.86081346748685, 1.86081346748685, 1.86081346748685, - 1.86081346748685), p.value = c(0.0791867142159381, 0.0791867142159381, - 0.0791867142159381, 0.0791867142159381), conf.low = c(-0.203874032287598, - -0.203874032287598, -0.203874032287598, -0.203874032287598 - ), conf.high = c(3.3638740322876, 3.3638740322876, 3.3638740322876, - 3.3638740322876), df = c(18, 18, 18, 18), outcome = c("extra", - "extra", "extra", "extra")), row.names = c(NA, -4L), class = "data.frame")) + expect_equal(est, structure( + list( + estimator_label = c("formula2", "bare", "string"), + term = c("group2", "group2", "group2"), + estimate = c(1.58, + 1.58, 1.58), + std.error = c(0.849091017238762, 0.849091017238762, + 0.849091017238762), + statistic = c(1.86081346748685, 1.86081346748685, + 1.86081346748685), + p.value = c(0.0791867142159381, 0.0791867142159381, + 0.0791867142159381), + conf.low = c(-0.203874032287598,-0.203874032287598,-0.203874032287598), + conf.high = c(3.3638740322876, 3.3638740322876, + 
3.3638740322876), + df = c(18, 18, 18), + outcome = c("extra", "extra", + "extra") + ), + row.names = c(NA,-3L), + class = "data.frame" + )) # select them manually des <- declare_population(data = sleep) + - # does not work (intentionally) + # does not work (intentionally) # declare_estimator( # extra ~ group, # model = lm_robust, - # post_estimation = ~ tidy_filter(., term = group2), + # model_summary = ~ tidy_try(., term = group2), # label = "formula1" # ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = ~ tidy_filter(.), + model_summary = ~ tidy_try(.), term = group2, label = "formula2" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = tidy_filter, + model_summary = tidy_try, term = group2, label = "bare" ) + declare_estimator( extra ~ group, model = lm_robust, - post_estimation = "tidy_filter", + model_summary = "tidy_try", term = group2, label = "string" ) est <- draw_estimates(des) - expect_equal(est, structure(list(estimator_label = c("formula2", "bare", "string" - ), term = c("group2", "group2", "group2"), estimate = c(1.58, - 1.58, 1.58), std.error = c(0.849091017238762, 0.849091017238762, - 0.849091017238762), statistic = c(1.86081346748685, 1.86081346748685, - 1.86081346748685), p.value = c(0.0791867142159381, 0.0791867142159381, - 0.0791867142159381), conf.low = c(-0.203874032287598, -0.203874032287598, - -0.203874032287598), conf.high = c(3.3638740322876, 3.3638740322876, - 3.3638740322876), df = c(18, 18, 18), outcome = c("extra", "extra", - "extra")), row.names = c(NA, -3L), class = "data.frame")) + expect_equal(est, structure( + list( + estimator_label = c("formula2", "bare", "string"), + term = c("group2", "group2", "group2"), + estimate = c(1.58, + 1.58, 1.58), + std.error = c(0.849091017238762, 0.849091017238762, + 0.849091017238762), + statistic = c(1.86081346748685, 1.86081346748685, + 1.86081346748685), + p.value = c(0.0791867142159381, 0.0791867142159381, + 0.0791867142159381), + conf.low = 
c(-0.203874032287598,-0.203874032287598,-0.203874032287598), + conf.high = c(3.3638740322876, 3.3638740322876, + 3.3638740322876), + df = c(18, 18, 18), + outcome = c("extra", "extra", + "extra") + ), + row.names = c(NA,-3L), + class = "data.frame" + )) }) - From b26c2b14150f11b21d8e6b36d3adfe8e2163ab6f Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 29 Apr 2020 15:01:36 -0700 Subject: [PATCH 7/9] news --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3112eeb7..32ce5935 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # DeclareDesign 0.23.0 -* Add `declare_test` to enable hypothesis testing where no estimand is targeted. +* Add `model_summary` option to `declare_estimator`, to enable specifying a model and then a separate post-estimation function to extract coefficient estimates (e.g., estimate of a treatment effect) or model summary statistics (e.g., R^2 or the result of an F-test from a regression). +* Add `declare_test` to enable hypothesis testing where no estimand is targeted. For example, `declare_test` could be used for a K-S test of distributional equality and `declare_estimator` for a difference-in-means estimate of an average treatment effect. 
# DeclareDesign 0.22.0 From d0be7e79e22225e960df5ffe3dbd7bea63d23f88 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 29 Apr 2020 15:24:24 -0700 Subject: [PATCH 8/9] cleanup --- R/declare_test.R | 39 ++++++++++++++++++++++++++-------- R/design_print_summary.R | 4 +--- R/tidy_try.R | 7 ++---- man/declare_design.Rd | 45 +-------------------------------------- man/declare_test.Rd | 39 ++++++++++++++++++++++++++-------- man/post_design.Rd | 46 +++++++++++++++++++++++++++++++++++++++- man/tidy_try.Rd | 7 ++---- 7 files changed, 111 insertions(+), 76 deletions(-) diff --git a/R/declare_test.R b/R/declare_test.R index 5ace9e7d..613d5aea 100644 --- a/R/declare_test.R +++ b/R/declare_test.R @@ -12,15 +12,36 @@ #' #' @examples #' -#' # balance_test_design <- -#' # declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) + -#' # declare_assignment(prob = 0.2) + -#' # declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, post_estimation = glance) -#' # -#' # diagnosis <- diagnose_design( -#' # design = balance_test_design, -#' # diagnosands = declare_diagnosands(false_positive_rate = mean(p.value <= 0.05), keep_defaults = FALSE) -#' # ) +#' # Balance test F test +#' +#' balance_test_design <- +#' declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) + +#' declare_assignment(prob = 0.2) + +#' declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, model_summary = glance) +#' +#' diagnosis <- diagnose_design( +#' design = balance_test_design, +#' diagnosands = declare_diagnosands(false_positive_rate = mean(p.value <= 0.05), keep_defaults = FALSE) +#' ) +#' +#' # K-S test of distributional equality +#' +#' ks_test <- function(data) { +#' test <- with(data, ks.test(x = Y[Z == 1], y = Y[Z == 0])) +#' data.frame(statistic = test$statistic, p.value = test$p.value) +#' } +#' +#' distributional_equality_design <- +#' declare_population(N = 100) + +#' declare_potential_outcomes(Y_Z_1 = rnorm(N), Y_Z_0 = rnorm(N, sd = 1.5)) + +#' 
declare_assignment(prob = 0.5) + +#' declare_reveal(Y, Z) + +#' declare_test(handler = tidy_test(ks_test), label = "ks-test") +#' +#' diagnosis <- diagnose_design( +#' design = distributional_equality_design, +#' diagnosands = declare_diagnosands(select = power) +#' ) #' #' # Thanks to Jake Bowers for this example #' diff --git a/R/design_print_summary.R b/R/design_print_summary.R index 150922f0..9925d57b 100644 --- a/R/design_print_summary.R +++ b/R/design_print_summary.R @@ -1,7 +1,7 @@ #' @param x a design object, typically created using the + operator -#' @rdname declare_design +#' @rdname post_design #' @export print.design <- function(x, verbose = TRUE, ...) { print(summary(x, verbose = verbose, ... = ...)) @@ -11,8 +11,6 @@ print.design <- function(x, verbose = TRUE, ...) { #' @param verbose an indicator for printing a long summary of the design, defaults to \code{TRUE} #' @param ... optional arguments to be sent to summary function #' -#' @rdname declare_design -#' #' @examples #' #' my_population <- declare_population(N = 500, noise = rnorm(N)) diff --git a/R/tidy_try.R b/R/tidy_try.R index f2dd50b1..c8121a8a 100644 --- a/R/tidy_try.R +++ b/R/tidy_try.R @@ -11,12 +11,9 @@ #' #' @examples #' -#' fit <- lm_robust(mpg ~ hp + disp + cyl, data = mtcars) +#' fit <- lm(mpg ~ hp + disp + cyl, data = mtcars) #' -#' tidy_filter(fit) -#' tidy_filter(fit, term = TRUE) -#' tidy_filter(fit, term = "hp") -#' tidy_filter(fit, term = c("hp", "cyl")) +#' tidy_try(fit) #' tidy_try <- function(fit, term = FALSE) { diff --git a/man/declare_design.Rd b/man/declare_design.Rd index 32fb72f3..e0874450 100644 --- a/man/declare_design.Rd +++ b/man/declare_design.Rd @@ -1,30 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/construct_design.R, R/design_print_summary.R +% Please edit documentation in R/construct_design.R \name{declare_design} \alias{declare_design} \alias{+.dd} -\alias{print.design} -\alias{summary.design} \title{Declare a design} 
\usage{ \method{+}{dd}(lhs, rhs) - -\method{print}{design}(x, verbose = TRUE, ...) - -\method{summary}{design}(object, verbose = TRUE, ...) } \arguments{ \item{lhs}{A step in a research design, beginning with a function that draws the population. Steps are evaluated sequentially. With the exception of the first step, all steps must be functions that take a \code{data.frame} as an argument and return a \code{data.frame}. Typically, many steps are declared using the \code{declare_} functions, i.e., \code{\link{declare_population}}, \code{\link{declare_population}}, \code{\link{declare_sampling}}, \code{\link{declare_potential_outcomes}}, \code{\link{declare_estimand}}, \code{\link{declare_assignment}}, and \code{\link{declare_estimator}}.} \item{rhs}{A second step in a research design} - -\item{x}{a design object, typically created using the + operator} - -\item{verbose}{an indicator for printing a long summary of the design, defaults to \code{TRUE}} - -\item{...}{optional arguments to be sent to summary function} - -\item{object}{a design object created using the + operator} } \value{ a list of two functions, the \code{design_function} and the \code{data_function}. The \code{design_function} runs the design once, i.e. draws the data and calculates any estimates and estimands defined in \code{...}, returned separately as two \code{data.frame}'s. The \code{data_function} runs the design once also, but only returns the final data. 
@@ -95,33 +81,4 @@ diagnosis <- diagnose_design(design) summary(diagnosis) } - -my_population <- declare_population(N = 500, noise = rnorm(N)) - -my_potential_outcomes <- declare_potential_outcomes( - Y_Z_0 = noise, Y_Z_1 = noise + - rnorm(N, mean = 2, sd = 2)) - -my_sampling <- declare_sampling(n = 250) - -my_assignment <- declare_assignment(m = 25) - -my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) - -my_estimator <- declare_estimator(Y ~ Z, estimand = my_estimand) - -my_mutate <- declare_step(dplyr::mutate, noise_sq = noise ^ 2) - -my_reveal <- declare_reveal() - -design <- my_population + - my_potential_outcomes + - my_sampling + - my_estimand + - my_mutate + - my_assignment + - my_reveal + - my_estimator - -summary(design) } diff --git a/man/declare_test.Rd b/man/declare_test.Rd index 255f6726..e96d7f87 100644 --- a/man/declare_test.Rd +++ b/man/declare_test.Rd @@ -31,15 +31,36 @@ Use of \code{declare_test} is identical to use of \code{\link{declare_estimator} } \examples{ -# balance_test_design <- -# declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) + -# declare_assignment(prob = 0.2) + -# declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, post_estimation = glance) -# -# diagnosis <- diagnose_design( -# design = balance_test_design, -# diagnosands = declare_diagnosands(false_positive_rate = mean(p.value <= 0.05), keep_defaults = FALSE) -# ) +# Balance test F test + +balance_test_design <- + declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) + + declare_assignment(prob = 0.2) + + declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, post_estimation = glance) + +diagnosis <- diagnose_design( + design = balance_test_design, + diagnosands = declare_diagnosands(false_positive_rate = mean(p.value <= 0.05), keep_defaults = FALSE) +) + +# K-S test of distributional equality + +ks_test <- function(data) { + test <- with(data, ks.test(x = Y[Z == 1], y = Y[Z == 0])) + data.frame(statistic = 
test$statistic, p.value = test$p.value) +} + +distributional_equality_design <- + declare_population(N = 100) + + declare_potential_outcomes(Y_Z_1 = rnorm(N), Y_Z_0 = rnorm(N, sd = 1.5)) + + declare_assignment(prob = 0.5) + + declare_reveal(Y, Z) + + declare_test(handler = tidy_test(ks_test), label = "ks-test") + +diagnosis <- diagnose_design( + design = distributional_equality_design, + diagnosands = declare_diagnosands(select = power) +) # Thanks to Jake Bowers for this example diff --git a/man/post_design.Rd b/man/post_design.Rd index 42440866..6a0f6bc0 100644 --- a/man/post_design.Rd +++ b/man/post_design.Rd @@ -1,14 +1,29 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/design_helper_functions.R +% Please edit documentation in R/design_helper_functions.R, +% R/design_print_summary.R \name{post_design} \alias{post_design} \alias{print_code} +\alias{print.design} +\alias{summary.design} \title{Explore your design} \usage{ print_code(design) + +\method{print}{design}(x, verbose = TRUE, ...) + +\method{summary}{design}(object, verbose = TRUE, ...) 
} \arguments{ \item{design}{A design object, typically created using the + operator} + +\item{x}{a design object, typically created using the + operator} + +\item{verbose}{an indicator for printing a long summary of the design, defaults to \code{TRUE}} + +\item{...}{optional arguments to be sent to summary function} + +\item{object}{a design object created using the + operator} } \description{ Explore your design @@ -45,4 +60,33 @@ my_design <- my_population + my_assignment print_code(my_design) + +my_population <- declare_population(N = 500, noise = rnorm(N)) + +my_potential_outcomes <- declare_potential_outcomes( + Y_Z_0 = noise, Y_Z_1 = noise + + rnorm(N, mean = 2, sd = 2)) + +my_sampling <- declare_sampling(n = 250) + +my_assignment <- declare_assignment(m = 25) + +my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) + +my_estimator <- declare_estimator(Y ~ Z, estimand = my_estimand) + +my_mutate <- declare_step(dplyr::mutate, noise_sq = noise ^ 2) + +my_reveal <- declare_reveal() + +design <- my_population + + my_potential_outcomes + + my_sampling + + my_estimand + + my_mutate + + my_assignment + + my_reveal + + my_estimator + +summary(design) } diff --git a/man/tidy_try.Rd b/man/tidy_try.Rd index c71950d4..d9e47c6a 100644 --- a/man/tidy_try.Rd +++ b/man/tidy_try.Rd @@ -19,11 +19,8 @@ Tidy function that returns a tidy data.frame of model results and allows filteri } \examples{ -fit <- lm_robust(mpg ~ hp + disp + cyl, data = mtcars) +fit <- lm(mpg ~ hp + disp + cyl, data = mtcars) -tidy_filter(fit) -tidy_filter(fit, term = TRUE) -tidy_filter(fit, term = "hp") -tidy_filter(fit, term = c("hp", "cyl")) +tidy_try(fit) } From 9c59e9a30850a0bf3e2f3cac0968ff56e9df7105 Mon Sep 17 00:00:00 2001 From: Graeme Blair Date: Wed, 29 Apr 2020 15:56:44 -0700 Subject: [PATCH 9/9] examples --- R/declare_test.R | 2 +- man/declare_test.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/declare_test.R b/R/declare_test.R index 613d5aea..b86be955 100644 
--- a/R/declare_test.R +++ b/R/declare_test.R @@ -17,7 +17,7 @@ #' balance_test_design <- #' declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) + #' declare_assignment(prob = 0.2) + -#' declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, post_estimation = glance) +#' declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, model_summary = glance) #' #' diagnosis <- diagnose_design( #' design = balance_test_design, diff --git a/man/declare_test.Rd b/man/declare_test.Rd index e96d7f87..49fff34c 100644 --- a/man/declare_test.Rd +++ b/man/declare_test.Rd @@ -36,7 +36,7 @@ Use of \code{declare_test} is identical to use of \code{\link{declare_estimator} balance_test_design <- declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) + declare_assignment(prob = 0.2) + - declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, post_estimation = glance) + declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, model_summary = glance) diagnosis <- diagnose_design( design = balance_test_design,