Merge 9c59e9a into 890ae56

DeclareDesign · Apr 29, 2020 · 8835fb4 · 8835fb4
2 parents 890ae56 + 9c59e9a
commit 8835fb4
Show file tree

Hide file tree

Showing 15 changed files with 724 additions and 249 deletions.
diff --git a/DeclareDesign.Rproj b/DeclareDesign.Rproj
@@ -1,8 +1,8 @@
 Version: 1.0
 
-RestoreWorkspace: Default
-SaveWorkspace: Default
-AlwaysSaveHistory: Default
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: No
 
 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
@@ -16,3 +16,6 @@ BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
 PackageRoxygenize: rd,collate,namespace
+
+QuitChildProcessesOnExit: Yes
+DisableExecuteRprofile: Yes
diff --git a/NAMESPACE b/NAMESPACE
@@ -60,6 +60,7 @@ export(simulate_designs)
 export(tidy)
 export(tidy_estimator)
 export(tidy_test)
+export(tidy_try)
 importFrom(estimatr,difference_in_means)
 importFrom(fabricatr,add_level)
 importFrom(fabricatr,fabricate)
@@ -75,26 +76,34 @@ importFrom(rlang,"!!!")
 importFrom(rlang,"!!")
 importFrom(rlang,"%||%")
 importFrom(rlang,":=")
+importFrom(rlang,as_function)
 importFrom(rlang,as_list)
 importFrom(rlang,as_quosure)
 importFrom(rlang,call_args)
+importFrom(rlang,call_args_names)
 importFrom(rlang,call_modify)
 importFrom(rlang,call_name)
+importFrom(rlang,empty_env)
 importFrom(rlang,enexpr)
 importFrom(rlang,enquo)
 importFrom(rlang,enquos)
 importFrom(rlang,env_clone)
+importFrom(rlang,eval_bare)
 importFrom(rlang,eval_tidy)
+importFrom(rlang,expr)
 importFrom(rlang,expr_deparse)
+importFrom(rlang,expr_interp)
 importFrom(rlang,expr_name)
 importFrom(rlang,expr_text)
 importFrom(rlang,f_env)
 importFrom(rlang,f_rhs)
 importFrom(rlang,f_text)
 importFrom(rlang,is_bare_integerish)
 importFrom(rlang,is_call)
+importFrom(rlang,is_character)
 importFrom(rlang,is_empty)
 importFrom(rlang,is_formula)
+importFrom(rlang,is_function)
 importFrom(rlang,is_list)
 importFrom(rlang,is_missing)
 importFrom(rlang,is_null)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
 # DeclareDesign 0.23.0
 
-* Add `declare_test` to enable hypothesis testing where no estimand is targeted.
+* Add `model_summary` option to `declare_estimator`, to enable specifying a model and then a separate post-estimation function to extract coefficient estimates (e.g., estimate of a treatment effect) or model summary statistics (e.g., R^2 or the result of an F-test from a regression).
+* Add `declare_test` to enable hypothesis testing where no estimand is targeted. For example, `declare_test` could be used for a K-S test of distributional equality and `declare_estimator` for a difference-in-means estimate of an average treatment effect.
 
 # DeclareDesign 0.22.0
 

diff --git a/R/declare_estimator.R b/R/declare_estimator.R
@@ -1,6 +1,8 @@
 #' Declare estimator
 #'
 #' @description Declares an estimator which generates estimates and associated statistics.
+#' 
+#' Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. For example, \code{declare_test} could be used for a K-S test of distributional equality and \code{declare_estimator} for a difference-in-means estimate of an average treatment effect.
 #'
 #' @inheritParams declare_internal_inherit_params
 #'
@@ -244,12 +246,15 @@ tidy_estimator <- function(fn) {
 
 #' @param data a data.frame
 #' @param model A model function, e.g. lm or glm. By default, the model is the \code{\link{difference_in_means}} function from the \link{estimatr} package.
+#' @param model_summary A model-in data-out function to extract coefficient estimates or model summary statistics, such as \code{\link{tidy}} or \code{\link{glance}}. By default, the \code{DeclareDesign} model summary function \code{\link{tidy_try}} is used, which first attempts to use the available tidy method for the model object sent to \code{model}, then if not attempts to summarize coefficients using the \code{coef(summary())} and \code{confint} methods. If these do not exist for the model object, it fails.
 #' @param term Symbols or literal character vector of term that represent quantities of interest, i.e. Z. If FALSE, return the first non-intercept term; if TRUE return all term. To escape non-standard-evaluation use \code{!!}.
 #' @rdname declare_estimator
+#' @importFrom rlang is_formula call_modify call_args_names expr_interp as_function expr quo eval_bare eval_tidy is_character is_function empty_env
 model_handler <-
   function(data,
              ...,
              model = estimatr::difference_in_means,
+             model_summary = tidy_try,
              term = FALSE) {
     coefficient_names <-
       enquo(term) # forces evaluation of quosure
@@ -260,10 +265,53 @@ model_handler <-
     # todo special case weights offsets for glm etc?
 
     results <- eval_tidy(quo(model(!!!args, data = data)))
-
-    results <- fit2tidy(results, coefficient_names)
-
+
+    # following copied from dplyr:::as_inlined_function and dplyr:::as_fun_list
+
+    if(is_formula(model_summary)) {
+
+      f <- expr_interp(model_summary)
+      # TODO: unsure of what env should be here!
+      fn <- as_function(f, env = parent.frame())
+      body(fn) <- expr({
+        pairlist(...)
+        `_quo` <- quo(!!body(fn))
+        eval_bare(`_quo`, parent.frame())
+      })
+
+      results <- eval_tidy(fn(results))
+
+    } else {
+
+      if (is_character(model_summary)) {
+        model_summary <- get(model_summary, envir = parent.frame(), mode = "function")
+      } else if (!is_function(model_summary)) {
+        stop("Please provide one sided formula, a function, or a function name to model_summary.")
+      }
+
+      results <- model_summary(results)
+
+    }
+
+    if("term" %in% colnames(results)) {
+      if (is.character(coefficient_names)) {
+        coefs_in_output <- coefficient_names %in% results$term
+        if (!all(coefs_in_output)) {
+          stop(
+            "Not all of the terms declared in your estimator are present in the model output, including ",
+            paste(coefficient_names[!coefs_in_output], collapse = ", "),
+            ".",
+            call. = FALSE
+          )
+        }
+        results <- results[results$term %in% coefficient_names, , drop = FALSE]
+      } else if (is.logical(coefficient_names) && !coefficient_names) {
+        results <- results[which.max(results$term != "(Intercept)"), , drop = FALSE]
+      }
+    }
+
     results
+
   }
 
 validation_fn(model_handler) <- function(ret, dots, label) {
@@ -288,109 +336,6 @@ validation_fn(model_handler) <- function(ret, dots, label) {
 #' @rdname declare_estimator
 estimator_handler <- tidy_estimator(model_handler)
 
-#' @importFrom generics tidy
-#' @export
-generics::tidy
-
-tidy_default <- function(x, conf.int = TRUE) {
-  # TODO: error checking -- are column names named as we expect
-
-  val <- try({
-    summ <- coef(summary(x))
-
-    if(conf.int == TRUE) {
-      ci <- suppressMessages(as.data.frame(confint(x)))
-      tidy_df <-
-        data.frame(
-          term = rownames(summ),
-          summ,
-          ci,
-          stringsAsFactors = FALSE,
-          row.names = NULL
-        )
-      colnames(tidy_df) <-
-        c(
-          "term",
-          "estimate",
-          "std.error",
-          "statistic",
-          "p.value",
-          "conf.low",
-          "conf.high"
-        )
-    } else {
-      tidy_df <-
-        data.frame(
-          term = rownames(summ),
-          summ,
-          ci,
-          stringsAsFactors = FALSE,
-          row.names = NULL
-        )
-      colnames(tidy_df) <-
-        c(
-          "term",
-          "estimate",
-          "std.error",
-          "statistic",
-          "p.value"
-        )
-    }
-
-  }, silent = TRUE)
-
-  if(class(val) == "try-error"){
-    stop("The default tidy method for the model fit of class ", class(x), " failed. You may try installing and loading the broom package, or you can write your own tidy.", class(x), " method.", call. = FALSE)
-  }
-
-  tidy_df
-}
-
-#' @importFrom utils getS3method
-hasS3Method <- function(f, obj) {
-  for(i in class(obj)) {
-    get_function <- try(getS3method(f, i), silent = TRUE)
-    if(class(get_function) != "try-error" && is.function(get_function)) return(TRUE)
-  }
-  FALSE
-}
-
-# called by model_handler, resets columns names !!!
-fit2tidy <- function(fit, term = FALSE) {
-
-  # browser()
-  if (hasS3Method("tidy", fit)) {
-    tidy_df <- tidy(fit, conf.int = TRUE)
-  } else {
-    tidy_df <- try(tidy_default(fit, conf.int = TRUE), silent = TRUE)
-
-    if(class(tidy_df) == "try-error"){
-      stop("We were unable to tidy the output of the function provided to 'model'. 
-           It is possible that the broom package has a tidier for that object type. 
-           If not, you can use a custom estimator to 'estimator_function'.
-           See examples in ?declare_estimator")
-    }
-  }
-
-  if (is.character(term)) {
-    coefs_in_output <- term %in% tidy_df$term
-    if (!all(coefs_in_output)) {
-      stop(
-        "Not all of the terms declared in your estimator are present in the model output, including ",
-        paste(term[!coefs_in_output], collapse = ", "),
-        ".",
-        call. = FALSE
-      )
-    }
-    tidy_df <- tidy_df[tidy_df$term %in% term, , drop = FALSE]
-  } else if (is.logical(term) && !term) {
-    tidy_df <-
-      tidy_df[which.max(tidy_df$term != "(Intercept)"), , drop = FALSE]
-  }
-
-  tidy_df
-}
-
 # helper methods for estimand=my_estimand arguments to estimator_handler
 #
 get_estimand_label <- function(estimand) {

diff --git a/R/declare_test.R b/R/declare_test.R
@@ -1,6 +1,8 @@
 #' Declare test
 #'
-#' @description Declares an test which generates a test statistic and associated inferential statistics. Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand.
+#' @description Declares an test which generates a test statistic and associated inferential statistics. 
+#' 
+#' Use of \code{declare_test} is identical to use of \code{\link{declare_estimator}}. Use \code{declare_test} for hypothesis testing with no specific estimand in mind; use \code{declare_estimator} for hypothesis testing when you can link each estimate to an estimand. For example, \code{declare_test} could be used for a K-S test of distributional equality and \code{declare_estimator} for a difference-in-means estimate of an average treatment effect.
 #'
 #' @inheritParams declare_internal_inherit_params
 #'
@@ -10,15 +12,36 @@
 #'
 #' @examples
 #' 
-#' # balance_test_design <- 
-#' #   declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) +
-#' #   declare_assignment(prob = 0.2) + 
-#' #   declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, post_estimation = glance)
-#' #   
-#' # diagnosis <- diagnose_design(
-#' #   design = balance_test_design,
-#' #   diagnosands = declare_diagnosands(false_positive_rate = mean(p.value <= 0.05), keep_defaults = FALSE)
-#' # )
+#' # Balance test F test
+#' 
+#' balance_test_design <-
+#'   declare_population(N = 100, cov1 = rnorm(N), cov2 = rnorm(N), cov3 = rnorm(N)) +
+#'   declare_assignment(prob = 0.2) +
+#'   declare_test(Z ~ cov1 + cov2 + cov3, model = lm_robust, model_summary = glance)
+#'
+#' diagnosis <- diagnose_design(
+#'   design = balance_test_design,
+#'   diagnosands = declare_diagnosands(false_positive_rate = mean(p.value <= 0.05), keep_defaults = FALSE)
+#' )
+#' 
+#' # K-S test of distributional equality
+#' 
+#' ks_test <- function(data) {
+#'   test <- with(data, ks.test(x = Y[Z == 1], y = Y[Z == 0]))
+#'   data.frame(statistic = test$statistic, p.value = test$p.value)
+#' }
+#' 
+#' distributional_equality_design <-
+#'   declare_population(N = 100) + 
+#'   declare_potential_outcomes(Y_Z_1 = rnorm(N), Y_Z_0 = rnorm(N, sd = 1.5)) + 
+#'   declare_assignment(prob = 0.5) + 
+#'   declare_reveal(Y, Z) + 
+#'   declare_test(handler = tidy_test(ks_test), label = "ks-test")
+#'   
+#' diagnosis <- diagnose_design(
+#'   design = distributional_equality_design,
+#'   diagnosands = declare_diagnosands(select = power)
+#' ) 
 #' 
 #' # Thanks to Jake Bowers for this example
 #' 

diff --git a/R/design_print_summary.R b/R/design_print_summary.R
@@ -1,7 +1,7 @@
 
 
 #' @param x a design object, typically created using the + operator
-#' @rdname declare_design
+#' @rdname post_design
 #' @export
 print.design <- function(x, verbose = TRUE, ...) {
   print(summary(x, verbose = verbose, ... = ...))
@@ -11,8 +11,6 @@ print.design <- function(x, verbose = TRUE, ...) {
 #' @param verbose an indicator for printing a long summary of the design, defaults to \code{TRUE}
 #' @param ... optional arguments to be sent to summary function
 #'
-#' @rdname declare_design
-#'
 #' @examples
 #'
 #' my_population <- declare_population(N = 500, noise = rnorm(N))