Improvements of consistency for post-hoc hypothesis test tidiers (#788)

* Adds tidy-method form summary_emm-objects. * Updates NEWS * Apply suggestions from code review Co-Authored-By: alex hayes <alexpghayes@gmail.com> * Makes requested changes for emmeans tidiers. (#691) - Adds joint_tests()-example - Removes strict = FALSE from tests * Roxygenizes documentation. * Improves consistency of post-hoc comparison tidies (i.e. glht, stats::TukeyHSD, and emmeans). See #692 * Fixes failing tests and roxygenizes. * Adds NEWS entry. * Fixes more failing tests. * Fix bug in emmeans and multcomp examples. * Implement revision suggested by @alexpghayes Co-Authored-By: alex hayes <alexpghayes@gmail.com> Co-authored-by: alex hayes <alexpghayes@gmail.com>
tidymodels · Apr 23, 2020 · d0f65b6 · d0f65b6
1 parent bc5587a
commit d0f65b6
Show file tree

Hide file tree

Showing 15 changed files with 332 additions and 105 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -21,6 +21,9 @@ This release features a number of unannounced hard-deprecations. I am sorry that
 
 - All `conf.int` arguments now default to `FALSE`, and all `conf.level` arguments now default to `0.95`. This should primarily affect `tidy.survreg()`, which previously always returned confidence intervals, although there are some others.
 
+- Tidiers for `emmeans`-objects use the arguments `conf.int` and `conf.level` instead of relying on the argument names native to the `emmeans::summary()`-methods (i.e., `infer` and `level`). Similarly, `multcomp`-tidiers now include a call to `summary()` as previous behavior was akin to setting the now removed argument `quick = TRUE`. Both families of tidiers now use the `adj.p.value` column name when appropriate. Finally, `emmeans`-, `multcomp`-, and `TukeyHSD`-tidiers now consistently use the column names `contrast` and `null.value` instead of `comparison`, `level1` and `level2`, or `lhs` and `rhs` (see #692).
+
+
 ### Deprecations
 
 This release of `broom` hard-deprecates the following functions and tidiers:

diff --git a/R/emmeans-tidiers.R b/R/emmeans-tidiers.R
@@ -8,19 +8,18 @@
 #'   may be silently ignored!
 #'   
 #' @evalRd return_tidy(
+#'   "contrast",
+#'   null.value = "Value to which estimate is compared",
+#'   estimate = "Expected marginal mean",
 #'   "std.error", 
 #'   "df", 
 #'   "conf.low", 
 #'   "conf.high",
-#'   level1 = "One level of the factor being contrasted",
-#'   level2 = "The other level of the factor being contrasted",
-#'   "contrast",
-#'   "p.value",
 #'   statistic = "T-ratio statistic",
-#'   estimate = "Estimated least-squares mean."
+#'   "p.value"
 #' )
 #'
-#' @details Returns a data frame with one observation for each estimated
+#' @details Returns a data frame with one observation for each estimated marginal
 #'   mean, and one column for each combination of factors. When the input is a
 #'   contrast, each row will contain one estimated contrast.
 #'
@@ -48,7 +47,7 @@
 #'
 #' # plot confidence intervals
 #' library(ggplot2)
-#' ggplot(tidy(marginal), aes(day, estimate)) +
+#' ggplot(tidy(marginal, conf.int = TRUE), aes(day, estimate)) +
 #'   geom_point() +
 #'   geom_errorbar(aes(ymin = conf.low, ymax = conf.high))
 #'
@@ -59,7 +58,7 @@
 #' by_price
 #' tidy(by_price)
 #'
-#' ggplot(tidy(by_price), aes(price2, estimate, color = day)) +
+#' ggplot(tidy(by_price, conf.int = TRUE), aes(price2, estimate, color = day)) +
 #'   geom_line() +
 #'   geom_errorbar(aes(ymin = conf.low, ymax = conf.high))
 #'   
@@ -82,24 +81,21 @@ tidy.lsmobj <- function(x, conf.int = FALSE, conf.level = .95, ...) {
 #' @inherit tidy.lsmobj params examples details 
 #'   
 #' @evalRd return_tidy(
+#'   estimate = "Expected marginal mean",
 #'   "std.error", 
 #'   "df", 
 #'   "conf.low", 
 #'   "conf.high",
-#'   level1 = "One level of the factor being contrasted",
-#'   level2 = "The other level of the factor being contrasted",
-#'   "contrast",
-#'   "p.value",
 #'   statistic = "T-ratio statistic",
-#'   estimate = "Estimated least-squares mean."
+#'   "p.value"
 #' )
 #'
 #' @export
 #' @family emmeans tidiers
 #' @seealso [tidy()], [emmeans::ref_grid()], [emmeans::emmeans()],
 #'   [emmeans::contrast()]
-tidy.ref.grid <- function(x, ...) {
-  tidy_emmeans(x, ...)
+tidy.ref.grid <- function(x, conf.int = FALSE, conf.level = .95, ...) {
+  tidy_emmeans(x, infer = c(conf.int, TRUE), level = conf.level, ...)
 }
 
 #' @templateVar class emmGrid
@@ -109,24 +105,49 @@ tidy.ref.grid <- function(x, ...) {
 #' @inherit tidy.lsmobj params examples details 
 #'   
 #' @evalRd return_tidy(
+#'   estimate = "Expected marginal mean",
 #'   "std.error", 
 #'   "df", 
 #'   "conf.low", 
 #'   "conf.high",
-#'   level1 = "One level of the factor being contrasted",
-#'   level2 = "The other level of the factor being contrasted",
+#'   statistic = "T-ratio statistic",
+#'   "p.value"
+#' )
+#'
+#' @export
+#' @family emmeans tidiers
+#' @seealso [tidy()], [emmeans::ref_grid()], [emmeans::emmeans()],
+#'   [emmeans::contrast()]
+tidy.emmGrid <- function(x, conf.int = FALSE, conf.level = .95, ...) {
+  tidy_emmeans(x, infer = c(conf.int, TRUE), level = conf.level, ...)
+}
+
+#' @templateVar class summary_emm
+#' @template title_desc_tidy
+#' 
+#' @param x An `summary_emm` object.
+#' @inherit tidy.lsmobj params examples details 
+#'   
+#' @evalRd return_tidy(
 #'   "contrast",
-#'   "p.value",
+#'   null.value = "Value to which estimate is compared",
+#'   estimate = "Expected marginal mean",
+#'   "std.error", 
+#'   "df", 
+#'   "num.df",
+#'   "den.df",
+#'   "conf.low", 
+#'   "conf.high",
 #'   statistic = "T-ratio statistic",
-#'   estimate = "Estimated least-squares mean."
+#'   "p.value"
 #' )
 #'
 #' @export
 #' @family emmeans tidiers
 #' @seealso [tidy()], [emmeans::ref_grid()], [emmeans::emmeans()],
 #'   [emmeans::contrast()]
-tidy.emmGrid <- function(x, ...) {
-  tidy_emmeans(x, ...)
+tidy.summary_emm <- function(x, null.value = NULL) {
+  tidy_emmeans_summary(x, null.value = null.value)
 }
 
 #' @templateVar class summary_emm
@@ -161,10 +182,21 @@ tidy.summary_emm <- function(x, ...) {
 
 tidy_emmeans <- function(x, ...) {
   s <- summary(x, ...)
-  tidy_emmeans_summary(s)
+
+  # Get null.value
+  if(".offset." %in% colnames(x@grid)) {
+    null.value <- x@grid[, ".offset."]
+  } else {
+    null.value <- 0
+  }
+
+  # Get term names
+  term_names <- names(x@misc$orig.grid)
+
+  tidy_emmeans_summary(s, null.value = null.value, term_names = term_names)
 }
 
-tidy_emmeans_summary <- function(x) {
+tidy_emmeans_summary <- function(x, null.value = NULL, term_names = NULL) {
   ret <- as.data.frame(x)
   repl <- list(
     "lsmean" = "estimate",
@@ -179,22 +211,51 @@ tidy_emmeans_summary <- function(x) {
     "df1" = "num.df",
     "df2" = "den.df",
     "model term" = "term"
+    # "contrast" = "lhs"
   )
 
-  if ("contrast" %in% colnames(ret) &&
-      all(stringr::str_detect(ret$contrast, " - "))) {
-    ret <- tidyr::separate_(ret, "contrast",
-                            c("level1", "level2"),
-                            sep = " - "
+  mc_adjusted <- any(
+    grepl(
+      "conf-level adjustment|p value adjustment",
+      attr(x, "mesg"),
+      ignore.case = TRUE
     )
+  )
+
+  if(mc_adjusted) {
+    repl <- c(repl, "p.value" = "adj.p.value")
   }
 
   colnames(ret) <- dplyr::recode(colnames(ret), !!!(repl))
 
+  # Remove std.error if conf.low/high exist
+  if(any(c("conf.low", "conf.high") %in% colnames(ret))) {
+    ret <- select(ret, -std.error)
+  }
+
+  # If contrast column exists, add null.value column
+  if("contrast" %in% colnames(ret)) {
+    if(length(null.value) < nrow(ret)) null.value <- rep_len(null.value, nrow(ret))
+    ret <- bind_cols(contrast = ret[, "contrast"], null.value = null.value, select(ret, -contrast))
+  }
+
+  # Add term column, if appropriate, unless it exists
   if("term" %in% colnames(ret)) {
     ret <- ret %>%
       mutate(term = stringr::str_trim(term))
+  } else if(!is.null(term_names)) {
+    term <- term_names[!term_names %in% colnames(ret)]
+
+    if(length(term) != 0) {
+      term <- paste(term_names[!term_names %in% colnames(ret)], collapse = "*") %>% 
+        rep_len(nrow(ret))
+    } else { # No missing term names, because combine = TRUE?
+      term <- apply(ret, 1, function(x) colnames(ret)[which(x == ".")])
+    }
+
+    ret <- bind_cols(ret[, colnames(ret) %in% term_names, drop = FALSE], term = term, ret[, !colnames(ret) %in% term_names, drop = FALSE])
   }
 
-  as_tibble(ret)
+  as_tibble(ret) %>% 
+    mutate_if(is.factor, as.character)
 }
diff --git a/R/multcomp-tidiers.R b/R/multcomp-tidiers.R
@@ -4,7 +4,7 @@
 #' @param x A `glht` object returned by [multcomp::glht()].
 #' @template param_unused_dots
 #' 
-#' @evalRd return_tidy("lhs", "rhs", "estimate")
+#' @evalRd return_tidy("contrast", "null.value", "estimate")
 #'
 #' @examples
 #' 
@@ -36,12 +36,22 @@
 #' @family multcomp tidiers
 #' @seealso [tidy()], [multcomp::glht()]
 #' 
-tidy.glht <- function(x, ...) {
-  tibble(
-    lhs = rownames(x$linfct),
-    rhs = x$rhs,
-    estimate = stats::coef(x)
-  )
+tidy.glht <- function(x, conf.int = FALSE, conf.level = .95, ...) {
+  glht_summary <- summary(x, ...)
+
+  tidy_glht_summary <- tidy.summary.glht(glht_summary, ...)
+
+  if (conf.int) {
+    tidy_glht_confint <- tidy.confint.glht(confint(x, level = conf.level, ...))
+
+    tidy_glht_summary <- dplyr::select(tidy_glht_summary, -std.error) %>% 
+      dplyr::left_join(tidy_glht_confint) %>% 
+      select(term, contrast, null.value, estimate, conf.low, conf.high, everything())
+
+    return(tidy_glht_summary)
+  }
+
+  tidy_glht_summary
 }
 
 #' @templateVar class confint.glht
@@ -53,22 +63,21 @@ tidy.glht <- function(x, ...) {
 #' @template param_unused_dots
 #' 
 #' @inherit tidy.glht examples
-#' @evalRd return_tidy("lhs", "rhs", "estimate", "conf.low", "conf.high")
+#' @evalRd return_tidy("contrast", "estimate", "conf.low", "conf.high")
 #' 
 #' @export
 #' @family multcomp tidiers
 #' @seealso [tidy()], [multcomp::confint.glht()], [multcomp::glht()]
 tidy.confint.glht <- function(x, ...) {
 
-  lhs_rhs <- tibble(
-    lhs = rownames(x$linfct),
-    rhs = x$rhs
-  )
+  lhs_rhs <- glht_lhr_rhs_tibble(x)
 
   coef <- as_tibble(x$confint)
+
   colnames(coef) <- c("estimate", "conf.low", "conf.high")
+  coef$estimate <- as.vector(coef$estimate) # Remove attributes
 
-  bind_cols(lhs_rhs, coef)
+  bind_cols(glht_term_column(x), lhs_rhs[, "contrast", drop = FALSE], coef)
 }
 
 #' @templateVar class summary.glht
@@ -81,8 +90,8 @@ tidy.confint.glht <- function(x, ...) {
 #' 
 #' @inherit tidy.glht examples
 #' @evalRd return_tidy(
-#'   "lhs", 
-#'   "rhs", 
+#'   "contrast", 
+#'   "null.value", 
 #'   "estimate",
 #'   "std.error",
 #'   "statistic",
@@ -94,17 +103,39 @@ tidy.confint.glht <- function(x, ...) {
 #' @seealso [tidy()], [multcomp::summary.glht()], [multcomp::glht()]
 tidy.summary.glht <- function(x, ...) {
 
-  lhs_rhs <- tibble(
-    lhs = rownames(x$linfct),
-    rhs = x$rhs
-  )
+  lhs_rhs <- glht_lhr_rhs_tibble(x)
 
   coef <- as_tibble(
     x$test[c("coefficients", "sigma", "tstat", "pvalues")]
   )
-  names(coef) <- c("estimate", "std.error", "statistic", "p.value")
+
+  if(x$test$type != "none") {
+    pvalue_colname <- "adj.p.value"
+  } else {
+    pvalue_colname <- "p.value"
+  }
+
+  names(coef) <- c("estimate", "std.error", "statistic", pvalue_colname)
+
+  bind_cols(glht_term_column(x), lhs_rhs, coef)
+}
+
+
+glht_lhr_rhs_tibble <- function(x) {
+  tibble(
+    contrast = stringr::str_replace(rownames(x$linfct), "^.+: ", ""),
+    null.value = x$rhs
+  )
+}
 
-  bind_cols(lhs_rhs, coef)
+glht_term_column <- function(x) {
+  if(!is.null(x$focus) && length(x$focus) == 1) {
+    tibble(term = rep(x$focus, nrow(x$linfct)))
+  } else if(!is.null(x$focus) && length(x$focus) > 1) {
+    term <- stringr::str_extract(rownames(x$linfct), "(^.+): .")
+    term <- stringr::str_replace(term, ": .$", "")
+    tibble(term = term)
+  }
 }
 
 
@@ -116,13 +147,15 @@ tidy.summary.glht <- function(x, ...) {
 #' @template param_unused_dots
 #' 
 #' @inherit tidy.glht examples
-#' @evalRd return_tidy("lhs", "letters")
+#' @evalRd return_tidy("contrast", "letters")
 #' 
 #' @export
 #' @family multcomp tidiers
 #' @seealso [tidy()], [multcomp::cld()], [multcomp::summary.glht()],
 #'   [multcomp::confint.glht()], [multcomp::glht()]
 tidy.cld <- function(x, ...) {
   vec <- x$mcletters$Letters
-  tibble(lhs = names(vec), letters = vec)
+  tidy_cld <- tibble(names(vec), vec)
+  colnames(tidy_cld) <- c(x$xname, "letters")
+  tidy_cld
 }
diff --git a/R/stats-anova-tidiers.R b/R/stats-anova-tidiers.R
@@ -319,8 +319,10 @@ tidy.summary.manova <- function(x, ...) {
 tidy.TukeyHSD <- function(x, ...) {
   purrr::map_df(x,
     function(e) {
-      nn <- c("estimate", "conf.low", "conf.high", "adj.p.value")
-      fix_data_frame(e, nn, "comparison")
+      null.value <- rep(0, nrow(e))
+      e <- cbind(null.value, e)
+      nn <- c("null.value", "estimate", "conf.low", "conf.high", "adj.p.value")
+      fix_data_frame(e, nn, "contrast")
     }, .id = "term"
   )
 }
diff --git a/man/tidy.cld.Rd b/man/tidy.cld.Rd
diff --git a/man/tidy.confint.glht.Rd b/man/tidy.confint.glht.Rd