Skip to content

Commit

Permalink
Improvements of consistency for post-hoc hypothesis test tidiers (#788)
Browse files Browse the repository at this point in the history
* Adds tidy-method form summary_emm-objects.

* Updates NEWS

* Apply suggestions from code review

Co-Authored-By: alex hayes <alexpghayes@gmail.com>

* Makes requested changes for emmeans tidiers. (#691)

- Adds joint_tests()-example
- Removes strict = FALSE from tests

* Roxygenizes documentation.

* Improves consistency of post-hoc comparison tidies (i.e. glht, stats::TukeyHSD, and emmeans). See #692

* Fixes failing tests and roxygenizes.

* Adds NEWS entry.

* Fixes more failing tests.

* Fix bug in emmeans and multcomp examples.

* Implement revision suggested by @alexpghayes

Co-Authored-By: alex hayes <alexpghayes@gmail.com>

Co-authored-by: alex hayes <alexpghayes@gmail.com>
  • Loading branch information
crsh and alexpghayes committed Apr 23, 2020
1 parent bc5587a commit d0f65b6
Show file tree
Hide file tree
Showing 15 changed files with 332 additions and 105 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Expand Up @@ -21,6 +21,9 @@ This release features a number of unannounced hard-deprecations. I am sorry that

- All `conf.int` arguments now default to `FALSE`, and all `conf.level` arguments now default to `0.95`. This should primarily affect `tidy.survreg()`, which previously always returned confidence intervals, although there are some others.

- Tidiers for `emmeans`-objects use the arguments `conf.int` and `conf.level` instead of relying on the argument names native to the `emmeans::summary()`-methods (i.e., `infer` and `level`). Similarly, `multcomp`-tidiers now include a call to `summary()` as previous behavior was akin to setting the now removed argument `quick = TRUE`. Both families of tidiers now use the `adj.p.value` column name when appropriate. Finally, `emmeans`-, `multcomp`-, and `TukeyHSD`-tidiers now consistently use the column names `contrast` and `null.value` instead of `comparison`, `level1` and `level2`, or `lhs` and `rhs` (see #692).


### Deprecations

This release of `broom` hard-deprecates the following functions and tidiers:
Expand Down
119 changes: 90 additions & 29 deletions R/emmeans-tidiers.R
Expand Up @@ -8,19 +8,18 @@
#' may be silently ignored!
#'
#' @evalRd return_tidy(
#' "contrast",
#' null.value = "Value to which estimate is compared",
#' estimate = "Expected marginal mean",
#' "std.error",
#' "df",
#' "conf.low",
#' "conf.high",
#' level1 = "One level of the factor being contrasted",
#' level2 = "The other level of the factor being contrasted",
#' "contrast",
#' "p.value",
#' statistic = "T-ratio statistic",
#' estimate = "Estimated least-squares mean."
#' "p.value"
#' )
#'
#' @details Returns a data frame with one observation for each estimated
#' @details Returns a data frame with one observation for each estimated marginal
#' mean, and one column for each combination of factors. When the input is a
#' contrast, each row will contain one estimated contrast.
#'
Expand Down Expand Up @@ -48,7 +47,7 @@
#'
#' # plot confidence intervals
#' library(ggplot2)
#' ggplot(tidy(marginal), aes(day, estimate)) +
#' ggplot(tidy(marginal, conf.int = TRUE), aes(day, estimate)) +
#' geom_point() +
#' geom_errorbar(aes(ymin = conf.low, ymax = conf.high))
#'
Expand All @@ -59,7 +58,7 @@
#' by_price
#' tidy(by_price)
#'
#' ggplot(tidy(by_price), aes(price2, estimate, color = day)) +
#' ggplot(tidy(by_price, conf.int = TRUE), aes(price2, estimate, color = day)) +
#' geom_line() +
#' geom_errorbar(aes(ymin = conf.low, ymax = conf.high))
#'
Expand All @@ -82,24 +81,21 @@ tidy.lsmobj <- function(x, conf.int = FALSE, conf.level = .95, ...) {
#' @inherit tidy.lsmobj params examples details
#'
#' @evalRd return_tidy(
#' estimate = "Expected marginal mean",
#' "std.error",
#' "df",
#' "conf.low",
#' "conf.high",
#' level1 = "One level of the factor being contrasted",
#' level2 = "The other level of the factor being contrasted",
#' "contrast",
#' "p.value",
#' statistic = "T-ratio statistic",
#' estimate = "Estimated least-squares mean."
#' "p.value"
#' )
#'
#' @export
#' @family emmeans tidiers
#' @seealso [tidy()], [emmeans::ref_grid()], [emmeans::emmeans()],
#' [emmeans::contrast()]
tidy.ref.grid <- function(x, ...) {
tidy_emmeans(x, ...)
tidy.ref.grid <- function(x, conf.int = FALSE, conf.level = .95, ...) {
tidy_emmeans(x, infer = c(conf.int, TRUE), level = conf.level, ...)
}

#' @templateVar class emmGrid
Expand All @@ -109,24 +105,49 @@ tidy.ref.grid <- function(x, ...) {
#' @inherit tidy.lsmobj params examples details
#'
#' @evalRd return_tidy(
#' estimate = "Expected marginal mean",
#' "std.error",
#' "df",
#' "conf.low",
#' "conf.high",
#' level1 = "One level of the factor being contrasted",
#' level2 = "The other level of the factor being contrasted",
#' statistic = "T-ratio statistic",
#' "p.value"
#' )
#'
#' @export
#' @family emmeans tidiers
#' @seealso [tidy()], [emmeans::ref_grid()], [emmeans::emmeans()],
#' [emmeans::contrast()]
tidy.emmGrid <- function(x, conf.int = FALSE, conf.level = .95, ...) {
tidy_emmeans(x, infer = c(conf.int, TRUE), level = conf.level, ...)
}

#' @templateVar class summary_emm
#' @template title_desc_tidy
#'
#' @param x An `summary_emm` object.
#' @inherit tidy.lsmobj params examples details
#'
#' @evalRd return_tidy(
#' "contrast",
#' "p.value",
#' null.value = "Value to which estimate is compared",
#' estimate = "Expected marginal mean",
#' "std.error",
#' "df",
#' "num.df",
#' "den.df",
#' "conf.low",
#' "conf.high",
#' statistic = "T-ratio statistic",
#' estimate = "Estimated least-squares mean."
#' "p.value"
#' )
#'
#' @export
#' @family emmeans tidiers
#' @seealso [tidy()], [emmeans::ref_grid()], [emmeans::emmeans()],
#' [emmeans::contrast()]
tidy.emmGrid <- function(x, ...) {
tidy_emmeans(x, ...)
tidy.summary_emm <- function(x, null.value = NULL) {
tidy_emmeans_summary(x, null.value = null.value)
}

#' @templateVar class summary_emm
Expand Down Expand Up @@ -161,10 +182,21 @@ tidy.summary_emm <- function(x, ...) {

tidy_emmeans <- function(x, ...) {
s <- summary(x, ...)
tidy_emmeans_summary(s)

# Get null.value
if(".offset." %in% colnames(x@grid)) {
null.value <- x@grid[, ".offset."]
} else {
null.value <- 0
}

# Get term names
term_names <- names(x@misc$orig.grid)

tidy_emmeans_summary(s, null.value = null.value, term_names = term_names)
}

tidy_emmeans_summary <- function(x) {
tidy_emmeans_summary <- function(x, null.value = NULL, term_names = NULL) {
ret <- as.data.frame(x)
repl <- list(
"lsmean" = "estimate",
Expand All @@ -179,22 +211,51 @@ tidy_emmeans_summary <- function(x) {
"df1" = "num.df",
"df2" = "den.df",
"model term" = "term"
# "contrast" = "lhs"
)

if ("contrast" %in% colnames(ret) &&
all(stringr::str_detect(ret$contrast, " - "))) {
ret <- tidyr::separate_(ret, "contrast",
c("level1", "level2"),
sep = " - "
mc_adjusted <- any(
grepl(
"conf-level adjustment|p value adjustment",
attr(x, "mesg"),
ignore.case = TRUE
)
)

if(mc_adjusted) {
repl <- c(repl, "p.value" = "adj.p.value")
}

colnames(ret) <- dplyr::recode(colnames(ret), !!!(repl))

# Remove std.error if conf.low/high exist
if(any(c("conf.low", "conf.high") %in% colnames(ret))) {
ret <- select(ret, -std.error)
}

# If contrast column exists, add null.value column
if("contrast" %in% colnames(ret)) {
if(length(null.value) < nrow(ret)) null.value <- rep_len(null.value, nrow(ret))
ret <- bind_cols(contrast = ret[, "contrast"], null.value = null.value, select(ret, -contrast))
}

# Add term column, if appropriate, unless it exists
if("term" %in% colnames(ret)) {
ret <- ret %>%
mutate(term = stringr::str_trim(term))
} else if(!is.null(term_names)) {
term <- term_names[!term_names %in% colnames(ret)]

if(length(term) != 0) {
term <- paste(term_names[!term_names %in% colnames(ret)], collapse = "*") %>%
rep_len(nrow(ret))
} else { # No missing term names, because combine = TRUE?
term <- apply(ret, 1, function(x) colnames(ret)[which(x == ".")])
}

ret <- bind_cols(ret[, colnames(ret) %in% term_names, drop = FALSE], term = term, ret[, !colnames(ret) %in% term_names, drop = FALSE])
}

as_tibble(ret)
as_tibble(ret) %>%
mutate_if(is.factor, as.character)
}
79 changes: 56 additions & 23 deletions R/multcomp-tidiers.R
Expand Up @@ -4,7 +4,7 @@
#' @param x A `glht` object returned by [multcomp::glht()].
#' @template param_unused_dots
#'
#' @evalRd return_tidy("lhs", "rhs", "estimate")
#' @evalRd return_tidy("contrast", "null.value", "estimate")
#'
#' @examples
#'
Expand Down Expand Up @@ -36,12 +36,22 @@
#' @family multcomp tidiers
#' @seealso [tidy()], [multcomp::glht()]
#'
tidy.glht <- function(x, ...) {
tibble(
lhs = rownames(x$linfct),
rhs = x$rhs,
estimate = stats::coef(x)
)
tidy.glht <- function(x, conf.int = FALSE, conf.level = .95, ...) {
glht_summary <- summary(x, ...)

tidy_glht_summary <- tidy.summary.glht(glht_summary, ...)

if (conf.int) {
tidy_glht_confint <- tidy.confint.glht(confint(x, level = conf.level, ...))

tidy_glht_summary <- dplyr::select(tidy_glht_summary, -std.error) %>%
dplyr::left_join(tidy_glht_confint) %>%
select(term, contrast, null.value, estimate, conf.low, conf.high, everything())

return(tidy_glht_summary)
}

tidy_glht_summary
}

#' @templateVar class confint.glht
Expand All @@ -53,22 +63,21 @@ tidy.glht <- function(x, ...) {
#' @template param_unused_dots
#'
#' @inherit tidy.glht examples
#' @evalRd return_tidy("lhs", "rhs", "estimate", "conf.low", "conf.high")
#' @evalRd return_tidy("contrast", "estimate", "conf.low", "conf.high")
#'
#' @export
#' @family multcomp tidiers
#' @seealso [tidy()], [multcomp::confint.glht()], [multcomp::glht()]
tidy.confint.glht <- function(x, ...) {

lhs_rhs <- tibble(
lhs = rownames(x$linfct),
rhs = x$rhs
)
lhs_rhs <- glht_lhr_rhs_tibble(x)

coef <- as_tibble(x$confint)

colnames(coef) <- c("estimate", "conf.low", "conf.high")
coef$estimate <- as.vector(coef$estimate) # Remove attributes

bind_cols(lhs_rhs, coef)
bind_cols(glht_term_column(x), lhs_rhs[, "contrast", drop = FALSE], coef)
}

#' @templateVar class summary.glht
Expand All @@ -81,8 +90,8 @@ tidy.confint.glht <- function(x, ...) {
#'
#' @inherit tidy.glht examples
#' @evalRd return_tidy(
#' "lhs",
#' "rhs",
#' "contrast",
#' "null.value",
#' "estimate",
#' "std.error",
#' "statistic",
Expand All @@ -94,17 +103,39 @@ tidy.confint.glht <- function(x, ...) {
#' @seealso [tidy()], [multcomp::summary.glht()], [multcomp::glht()]
tidy.summary.glht <- function(x, ...) {

lhs_rhs <- tibble(
lhs = rownames(x$linfct),
rhs = x$rhs
)
lhs_rhs <- glht_lhr_rhs_tibble(x)

coef <- as_tibble(
x$test[c("coefficients", "sigma", "tstat", "pvalues")]
)
names(coef) <- c("estimate", "std.error", "statistic", "p.value")

if(x$test$type != "none") {
pvalue_colname <- "adj.p.value"
} else {
pvalue_colname <- "p.value"
}

names(coef) <- c("estimate", "std.error", "statistic", pvalue_colname)

bind_cols(glht_term_column(x), lhs_rhs, coef)
}


glht_lhr_rhs_tibble <- function(x) {
tibble(
contrast = stringr::str_replace(rownames(x$linfct), "^.+: ", ""),
null.value = x$rhs
)
}

bind_cols(lhs_rhs, coef)
glht_term_column <- function(x) {
if(!is.null(x$focus) && length(x$focus) == 1) {
tibble(term = rep(x$focus, nrow(x$linfct)))
} else if(!is.null(x$focus) && length(x$focus) > 1) {
term <- stringr::str_extract(rownames(x$linfct), "(^.+): .")
term <- stringr::str_replace(term, ": .$", "")
tibble(term = term)
}
}


Expand All @@ -116,13 +147,15 @@ tidy.summary.glht <- function(x, ...) {
#' @template param_unused_dots
#'
#' @inherit tidy.glht examples
#' @evalRd return_tidy("lhs", "letters")
#' @evalRd return_tidy("contrast", "letters")
#'
#' @export
#' @family multcomp tidiers
#' @seealso [tidy()], [multcomp::cld()], [multcomp::summary.glht()],
#' [multcomp::confint.glht()], [multcomp::glht()]
tidy.cld <- function(x, ...) {
vec <- x$mcletters$Letters
tibble(lhs = names(vec), letters = vec)
tidy_cld <- tibble(names(vec), vec)
colnames(tidy_cld) <- c(x$xname, "letters")
tidy_cld
}
6 changes: 4 additions & 2 deletions R/stats-anova-tidiers.R
Expand Up @@ -319,8 +319,10 @@ tidy.summary.manova <- function(x, ...) {
tidy.TukeyHSD <- function(x, ...) {
purrr::map_df(x,
function(e) {
nn <- c("estimate", "conf.low", "conf.high", "adj.p.value")
fix_data_frame(e, nn, "comparison")
null.value <- rep(0, nrow(e))
e <- cbind(null.value, e)
nn <- c("null.value", "estimate", "conf.low", "conf.high", "adj.p.value")
fix_data_frame(e, nn, "contrast")
}, .id = "term"
)
}
2 changes: 1 addition & 1 deletion man/tidy.cld.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/tidy.confint.glht.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d0f65b6

Please sign in to comment.