tidymodels · alexpghayes · Apr 8, 2019 · Apr 4, 2019 · Apr 5, 2019 · Apr 5, 2019
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -200,6 +200,16 @@ Authors@R:
              role = "ctb",
              email = "krauskae@gmail.com",
              comment = c(ORCID = "0000-0002-1466-5850")),
+      person(given = "Malcolm",
+             family = "Barrett",
+             role = "ctb",
+             email = "malcolmbarrett@gmail.com", 
+             comment = c(ORCID = "0000-0003-0299-5825")),
+      person(given = "Charles",
+             family = "Gray",
+             role = "ctb",
+             email = "charlestigray@gmail.com",
+             comment = c(ORCID = "0000-0002-9978-011X")))
       person(given = "Jared",
              family = "Wilber",
              role = "ctb")))
@@ -307,7 +317,8 @@ Suggests:
     testthat,
     tseries,
     xergm,
-    zoo
+    zoo,
+    metafor
 VignetteBuilder: 
     knitr
 Remotes: 
@@ -381,6 +392,7 @@ Collate:
     'quantreg-nlrq-tidiers.R'
     'quantreg-rq-tidiers.R'
     'quantreg-rqs-tidiers.R'
+    'rma-tidiers.R'
     'robust-glmrob-tidiers.R'
     'robust-lmrob-tidiers.R'
     'sp-tidiers.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -26,6 +26,7 @@ S3method(augment,poLCA)
 S3method(augment,polr)
 S3method(augment,prcomp)
 S3method(augment,rlm)
+S3method(augment,rma)
 S3method(augment,rq)
 S3method(augment,rqs)
 S3method(augment,smooth.spline)
@@ -82,6 +83,7 @@ S3method(glance,polr)
 S3method(glance,pyears)
 S3method(glance,ridgelm)
 S3method(glance,rlm)
+S3method(glance,rma)
 S3method(glance,rq)
 S3method(glance,rqs)
 S3method(glance,smooth.spline)
@@ -180,6 +182,7 @@ S3method(tidy,ref.grid)
 S3method(tidy,regsubsets)
 S3method(tidy,ridgelm)
 S3method(tidy,rlm)
+S3method(tidy,rma)
 S3method(tidy,roc)
 S3method(tidy,rq)
 S3method(tidy,rqs)

diff --git a/NEWS.md b/NEWS.md
@@ -68,10 +68,12 @@ TODO: sort out what happens to `glance.aov()`
 
 ## New tidiers, features and bugfixes
 
+
+- Added tidiers for `rma` objects from the `metafor` package (#674, @malcolmbarrett, @softloud)
+
 - Added support for `tidy.lavaan()` to take `quick = TRUE`. (#628)
 
 - `ordinal` tidier rewrite
-
 - Added tidiers for `pam` objects from the `cluster` package. (#637)
 
 - Added `tidy.svyglm()` and `glance.svyglm()` (#611)

diff --git a/R/rma-tidiers.R b/R/rma-tidiers.R
@@ -0,0 +1,288 @@
+#' @templateVar class rma
+#' @template title_desc_tidy
+#' 
+#' @param x An `rma` created by the `metafor` package.
+#' @inheritParams tidy.lm
+#' @param include_studies Logical. Should individual studies be included in the
+#'    output? Defaults to `TRUE`.
+#' @template param_unused_dots
+#' @param measure Measure type. See [metafor::escalc()]
+#'
+#' @evalRd return_tidy(
+#'   study = "The name of the individual study",
+#'   type = "The estimate type  (summary vs individual study)",
+#'   "estimate",
+#'   "std.error",
+#'   "statistic",
+#'   "p.value",
+#'   "conf.low",
+#'   "conf.high"
+#' )
+#' @export
+#'
+#' @examples
+#' 
+#' library(metafor)
+#' 
+#' df <-
+#'   escalc(
+#'     measure = "RR",
+#'     ai = tpos,
+#'     bi = tneg,
+#'     ci = cpos,
+#'     di = cneg,
+#'     data = dat.bcg
+#'   )
+#' 
+#' meta_analysis <- rma(yi, vi, data = df, method = "EB")
+#'
+#' tidy(meta_analysis)
+#'
+#' @rdname metafor_tidiers
+#' 
+tidy.rma <- function(x, conf.int = FALSE, conf.level = 0.95, exponentiate = FALSE,
+                     include_studies = TRUE, measure = "GEN", ...) {
+  # tidy summary estimates
+  betas <- x$beta
+  if (!is.null(nrow(betas)) && nrow(betas) > 1) {
+    # get estimate type and fix spelling
+    study <- rownames(betas)
+    swap <- grepl("intrcpt", study)
+    study[swap] <- "intercept"
+    betas <- as.double(betas)
+  } else {
+    study <- "overall"
+    betas <- betas[1]
+  }
+
+  results <- tibble::tibble(
+    study = study, 
+    type = "summary",
+    estimate = betas, 
+    std.error = x$se,
+    statistic = x$zval, 
+    p.value = x$pval,
+    conf.low = x$ci.lb, 
+    conf.high = x$ci.ub
+  )
+
+  # tidy individual studies
+  if (include_studies) {
+    # use `metafor::escalc` to standardize estimates and confidence intervals
+    estimates <- metafor::escalc(yi = x$yi.f, vi = x$vi.f, measure = measure) %>%
+      summary(level = conf.level * 100) %>% 
+      as.data.frame(stringsAsFactors = FALSE)
+
+    n_studies <- length(x$slab)
+
+    estimates <- dplyr::bind_cols(
+      study = as.character(x$slab), 
+      # dplyr::bind_cols is strict about recycling, so repeat for each study
+      type = rep("study", n_studies), 
+      estimates[, c("yi", "sei", "zi")], 
+      p.value = rep(NA, n_studies),
+      estimates[, c("ci.lb", "ci.ub")] 
+    )
+
+    names(estimates) <- c("study", "type", "estimate", "std.error", "statistic",
+                          "p.value", "conf.low", "conf.high")
+    estimates <- tibble::as_tibble(estimates)
+    results <- dplyr::bind_rows(estimates, results) 
+  }
+
+  if (exponentiate) {
+    results <-  dplyr::mutate_at(results, dplyr::vars(estimate, conf.low, conf.high), exp)
+  }
+
+  if (!conf.int) {
+    results <- dplyr::select(results, -conf.low, -conf.high)
+  }
+
+  # remove extra model data from study names
+  attributes(results$study) <- NULL
+
+  results
+}
+
+#' @templateVar class rma
+#' @template title_desc_glance
+#'
+#' @param x An `rma` created by the `metafor` package.
+#' @template param_unused_dots
+#'
+#' @evalRd return_glance(
+#'   "nobs", 
+#'   "measure", 
+#'   "method", 
+#'   "i.squared", 
+#'   "h.squared", 
+#'   "tau.squared", 
+#'   "tau.squared.se", 
+#'   "cochran.qe", 
+#'   "p.value.cochran.qe", 
+#'   "cochran.qm", 
+#'   "p.value.cochran.qm"
+#' )
+#' @export
+#'
+#' @examples
+#'
+#' library(metafor)
+#' 
+#' df <-
+#'   escalc(
+#'     measure = "RR",
+#'     ai = tpos,
+#'     bi = tneg,
+#'     ci = cpos,
+#'     di = cneg,
+#'     data = dat.bcg
+#'   )
+#' 
+#' meta_analysis <- rma(yi, vi, data = df, method = "EB")
+#'
+#' glance(meta_analysis)
+#'
+glance.rma <- function(x, ...) {
+  # reshape model fit statistics and clean names
+  fit_stats <- metafor::fitstats(x)
+  fit_stats <- fit_stats %>%
+    t() %>%
+    as.data.frame()
+  names(fit_stats) <-
+    stringr::str_replace(names(fit_stats), "\\:", "")
+
+  # metafor returns different fit statistics for different models
+  # so use a list + `purrr::discard` to remove unrelated statistics
+  list(
+    nobs = x$k,
+    measure = x$measure,
+    method = x$method,
+    i.squared = x$I2,
+    h.squared = x$H2,
+    tau.squared = x$tau2,
+    tau.squared.se = x$se.tau2,
+    cochran.qe = x$QE,
+    p.value.cochran.qe = x$QEp,
+    cochran.qm = x$QM,
+    p.value.cochran.qm = x$QMp,
+    fit_stats
+  ) %>%
+    # get rid of null values
+    purrr::discard(is.null) %>%
+    # don't include multivariate model stats
+    purrr::discard(~length(.x) >= 2) %>%
+    # change to tibble with correct column and row names
+    as.data.frame() %>%
+    tibble::as_tibble()
+}
+
+#' @templateVar class rma
+#' @template title_desc_augment
+#'
+#' @param x An `rma` created by the `metafor` package.
+#' @template param_unused_dots
+#'
+#' @evalRd return_augment(
+#'   .observed = "The observed values for the individual studies", 
+#'   ".fitted", 
+#'   ".se.fit", 
+#'   ".conf.low", 
+#'   ".conf.high", 
+#'   ".cred.low", 
+#'   ".cred.high", 
+#'   ".resid",
+#'   ".moderator",
+#'   ".moderator.level"
+#' )
+#' @export
+#'
+#' @examples
+#'
+#' library(metafor)
+#' 
+#' df <-
+#'   escalc(
+#'     measure = "RR",
+#'     ai = tpos,
+#'     bi = tneg,
+#'     ci = cpos,
+#'     di = cneg,
+#'     data = dat.bcg
+#'   )
+#' 
+#' meta_analysis <- rma(yi, vi, data = df, method = "EB")
+#'
+#' augment(meta_analysis)
+#'
+#' @rdname metafor_augmenters
+augment.rma <- function(x, ...) {
+  # metafor generally handles these for different models through the monolith
+  # `rma` class; using `purrr::possibly` primarily helps discard unused
+  # components but also helps get the right component for each model
+  blup0 <- purrr::possibly(metafor::blup, NULL)
+  residuals0 <- purrr::possibly(stats::residuals, NULL)
+  influence0 <- purrr::possibly(stats::influence, NULL)
+
+  y <- x$yi.f
+  # get best linear unbiased predictors if available
+  pred <- blup0(x)
+  # otherwise use `predict.rma()`
+  if (is.null(pred)) pred <- predict(x)
+  pred <- as.data.frame(pred)
+
+  # fix names
+  names(pred)[1:4] <- c(".fitted", ".se.fit", ".conf.low", ".conf.high")
+  credible_intervals <- names(pred) %in% c("cr.lb", "cr.ub")
+  names(pred)[credible_intervals] <- c(".cred.low", ".cred.high")
+  moderator <- names(pred) == "X"
+  names(pred)[moderator] <- ".moderator"
+  moderator_level <- names(pred) == "tau2.level"
+  names(pred)[moderator_level] <- ".moderator.level"
+
+  res <- residuals0(x)
+  inf <- influence0(x)
+  # if model has influence statistics, bind them and clean their names
+  if (!is.null(inf)) {
+    inf <- dplyr::bind_cols(as.data.frame(inf$inf), dfbetas = inf$dfbs$intrcpt)
+    inf <- dplyr::select(
+      inf, 
+      .hat = hat, 
+      .cooksd = cook.d, 
+      .std.resid = rstudent,
+      .dffits = dffits, 
+      .cov.ratio = cov.r,
+      .tau.squared.loo = tau2.del, 
+      .cochran.qe.loo = QE.del,
+      .weight = weight, 
+      .dfbetas = dfbetas
+    )
+  }
+
+  if (nrow(pred) == 1) {
+    # Some metafor models only return a single prediction
+    # based on the summary estimate. `dplyr::bind_cols()` requires
+    # `pred` to be the same length as other results, so replicate
+    # prediction for each row
+    pred <- purrr::map_dfr(seq_along(x$slab), ~pred)
+  }
+
+  ret <- dplyr::bind_cols(
+    .rownames = as.character(x$slab),
+    .observed = y,
+    pred
+  )
+
+
+  # join residuals, if they exist for the model
+  if (!is.null(res)) {
+    res <- tibble::enframe(res, name = ".rownames", value = ".resid")
+    ret <- dplyr::left_join(ret, res, by = ".rownames")
+  }
+
+  # don't return rownames if they are just row numbers
+  no_study_names <- all(x$slab == as.character(seq_along(x$slab)))
+  if (no_study_names) ret$.rownames <- NULL
+
+  tibble::as_tibble(ret)
+}