tidymodels · alexpghayes · Jun 12, 2018 · Jun 10, 2018 · Jun 10, 2018 · Jun 10, 2018
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -25,6 +25,7 @@ Authors@R: c(
     person("Wilson", "Freitas", role = "ctb"),
     person("Jason Cory", "Brunson", email = "cornelioid@gmail.com", role = "ctb"),
     person("Simon", "Jackson", email = "drsimonjackson@gmail.com", role = "ctb"),
+    person("Michael", "Kuehn", email = "mkuehn10@gmail.com", role = "ctb"),
     person("Jorge", "Cimentada", email = "cimentadaj@gmail.com", role = "ctb"))
 Maintainer: David Robinson <admiral.david@gmail.com>
 Description: Convert statistical analysis objects from R into tidy data frames,
@@ -58,8 +59,10 @@ Suggests:
     brms,
     btergm,
     car,
+    caret,
     coda,
     covr,
+    e1071,
     emmeans,
     ergm,
     gam,

diff --git a/NAMESPACE b/NAMESPACE
@@ -131,6 +131,7 @@ S3method(tidy,clm)
 S3method(tidy,clmm)
 S3method(tidy,coeftest)
 S3method(tidy,confint.glht)
+S3method(tidy,confusionMatrix)
 S3method(tidy,coxph)
 S3method(tidy,cv.glmnet)
 S3method(tidy,data.frame)

diff --git a/R/caret_tidiers.R b/R/caret_tidiers.R
@@ -0,0 +1,113 @@
+#' Tidying methods for confusionMatrix objects
+#'
+#' Tidies the result of confusion matrix from the \code{caret} package.
+#' Only a \code{tidy} method is provided, not an \code{augment} or
+#' \code{glance} method.
+#'
+#' @param x An object of class \code{confusionMatrix}
+#' @param by_class A logical of whether to show the values for class specific
+#' quantities from the confusion matrix (specificty, sensitivity, etc.). If set
+#' to FALSE, result will only show accuracy and kappa.
+#' @param ... extra arguments (not used)
+#'
+#' @return A tibble with one or more of the following columns:
+#'   \item{term}{The name of a statistic from the confusion matrix}
+#'   \item{class}{Which class the term is a measurement of}
+#'   \item{estimate}{The value of the statistic}
+#'   \item{conf.low}{Low end of 95 percent CI only applicable to accuracy}
+#'   \item{conf.high}{High end of 95 percent CI only applicable to accuracy}
+#'   \item{p.value}{P-value for accuracy and kappa statistics}
+#'
+#' @examples
+#' 
+#' \dontrun{
+#' # 2 class confusion matrix
+#' cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
+#' tidy(cm2)
+#' tidy(cm2, by_class = FALSE) # only shows accuracy and kappa
+#'
+#' # confusion matrix with more than 2 classes
+#' cm <- caret::confusionMatrix(factor(rbinom(100,2,.5)),factor(rbinom(100,2,.5)))
+#' tidy(cm)
+#' tidy(cm, by_class = FALSE) # only shows accuracy and kappa
+#' }
+#'
+#' @name caret_tidiers
+NULL
+
+#' @name caret_tidiers
+#' @export
+tidy.confusionMatrix <- function(x, by_class = TRUE, ...) {
+  cm <- as.list(x$overall)
+  nms_cm <- stringr::str_to_lower(names(cm)[1:2])
+
+
+  if (by_class) {
+    # case when only 2 classes
+    if (class(x$byClass) != "matrix") {
+      classes <-
+        x$byClass %>%
+        as.data.frame() %>%
+        rename_at(1, ~ "value") %>%
+        tibble::rownames_to_column("var") %>%
+        mutate(var = stringr::str_to_lower(gsub(" ", "_", var)))
+
+      terms <- c(nms_cm, classes$var)
+      class <- c(rep(NA_character_, 2), rep(x$positive, length(terms) - 2))
+      estimates <- c(cm$Accuracy, cm$Kappa, classes$value)
+      conf.low <- c(cm$AccuracyLower, rep(NA, length(terms) - 1))
+      conf.high <- c(cm$AccuracyUpper, rep(NA, length(terms) - 1))
+      p.value <- c(
+        cm$AccuracyPValue, cm$McnemarPValue,
+        rep(NA, length(terms) - 2)
+      )
+    }
+    else {
+      # case when there are more than 2 classes
+      classes <-
+        x$byClass %>%
+        as.data.frame() %>%
+        tibble::rownames_to_column("class") %>%
+        gather(var, value, -class) %>%
+        mutate(
+          var = stringr::str_to_lower(gsub(" ", "_", var)),
+          class = gsub("Class: ", "", class)
+        )
+
+      terms <- c(nms_cm, classes$var)
+      class <- c(rep(NA_character_, 2), classes$class)
+      estimates <- c(cm$Accuracy, cm$Kappa, classes$value)
+      conf.low <- c(cm$AccuracyLower, rep(NA, length(terms) - 1))
+      conf.high <- c(cm$AccuracyUpper, rep(NA, length(terms) - 1))
+      p.value <- c(
+        cm$AccuracyPValue, cm$McnemarPValue,
+        rep(NA, length(terms) - 2)
+      )
+    }
+    df <- data_frame(
+      term = terms,
+      class = class,
+      estimate = estimates,
+      conf.low = conf.low,
+      conf.high = conf.high,
+      p.value = p.value
+    )
+  } else {
+    # only show alpha and kappa when show_class = FALSE
+    terms <- c(nms_cm)
+    estimates <- c(cm$Accuracy, cm$Kappa)
+    conf.low <- c(cm$AccuracyLower, NA)
+    conf.high <- c(cm$AccuracyUpper, NA)
+    p.value <- c(cm$AccuracyPValue, cm$McnemarPValue)
+
+    df <- data_frame(
+      term = terms,
+      estimate = estimates,
+      conf.low = conf.low,
+      conf.high = conf.high,
+      p.value = p.value
+    )
+  }
+
+  as_tibble(fix_data_frame(df))
+}
diff --git a/man/caret_tidiers.Rd b/man/caret_tidiers.Rd
diff --git a/tests/testthat/test-caret.R b/tests/testthat/test-caret.R
@@ -0,0 +1,27 @@
+context("caret tidiers")
+
+test_that("tidy works for a 2 class confusion matrix", {
+  cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
+  td <- tidy(cm2)
+  check_tidy(td, exp.row = 13, exp.col = 6)
+})
+
+test_that("tidy works for a 2 class confusion matrix with show_class = FALSE", {
+  cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
+  td <- tidy(cm2, by_class = FALSE)
+  check_tidy(td, exp.row = 2, exp.col = 5)
+})
+
+test_that("tidy works for > 2 class confusion matrix", {
+  cm2 <- caret::confusionMatrix(factor(rbinom(100,3,.5)),factor(rbinom(100,3,.5)))
+  td <- tidy(cm2)
+  check_tidy(td, exp.row = 46, exp.col = 6)
+})
+
+test_that("tidy works for > 2 class confusion matrix with show_class = FALSE", {
+  cm2 <- caret::confusionMatrix(factor(rbinom(100,3,.5)),factor(rbinom(100,3,.5)))
+  td <- tidy(cm2, by_class = FALSE)
+  check_tidy(td, exp.row = 2, exp.col = 5)
+})
+
+