Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add caret tidiers #344

Merged
merged 11 commits into from Jun 12, 2018
3 changes: 3 additions & 0 deletions DESCRIPTION
Expand Up @@ -25,6 +25,7 @@ Authors@R: c(
person("Wilson", "Freitas", role = "ctb"),
person("Jason Cory", "Brunson", email = "cornelioid@gmail.com", role = "ctb"),
person("Simon", "Jackson", email = "drsimonjackson@gmail.com", role = "ctb"),
person("Michael", "Kuehn", email = "mkuehn10@gmail.com", role = "ctb"),
person("Jorge", "Cimentada", email = "cimentadaj@gmail.com", role = "ctb"))
Maintainer: David Robinson <admiral.david@gmail.com>
Description: Convert statistical analysis objects from R into tidy data frames,
Expand Down Expand Up @@ -58,8 +59,10 @@ Suggests:
brms,
btergm,
car,
caret,
coda,
covr,
e1071,
emmeans,
ergm,
gam,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -131,6 +131,7 @@ S3method(tidy,clm)
S3method(tidy,clmm)
S3method(tidy,coeftest)
S3method(tidy,confint.glht)
S3method(tidy,confusionMatrix)
S3method(tidy,coxph)
S3method(tidy,cv.glmnet)
S3method(tidy,data.frame)
Expand Down
113 changes: 113 additions & 0 deletions R/caret_tidiers.R
@@ -0,0 +1,113 @@
#' Tidying methods for confusionMatrix objects
#'
#' Tidies the result of confusion matrix from the \code{caret} package.
#' Only a \code{tidy} method is provided, not an \code{augment} or
#' \code{glance} method.
#'
#' @param x An object of class \code{confusionMatrix}
#' @param by_class A logical of whether to show the values for class specific
#' quantities from the confusion matrix (specificty, sensitivity, etc.). If set
#' to FALSE, result will only show accuracy and kappa.
#' @param ... extra arguments (not used)
#'
#' @return A tibble with one or more of the following columns:
#' \item{term}{The name of a statistic from the confusion matrix}
#' \item{class}{Which class the term is a measurement of}
#' \item{estimate}{The value of the statistic}
#' \item{conf.low}{Low end of 95 percent CI only applicable to accuracy}
#' \item{conf.high}{High end of 95 percent CI only applicable to accuracy}
#' \item{p.value}{P-value for accuracy and kappa statistics}
#'
#' @examples
#'
#' \dontrun{
#' # 2 class confusion matrix
#' cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
#' tidy(cm2)
#' tidy(cm2, by_class = FALSE) # only shows accuracy and kappa
#'
#' # confusion matrix with more than 2 classes
#' cm <- caret::confusionMatrix(factor(rbinom(100,2,.5)),factor(rbinom(100,2,.5)))
#' tidy(cm)
#' tidy(cm, by_class = FALSE) # only shows accuracy and kappa
#' }
#'
#' @name caret_tidiers
NULL

#' @name caret_tidiers
#' @export
tidy.confusionMatrix <- function(x, by_class = TRUE, ...) {
cm <- as.list(x$overall)
nms_cm <- stringr::str_to_lower(names(cm)[1:2])


if (by_class) {
# case when only 2 classes
if (class(x$byClass) != "matrix") {
classes <-
x$byClass %>%
as.data.frame() %>%
rename_at(1, ~ "value") %>%
tibble::rownames_to_column("var") %>%
mutate(var = stringr::str_to_lower(gsub(" ", "_", var)))

terms <- c(nms_cm, classes$var)
class <- c(rep(NA_character_, 2), rep(x$positive, length(terms) - 2))
estimates <- c(cm$Accuracy, cm$Kappa, classes$value)
conf.low <- c(cm$AccuracyLower, rep(NA, length(terms) - 1))
conf.high <- c(cm$AccuracyUpper, rep(NA, length(terms) - 1))
p.value <- c(
cm$AccuracyPValue, cm$McnemarPValue,
rep(NA, length(terms) - 2)
)
}
else {
# case when there are more than 2 classes
classes <-
x$byClass %>%
as.data.frame() %>%
tibble::rownames_to_column("class") %>%
gather(var, value, -class) %>%
mutate(
var = stringr::str_to_lower(gsub(" ", "_", var)),
class = gsub("Class: ", "", class)
)

terms <- c(nms_cm, classes$var)
class <- c(rep(NA_character_, 2), classes$class)
estimates <- c(cm$Accuracy, cm$Kappa, classes$value)
conf.low <- c(cm$AccuracyLower, rep(NA, length(terms) - 1))
conf.high <- c(cm$AccuracyUpper, rep(NA, length(terms) - 1))
p.value <- c(
cm$AccuracyPValue, cm$McnemarPValue,
rep(NA, length(terms) - 2)
)
}
df <- data_frame(
term = terms,
class = class,
estimate = estimates,
conf.low = conf.low,
conf.high = conf.high,
p.value = p.value
)
} else {
# only show alpha and kappa when show_class = FALSE
terms <- c(nms_cm)
estimates <- c(cm$Accuracy, cm$Kappa)
conf.low <- c(cm$AccuracyLower, NA)
conf.high <- c(cm$AccuracyUpper, NA)
p.value <- c(cm$AccuracyPValue, cm$McnemarPValue)

df <- data_frame(
term = terms,
estimate = estimates,
conf.low = conf.low,
conf.high = conf.high,
p.value = p.value
)
}

as_tibble(fix_data_frame(df))
}
48 changes: 48 additions & 0 deletions man/caret_tidiers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions tests/testthat/test-caret.R
@@ -0,0 +1,27 @@
context("caret tidiers")

test_that("tidy works for a 2 class confusion matrix", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
td <- tidy(cm2)
check_tidy(td, exp.row = 13, exp.col = 6)
})

test_that("tidy works for a 2 class confusion matrix with show_class = FALSE", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
td <- tidy(cm2, by_class = FALSE)
check_tidy(td, exp.row = 2, exp.col = 5)
})

test_that("tidy works for > 2 class confusion matrix", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,3,.5)),factor(rbinom(100,3,.5)))
td <- tidy(cm2)
check_tidy(td, exp.row = 46, exp.col = 6)
})

test_that("tidy works for > 2 class confusion matrix with show_class = FALSE", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,3,.5)),factor(rbinom(100,3,.5)))
td <- tidy(cm2, by_class = FALSE)
check_tidy(td, exp.row = 2, exp.col = 5)
})