Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add caret tidiers #344

Merged
merged 11 commits into from Jun 12, 2018
1 change: 1 addition & 0 deletions DESCRIPTION
Expand Up @@ -57,6 +57,7 @@ Suggests:
brms,
btergm,
car,
caret,
coda,
covr,
emmeans,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -130,6 +130,7 @@ S3method(tidy,clm)
S3method(tidy,clmm)
S3method(tidy,coeftest)
S3method(tidy,confint.glht)
S3method(tidy,confusionMatrix)
S3method(tidy,coxph)
S3method(tidy,cv.glmnet)
S3method(tidy,data.frame)
Expand Down
111 changes: 111 additions & 0 deletions R/caret_tidiers.R
@@ -0,0 +1,111 @@
#' Tidying methods for confusionMatrix objects
#'
#' Tidies the result of confusion matrix from the \code{caret} package.
#' Only a \code{tidy} method is provided, not an \code{augment} or
#' \code{glance} method.
#'
#' @param x An object of class \code{confusionMatrix}
#' @param show_class A boolean of whether to show the values for class specific
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd use "logical" instead of boolean since that's more standard for R.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also rename show_class to by_class.

#' quantities from the confusion matrix (specificty, sensitivity, etc.). If set
#' to FALSE, result will only show accuracy and kappa.
#' @param ... extra arguments (not used)
#'
#' @return A data.frame with one or more of the following columns:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should return a tibble!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I need to fix the durbinWatsonTest as well then.

#' \item{term}{The name of a statistic from the confusion matrix}
#' \item{class}{Which class the term is a measurement of}
#' \item{estimate}{The value of the statistic}
#' \item{lower}{Lower bound of 95 percent CI only applicable to accuracy}
#' \item{upper}{Upper bound of 95 percent CI only applicable to accuracy}
#' \item{p.value}{P-value for accuracy and kappa statistics}
#'
#' @examples
#'
#' # 2 class confusion matrix
#' cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
#' tidy(cm2)
#' tidy(cm2, show_class = FALSE) # only shows accuracy and kappa
#'
#' # confusion matrix with more than 2 classes
#' cm <- caret::confusionMatrix(factor(rbinom(100,2,.5)),factor(rbinom(100,2,.5)))
#' tidy(cm)
#' tidy(cm, show_class = FALSE) # only shows accuracy and kappa
#'
#' @name caret_tidiers
NULL

#' @name caret_tidiers
#' @export
tidy.confusionMatrix <- function(x, show_class = TRUE, ...) {
cm <- as.list(x$overall)
nms_cm <- str_to_lower(names(cm)[1:2])


if (show_class) {
# case when only 2 classes
if (class(x$byClass) != "matrix") {
by_class <-
x$byClass %>%
as.data.frame() %>%
rename_at(1, ~ "value") %>%
rownames_to_column("var") %>%
mutate(var = str_to_lower(gsub(" ", "_", var)))

terms <- c(nms_cm, by_class$var)
class <- c(rep(NA_character_, 2), rep(x$positive, length(terms) - 2))
estimates <- c(cm$Accuracy, cm$Kappa, by_class$value)
lower <- c(cm$AccuracyLower, rep(NA, length(terms) - 1))
upper <- c(cm$AccuracyUpper, rep(NA, length(terms) - 1))
p.value <- c(
cm$AccuracyPValue, cm$McnemarPValue,
rep(NA, length(terms) - 2)
)
}
else {
# case when there are more than 2 classes
by_class <-
x$byClass %>%
as.data.frame() %>%
rownames_to_column("class") %>%
gather(var, value, -class) %>%
mutate(
var = str_to_lower(gsub(" ", "_", var)),
class = gsub("Class: ", "", class)
)

terms <- c(nms_cm, by_class$var)
class <- c(rep(NA_character_, 2), by_class$class)
estimates <- c(cm$Accuracy, cm$Kappa, by_class$value)
lower <- c(cm$AccuracyLower, rep(NA, length(terms) - 1))
upper <- c(cm$AccuracyUpper, rep(NA, length(terms) - 1))
p.value <- c(
cm$AccuracyPValue, cm$McnemarPValue,
rep(NA, length(terms) - 2)
)
}
df <- data_frame(
term = terms,
class = class,
estimate = estimates,
lower = lower,
upper = upper,
p.value = p.value
)
} else {
# only show alpha and kappa when show_class = FALSE
terms <- c(nms_cm)
estimates <- c(cm$Accuracy, cm$Kappa)
lower <- c(cm$AccuracyLower, NA)
upper <- c(cm$AccuracyUpper, NA)
p.value <- c(cm$AccuracyPValue, cm$McnemarPValue)

df <- data_frame(
term = terms,
estimate = estimates,
lower = lower,
upper = upper,
p.value = p.value
)
}

fix_data_frame(df)
}
46 changes: 46 additions & 0 deletions man/caret_tidiers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions tests/testthat/test-caret.R
@@ -0,0 +1,28 @@
context("caret tidiers")

test_that("tidy works for a 2 class confusion matrix", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
td <- tidy(cm2)
check_tidy(td, exp.row = 13, exp.col = 6)
})

test_that("tidy works for a 2 class confusion matrix with show_class = FALSE", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,1,.5)),factor(rbinom(100,1,.5)))
td <- tidy(cm2, show_class = FALSE)
check_tidy(td, exp.row = 2, exp.col = 5)
})

test_that("tidy works for > 2 class confusion matrix", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,3,.5)),factor(rbinom(100,3,.5)))
td <- tidy(cm2)
check_tidy(td, exp.row = 46, exp.col = 6)
})


test_that("tidy works for > 2 class confusion matrix with show_class = FALSE", {
cm2 <- caret::confusionMatrix(factor(rbinom(100,3,.5)),factor(rbinom(100,3,.5)))
td <- tidy(cm2, show_class = FALSE)
check_tidy(td, exp.row = 2, exp.col = 5)
})