Skip to content

Commit

Permalink
Add logic to handle column of NAs
Browse files Browse the repository at this point in the history
The mice imputation code would fail if a data.frame containing a column made up entirely of NA values was passed in. Logic has been added to detect and remove any such columns and to notify the user of the removal with a warning.
  • Loading branch information
jrwishart committed Nov 15, 2019
1 parent c1e99ce commit e404522
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 21 deletions.
20 changes: 9 additions & 11 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
Package: flipImputation
Type: Package
Title: Data frame imputation
Version: 1.0.0
Version: 1.0.1
Author: Displayr <opensource@displayr.com>
Maintainer: Displayr <opensource@displayr.com>
Description: Functions for imputing data using mice and hot.deck.
License: GPL-3
LazyData: TRUE
Imports:
hot.deck,
mice,
flipU
Suggests:
flipExampleData,
testthat
Remotes:
Displayr/flipU
RoxygenNote: 6.1.0
Imports: hot.deck,
mice,
flipU
Suggests: flipExampleData,
testthat
Remotes: Displayr/flipU
Encoding: UTF-8
RoxygenNote: 7.0.0
14 changes: 8 additions & 6 deletions R/imputation.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ Imputation <- function(data = NULL, formula = NULL, method = "try mice", m = 1,
}
FALSE
}
missing.variables <- apply(data, 2, function(x) all(is.na(x)))
if(any(missing.variables))
{
missing.variable.names <- paste0(names(data[missing.variables]), collapse = ", ")
warning("Data has variable(s) that are entirely missing values (all observed values of the variable are missing). ",
"These variable(s) have been removed from the analysis (", missing.variable.names, ").")
data <- data[, !missing.variables]
}
if(!any(is.na(data)))
return(lapply(seq(m), function(x) data))

Expand All @@ -54,12 +62,6 @@ Imputation <- function(data = NULL, formula = NULL, method = "try mice", m = 1,
dat.colnames <- NULL
imputed.data <- suppressWarnings(try(
{
# Require is used instead of Depends because using Depends
# implies that all downstream packages must also use Depends.
# This can substantially increase the load time
# as well as risk of conflicting names when the hierachy of
# dependencies is very deep
require("mice")
set.seed(seed)
dat.colnames <- colnames(data)
colnames(data) <- paste0("A", 1:ncol(data)) # need to replace names to avoid errors in mice v3.0.0
Expand Down
9 changes: 7 additions & 2 deletions man/Imputation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 11 additions & 2 deletions tests/testthat/test-imputation.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ test_that("Errors with missing values",
{
d <- data.frame(y = 1:10, x = 1:10)
expect_warning(Imputation(d, y ~ x, m = 1, seed = 1233), NA) # nothing missing
d$z <- rep(NA, nrow(d))
expect_warning(Imputation(d, y ~ ., m = 1, seed = 1233),
"Data has variable(s) that are entirely missing values (all observed values of the variable are missing). These variable(s) have been removed from the analysis (z).", fixed = TRUE)
d$w <- d$z
expect_warning(Imputation(d, y ~ ., m = 1, seed = 1233),
"Data has variable(s) that are entirely missing values (all observed values of the variable are missing). These variable(s) have been removed from the analysis (z, w).", fixed = TRUE)
d$z <- NULL
d$w <- NULL
d$y[2] <- NA
expect_warning(Imputation(d, y ~ x, m = 1, seed = 1233),
"Imputation has been selected, but the data has no missing values in the predictors, so nothing has been imputed.")
Expand Down Expand Up @@ -1019,7 +1027,8 @@ test_that("Rownames preserved",

# Data without any missing values should come back unchanged, and no warning
# should be raised along the way.
test_that("No imputation needed",
{
    # Imputation() checks its input column by column (via apply over columns),
    # so the input must be a data.frame rather than a bare vector.
    x <- data.frame(x = seq(100))
    # A regexp of NA asserts that the call produces no warning at all.
    expect_warning(z <- Imputation(x), NA)
    # With nothing to impute, the first returned data set is the input itself.
    expect_equal(z[[1]], x)
})

0 comments on commit e404522

Please sign in to comment.