Skip to content

Commit

Permalink
DS-83 Add message to footer for Dummy variable
Browse files Browse the repository at this point in the history
  • Loading branch information
flipDevTools committed Mar 6, 2020
1 parent 848b553 commit 03de2af
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 8 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: flipData
Type: Package
Title: Functions for extracting and describing data
Version: 1.2.5
Version: 1.2.6
Author: Displayr <opensource@displayr.com>
Maintainer: Displayr <opensource@displayr.com>
Description: Functions for extracting data from formulas and
Expand Down
30 changes: 26 additions & 4 deletions R/estimationdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ EstimationData <- function(formula = NULL,
.filter <- if (weighted) subset & weights > 0 & some.data else subset & some.data # Name to avoid bug in subset.data.frame
data.subset <- subset(data, .filter)
data.subset <- CopyAttributes(data.subset, data)

dummy.adjusted <- FALSE
##############
## Imputation
single.imputation <- missing == "Imputation (replace missing values with estimates)"
Expand Down Expand Up @@ -132,7 +132,10 @@ EstimationData <- function(formula = NULL,
stop(paste("Unknown 'missing' method:", missing)))
data.for.estimation <- CopyAttributes(data.for.estimation, data.subset)
if (missing == "Dummy variable adjustment")
{ # Dummy variable adjustment can be selected but sometimes not used
dummy.adjusted <- any(grepl(".dummy.var_GQ9KqD7YOf$", colnames(data.for.estimation)))
data.cols <- names(data.for.estimation) %in% names(data.subset)
}
else
data.cols <- rep(TRUE, ncol(data.for.estimation))
levels.pre <- paste0(rep(labels, vapply(data.for.estimation[data.cols], nlevels, 0L)), ": ",
Expand Down Expand Up @@ -163,11 +166,30 @@ EstimationData <- function(formula = NULL,
if (error.if.insufficient.obs && n.estimation < length(variable.names))
stop(gettextf("There are fewer observations (%d)%s(%d)", n.estimation,
" than there are variables ", length(variable.names)))

description <- SampleDescription(n.total, n.subset, n.estimation,
Labels(subset), weighted, weight.label, missing, imputation.label, m,
if(HasOutcome(formula)) "predictor" else "")

if(HasOutcome(formula)) "predictor" else "",
dummy.adjusted = dummy.adjusted)
# Add statements about removing cases with missing outcomes and/or all predictors missing
if (missing == "Dummy variable adjustment")
{
outcome.name <- OutcomeName(formula)
missing.outcomes <- is.na(data[[outcome.name]])
outcome.index <- which(names(data) == outcome.name)
if (ncol(data) == 2)
all.predictors.missing <- rep(FALSE, nrow(data))
else
all.predictors.missing <- apply(data[-outcome.index], 1, function(x) all(is.na(x)))
if (any(missing.outcomes | all.predictors.missing))
{
missing.outcomes <- if (any(missing.outcomes)) "an outcome variable" else NULL
all.predictors.missing <- if (any(all.predictors.missing)) "all predictor variables" else NULL
description <- paste0(description, paste0(" cases missing ",
paste0(c(missing.outcomes, all.predictors.missing),
collapse = " or missing "),
" have been excluded;"))
}
}
list(estimation.data = data.for.estimation,
weights = weights,
unfiltered.weights = unfiltered.weights,
Expand Down
5 changes: 4 additions & 1 deletion R/missingdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,10 @@ AddDummyVariablesForNAs <- function(data, outcome.name, checks = TRUE)
else
return(data)
}
cases.all.predictors.missing <- apply(predictor.df, 1, function(x) all(is.na(x)))
if (ncol(data) == 2)
cases.all.predictors.missing <- rep(FALSE, nrow(predictor.df))
else
cases.all.predictors.missing <- apply(predictor.df, 1, function(x) all(is.na(x)))

dummy.variable.df <- data.frame(dummy.variable.df, check.names = FALSE)
names(dummy.variable.df) <- paste0(names(dummy.variable.df), ".dummy.var_GQ9KqD7YOf")
Expand Down
53 changes: 51 additions & 2 deletions tests/testthat/test-estimationdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,65 @@ no.level.test <- data.frame(Y = c(1, 2, 2, 3, 3),
X1 = c(NA, 1, NA, 3, 4),
X2 = c(NA, 1, 2, 3, 4),
X3 = c(NA, 3, 2, 1, 4))
dummy.test <- data.frame(Y = c(1:10), X1 = c(NA, 2:10), X2 = c(1, NA, 3:10), X3 = c(1:2, NA, 4:10))

edge.case.dummy <- data.frame(Y = 1:10, X = 1:10)
edge.case.dummy.miss.outcome <- edge.case.dummy
edge.case.dummy.miss.outcome[1, 1] <- NA

edge.case.dummy.miss.pred <- edge.case.dummy
edge.case.dummy.miss.pred[1, 2] <- NA


test_that("Dummy variable adjustment", {
expect_warning(missing.level.output <- EstimationData(Y ~ X1 + X2 + X3, data = missing.level.test,
missing = "Dummy variable adjustment")$estimation.data,
missing = "Dummy variable adjustment"),
"Some categories do not appear in the data: 'Y: A'")
expect_equal(missing.level.output, expected.dummy.missing.level)
expect_equal(missing.level.output$estimation.data, expected.dummy.missing.level)
expect_warning(EstimationData(Y ~ X1 + X2, data = no.level.test), NA)
expect_error(EstimationData(Y ~ X1 + X2, no.level.test[1:4, ]),
"There are fewer observations (2) than there are variables (3)", fixed = TRUE)
dummy.test.output <- expect_error(EstimationData(Y ~ X1 + X2 + X3, data = dummy.test,
missing = "Dummy variable adjustment"),
NA)
expect_equal(dummy.test.output$description,
paste0("n = 10 cases used in estimation; ",
"missing values of predictor variables have been adjusted using ",
"dummy variables;"))
# Set one case to have missing outcome variable
dummy.test.missing.outcome <- dummy.test
dummy.test.missing.outcome[1, 1] <- NA
dummy.test.output.missing.outcome <- expect_error(EstimationData(Y ~ X1 + X2 + X3, data = dummy.test.missing.outcome,
missing = "Dummy variable adjustment"),
NA)
expect_equal(dummy.test.output.missing.outcome$description,
paste0("n = 9 cases used in estimation of a total sample size of 10; ",
"missing values of predictor variables have been adjusted using ",
"dummy variables; cases missing an outcome variable have been excluded;"))
# Remove all predictors in one case
dummy.test.with.missing.preds <- dummy.test
dummy.test.with.missing.preds[4, -1] <- NA
dummy.test.output.missing.predictors <- expect_error(EstimationData(Y ~ X1 + X2 + X3, data = dummy.test.with.missing.preds,
missing = "Dummy variable adjustment"),
NA)
expect_equal(dummy.test.output.missing.predictors$description,
paste0("n = 9 cases used in estimation of a total sample size of 10; ",
"missing values of predictor variables have been adjusted using ",
"dummy variables; cases missing all predictor variables have been excluded;"))
# Test edge case
edge.case.output <- expect_error(EstimationData(Y ~ X, data = edge.case.dummy.miss.outcome,
missing = "Dummy variable adjustment"),
NA)
expect_equal(edge.case.output$description,
paste0("n = 9 cases used in estimation of a total sample size of 10; ",
"cases missing an outcome variable have been excluded;"))
edge.case.output <- expect_error(EstimationData(Y ~ X, data = edge.case.dummy.miss.pred,
missing = "Dummy variable adjustment"),
NA)
expect_equal(edge.case.output$description,
paste0("n = 10 cases used in estimation; ",
"missing values of predictor variables have been adjusted using ",
"dummy variables;"))
})


13 changes: 13 additions & 0 deletions tests/testthat/test-missingdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ test_that("Missing options",
expect_equal(341, suppressWarnings(nrow(EstimationData(Overall ~ Branch, bank, missing = "Exclude cases with missing data")$estimation.data)))
expect_equal(823, nrow(EstimationData(Overall ~ Branch, bank, missing = "Use partial data")$estimation.data))
expect_equal(823, nrow(EstimationData(Overall ~ Branch, bank, missing = "Use partial data (pairwise correlations)")$estimation.data))
expet_equal(759, nrow(EstimationData(Overall ~ Fees + Interest, bank, missing = "Dummy variable adjustment")$estimation.data))
})

test_that("Infinity",
Expand Down Expand Up @@ -66,6 +67,11 @@ expected.missing.factor <- data.frame(Y = 1:3, X1 = factor(c(2, 2:3), labels = L
X1.dummy.var_GQ9KqD7YOf = c(1, 0, 0))
df.with.text <- data.frame(Y = 1:2, X1 = 1:2, X3 = c(NA, LETTERS[1]),
stringsAsFactors = FALSE)
edge.case <- data.frame(Y = 1:5, X = 1:5)

expected.edge.with.missing <- structure(list(Y = 1:5, X = c(4.5, 4.5, 4.5, 4, 5),
X.dummy.var_GQ9KqD7YOf = c(1L, 1L, 1L, 0L, 0L)),
class = "data.frame", row.names = c(NA, -5L))

test_that("Dummy variable adjustment", {
expect_identical(AddDummyVariablesForNAs(no.missing.df, outcome.name = "Y"),
Expand Down Expand Up @@ -101,4 +107,11 @@ test_that("Dummy variable adjustment", {

expect_error(AddDummyVariablesForNAs(df.with.text, outcome.name = "Y"),
"Unexpected class when using dummy variable adjustment.")
# Check edge case with one predictor
expect_identical(AddDummyVariablesForNAs(edge.case, outcome.name = "Y"),
edge.case)
# Recode some to missing in predictor
edge.case[1:3, 2] <- NA
expect_identical(AddDummyVariablesForNAs(edge.case, outcome.name = "Y"),
expected.edge.with.missing)
})

0 comments on commit 03de2af

Please sign in to comment.