Skip to content

Commit

Permalink
Merge pull request #30 from Displayr/DS-3784
Browse files Browse the repository at this point in the history
DS-3784 Bug fixes in CombineVariableSetsAsBinary
  • Loading branch information
chrisfacer committed Jun 23, 2022
2 parents 5d2a182 + b50982b commit c60132e
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 32 deletions.
76 changes: 44 additions & 32 deletions R/combinevariablesetsasbinary.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' CombineVariableSetsAsBinary
#'
#' @description Combines a list of variable sets to binary variables, matching categories between them.
#' @param ... One or more variable sets which should be Nominal, Ordinal, Nominal/Ordinal - Multi,
#' @param ... One or more variable sets which should be Nominal, Ordinal, Nominal/Ordinal - Multi,
#' Binary - Multi, or Binary - Multi (Compact)
#' @param compute.for.incomplete A boolean value. If \code{FALSE}, cases with any missing data
#' will have a missing vlaue. If \code{TRUE}, only cases whose data is entirely missing will
Expand Down Expand Up @@ -32,17 +32,17 @@ CombineVariableSetsAsBinary <- function(..., compute.for.incomplete = TRUE, unma
if (is.null(question.type)) {
stop("This function should only be applied to variable sets in Displayr.")
}


if (question.type == "PickOneMulti") {
x = x[[1]]
}

if (is.factor(x)) {
levs = levels(x)
return(levs[duplicated(levs)])
}
}

colnames(x)[duplicated(colnames(x))]
})

Expand All @@ -54,32 +54,29 @@ CombineVariableSetsAsBinary <- function(..., compute.for.incomplete = TRUE, unma
stop("The input data contains duplicate labels and cannot be matched. Duplicated labels: " , dup.labels[[1]])
}


binary.versions <- lapply(variable.set.list, FUN = questionToBinary)

# Pick One - Multis will appear as a list.
# Append thoses lists to the main list and delete the
# corresponding original elements
for (j in 1:length(binary.versions)) {
if (!is.data.frame(binary.versions[[j]]) & is.list(binary.versions[[j]])) {
binary.versions <- c(binary.versions, binary.versions[[j]])
binary.versions[j] <- NULL
}
binary.versions <- flattenToSingleList(binary.versions)

n.cases <- vapply(binary.versions, FUN = NROW, FUN.VALUE = numeric(1))

if (!all(n.cases == n.cases[1])) {
stop("The number of cases is not the same in all of the input data.")
}

# If only one variable set then just return it
if (length(binary.versions) == 1) {
return(binary.versions[[1]] == 1)
}
}

# Check matching of column labels in binary data
all.labels = lapply(binary.versions, FUN = colnames)
unique.labels = unique(unlist(all.labels))
common.labels = Reduce(intersect, all.labels)

if (!setequal(unique.labels, common.labels)) {
binary.versions <- lapply(binary.versions,
FUN = fillInCategoriesWhenNotPresent,
binary.versions <- lapply(binary.versions,
FUN = fillInCategoriesWhenNotPresent,
expected.columns = unique.labels,
pick.any.all.missing = unmatched.pick.any.are.missing)
}
Expand All @@ -88,21 +85,25 @@ CombineVariableSetsAsBinary <- function(..., compute.for.incomplete = TRUE, unma
input.args[["match.elements"]] <- "Yes"
input.args[["elements.to.count"]] <- list(numeric = NA, categorical = NULL)
input.args[["ignore.missing"]] <- TRUE

# Count missing values for each case for each binary variable
n.missing <- do.call(Count, input.args)

# Combine the sets of binary variables using AnyOf
input.args[["elements.to.count"]] <- list(numeric = 1, categorical = NULL)
result <- do.call(AnyOf, input.args)

# Handle missing values
if (compute.for.incomplete) { # Only assign NA if all missing
result[n.missing == length(binary.versions)] <- NA
} else { # Assign NA if any missing
result[n.missing > 0] <- NA
}


# Replace blank level labels
c.names <- colnames(result)
c.names[c.names == "<BLANK>"] <- ""
colnames(result) <- c.names
result
}

Expand All @@ -114,33 +115,42 @@ isDefaultNet <- function(codes, unique.codes) {
}


flattenToSingleList <- function(input.list)
{
args <- lapply(input.list, function(x) if (is.list(x) && ! is.data.frame(x)) flattenToSingleList(x) else list(x))
do.call(c, args)
}

# Convert Displayr variable sets to binary.
# Factors are split out with one column per level
questionToBinary <- function(x) {
question.type = attr(x, "questiontype")

# Standalone factor variables can retain the "questiontype"
# value of "PickOneMulti" inherited from their parent
# question
if (is.factor(x)) {
if (is.factor(x)) {
question.type <- "PickOne"
levs <- levels(x)
levs[levs == ""] <- "<BLANK>" # Protecting against blank level labels. Will be put back later.
levels(x) <- levs
}

# Consider generalizing in future
if (is.null(question.type)) {
stop("This function should only be applied to variable sets in Displayr.")
}

if (question.type %in% c("PickAny", "PickAnyCompact")) {
# Identify and remove the NET column basedon the codeframe attribute
cf <- attr(x, "codeframe")
if (!is.null(cf)) {
unique.codes = unique(unlist(cf))
net.cols = vapply(cf, isDefaultNet, FUN.VALUE = logical(1), unique.codes = unique.codes)
x <- x[, !net.cols]
x <- x[, !net.cols]
}
attr(x, "originalquestiontype") <- "Pick Any"
return(x)
return(x)
}

# Each variable in a Pick One - Multi is split separately
Expand All @@ -159,7 +169,7 @@ questionToBinary <- function(x) {
if (is.ordered(x))
class(x) <- class(x)[class(x) != "ordered"]
binary.version <- AsNumeric(x, binary = TRUE, name = levels(x))
colnames(binary.version) <- levels(x)
colnames(binary.version) <- levels(x)
binary.version[is.na(x), ] <- NA
attr(binary.version, "originalquestiontype") <- "Pick One"
return(binary.version)
Expand All @@ -170,9 +180,9 @@ questionToBinary <- function(x) {

# Function to expand the number of columns in the binary data
# when there are fewer columns than expected. expected.columns
# should be a vector of column names.

# should be a vector of column names.
fillInCategoriesWhenNotPresent <- function(binary.data, expected.columns, pick.any.all.missing = TRUE) {

current.colnames <- colnames(binary.data)

if (all(expected.columns %in% current.colnames))
Expand Down Expand Up @@ -201,5 +211,7 @@ fillInCategoriesWhenNotPresent <- function(binary.data, expected.columns, pick.a

binary.data <- cbind(binary.data, new.data)
binary.data <- binary.data[, expected.columns]

binary.data
}
}

16 changes: 16 additions & 0 deletions tests/testthat/test-combinevariablesetsasbinary.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ test_that("Many PickOnes are equivalent to a PickOneMulti", {

})

test_that("Multliple PickOneMulti where one is the subset of the other", {
expect_equal(CombineVariableSetsAsBinary(Q4, Q4.small), CombineVariableSetsAsBinary(Q4))
})

test_that("Combining PickOnes and Pick Any", {

expect_equal(Q4.binary[, -ncol(Q4.binary)], CombineVariableSetsAsBinary(Q4.binary.small, Q4.pepsi.light, Q4.pepsi.max), check.attributes = FALSE)
Expand Down Expand Up @@ -137,4 +141,16 @@ test_that("Error messages", {
aided.2 <- aided
colnames(aided.2)[11] <- "Telstra"
expect_error(CombineVariableSetsAsBinary(aided.2, unaided), "duplicate")

test.case.1 <- factor(c("", "A", "B", "C","A", "B", "C"))
test.case.2 <- factor(c("A", "B", "C","A", "B", "C"), levels = c("", "A", "B", "C"))
expect_error(CombineVariableSetsAsBinary(test.case.1, test.case.2), "cases")
})

test_that("Blank factor labels", {
test.case.1 <- factor(c("", "A", "B", "C","A", "B", "C"))
test.case.2 <- factor(c("A", "A", "B", "C","A", "B", "C"), levels = c("", "A", "B", "C"))

expect_equal(colnames(CombineVariableSetsAsBinary(test.case.1, test.case.2)), c("", "A", "B", "C"))
})

0 comments on commit c60132e

Please sign in to comment.