Skip to content

Commit

Permalink
Updates from JW comments
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisfacer committed Jun 7, 2022
1 parent 5c43dca commit c169577
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 70 deletions.
81 changes: 39 additions & 42 deletions R/combinevariablesetsasbinary.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,17 @@
#' CombineVariableSetsAsBinary
#'
#' @description Combines a list of variable sets to binary variables, matching categories between them.
#' @param variable.set.list A list containing one or more variable sets which should be
#' Nominal, Ordinal, Nominal/Ordinal - Multi, Binary - Multi, or Binary - Multi (Compact)
#' @param ... One or more variable sets which should be Nominal, Ordinal, Nominal/Ordinal - Multi,
#' Binary - Multi, or Binary - Multi (Compact)
#' @param compute.for.incomplete A boolean value. If \code{FALSE}, cases with any missing data
#' will have a missing vlaue. If \code{TRUE}, only cases whose data is entirely missing will
#' be assigned a missing value.
#' @importFrom verbs Count AnyOf
#' @importFrom flipTransformations AsNumeric
#' @export
CombineVariableSetsAsBinary <- function(variable.set.list, compute.for.incomplete = TRUE) {
CombineVariableSetsAsBinary <- function(..., compute.for.incomplete = TRUE) {


if (is.data.frame(variable.set.list) || !is.list(variable.set.list)) {
stop("Input data should be supplied as a list.")
}
variable.set.list <- list(...)

# Check for duplicated labels which make life difficult when matching
duplicated.labels = lapply(variable.set.list, function(x) {
Expand All @@ -38,17 +35,17 @@ CombineVariableSetsAsBinary <- function(variable.set.list, compute.for.incomplet
if (is.factor(x)) {
levs = levels(x)
return(levs[duplicated(levs)])
} else {
return(colnames(x)[duplicated(colnames(x))])
}
}

colnames(x)[duplicated(colnames(x))]
})

n.duplicates = vapply(duplicated.labels, FUN = length, FUN.VALUE = numeric(1))

if (any(n.duplicates > 0)) {
dup.qs = names(duplicated.labels)[n.duplicates > 0]
dup.labels = duplicated.labels[n.duplicates > 0]
stop(paste0("The input data contains duplicate labels and cannot be matched. Duplicated labels: " , dup.labels[[1]]))
stop("The input data contains duplicate labels and cannot be matched. Duplicated labels: " , dup.labels[[1]])
}


Expand All @@ -66,39 +63,37 @@ CombineVariableSetsAsBinary <- function(variable.set.list, compute.for.incomplet

# If only one variable set then just return it
if (length(binary.versions) == 1) {
result <- binary.versions[[1]] == 1
} else {

# Check matching of column labels in binary data
all.labels = lapply(binary.versions, FUN = colnames)
unique.labels = unique(unlist(all.labels))
common.labels = unique.labels
for (j in seq_along(binary.versions)) {
common.labels = intersect(common.labels, colnames(binary.versions[[j]]))
}
if (!setequal(unique.labels, common.labels)) {
stop(paste0("Unable to match categories from the input data. The labels which do not appear in all inputs are: ", paste0(setdiff(unique.labels, common.labels), collapse = ",")))
}
return(binary.versions[[1]] == 1)
}

input.args = binary.versions
input.args[["match.elements"]] <- "Yes"
input.args[["elements.to.count"]] <- list(numeric = NA, categorical = NULL)
input.args[["ignore.missing"]] <- TRUE

# Count missing values for each case for each binary variable
n.missing <- do.call(Count, input.args)

# Combine the sets of binary variables using AnyOf
input.args[["elements.to.count"]] <- list(numeric = 1, categorical = NULL)
result <- do.call(AnyOf, input.args)

# Handle missing values
if (compute.for.incomplete) { # Only assign NA if all missing
result[n.missing == length(binary.versions)] <- NA
} else { # Assign NA if any missing
result[n.missing > 0] <- NA
}
# Check matching of column labels in binary data
all.labels = lapply(binary.versions, FUN = colnames)
unique.labels = unique(unlist(all.labels))
common.labels = Reduce(intersect, all.labels)

if (!setequal(unique.labels, common.labels)) {
stop("Unable to match categories from the input data. The labels which do not appear in all inputs are: ", paste0(setdiff(unique.labels, common.labels), collapse = ","))
}

input.args = binary.versions
input.args[["match.elements"]] <- "Yes"
input.args[["elements.to.count"]] <- list(numeric = NA, categorical = NULL)
input.args[["ignore.missing"]] <- TRUE

# Count missing values for each case for each binary variable
n.missing <- do.call(Count, input.args)

# Combine the sets of binary variables using AnyOf
input.args[["elements.to.count"]] <- list(numeric = 1, categorical = NULL)
result <- do.call(AnyOf, input.args)

# Handle missing values
if (compute.for.incomplete) { # Only assign NA if all missing
result[n.missing == length(binary.versions)] <- NA
} else { # Assign NA if any missing
result[n.missing > 0] <- NA
}

result
}

Expand Down Expand Up @@ -158,4 +153,6 @@ questionToBinary <- function(x) {
binary.version[is.na(x), ] <- NA
return(binary.version)
}

stop("Unsupported data type: ", question.type)
}
57 changes: 29 additions & 28 deletions tests/testthat/test-combinevariablesetsasbinary.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,75 +37,76 @@ Q4.pepsi.max = colas[, "q4f"]
attr(Q4.pepsi.max, "questiontype") <- "PickOne"
attr(Q4.pepsi.max, "dataset") <- "colas"

Q4.binary = CombineVariableSetsAsBinary(list(Q4))
Q4.binary = CombineVariableSetsAsBinary(Q4)
attr(Q4.binary, "questiontype") <- "PickAny"
attr(Q4.binary, "dataset") <- "colas"

Q4.binary.small = CombineVariableSetsAsBinary(list(Q4))
Q4.binary.small = CombineVariableSetsAsBinary(Q4)
attr(Q4.binary.small, "questiontype") <- "PickAny"
attr(Q4.binary.small, "dataset") <- "colas"

test_that("Single PickOne", {

asnumeric = flipTransformations::AsNumeric(Q4.pepsi.light, binary = TRUE)
colnames(asnumeric) = levels(Q4.pepsi.light)
asnumeric = asnumeric == 1
asnumeric = flipTransformations::AsNumeric(Q4.pepsi.light, binary = TRUE)
colnames(asnumeric) = levels(Q4.pepsi.light)
asnumeric = asnumeric == 1

expect_equal(CombineVariableSetsAsBinary(list(Q4.pepsi.light)), asnumeric)
expect_equal(CombineVariableSetsAsBinary(Q4.pepsi.light), asnumeric)

})

test_that("Two PickOnes", {

pepsi.light.numeric = flipTransformations::AsNumeric(Q4.pepsi.light, binary = TRUE)
colnames(pepsi.light.numeric) = levels(Q4.pepsi.light)
pepsi.max.numeric = flipTransformations::AsNumeric(Q4.pepsi.max, binary = TRUE)
colnames(pepsi.max.numeric) = levels(Q4.pepsi.max)
input.args = list(pepsi.light.numeric, pepsi.max.numeric)
input.args[["match.elements"]] <- "Yes"
pepsi.light.numeric = flipTransformations::AsNumeric(Q4.pepsi.light, binary = TRUE)
colnames(pepsi.light.numeric) = levels(Q4.pepsi.light)
pepsi.max.numeric = flipTransformations::AsNumeric(Q4.pepsi.max, binary = TRUE)
colnames(pepsi.max.numeric) = levels(Q4.pepsi.max)
input.args = list(pepsi.light.numeric, pepsi.max.numeric)
input.args[["match.elements"]] <- "Yes"
input.args[["elements.to.count"]] <- list(numeric = 1, categorical = NULL)
input.args[["ignore.missing"]] <- TRUE
pepsi.light.or.max = do.call(AnyOf, input.args)
pepsi.light.or.max = do.call(AnyOf, input.args)

expect_equal(CombineVariableSetsAsBinary(list(Q4.pepsi.light, Q4.pepsi.max)), pepsi.light.or.max)
expect_equal(CombineVariableSetsAsBinary(Q4.pepsi.light, Q4.pepsi.max), pepsi.light.or.max)
})

test_that("Many PickOnes are equivalent to a PickOneMulti", {

expect_equal(CombineVariableSetsAsBinary(list(Q4)), CombineVariableSetsAsBinary(list(colas[, "q4a"], colas[, "q4b"], colas[, "q4c"], colas[, "q4d"], colas[, "q4e"], colas[, "q4f"])))
expect_equal(CombineVariableSetsAsBinary(Q4),
CombineVariableSetsAsBinary(colas[, "q4a"], colas[, "q4b"], colas[, "q4c"], colas[, "q4d"], colas[, "q4e"], colas[, "q4f"]))

})

test_that("Combining PickOnes and Pick Any", {

expect_equal(Q4.binary, CombineVariableSetsAsBinary(list(Q4.binary.small, Q4.pepsi.light, Q4.pepsi.max)), check.attributes = FALSE)
expect_equal(Q4.binary, CombineVariableSetsAsBinary(Q4.binary.small, Q4.pepsi.light, Q4.pepsi.max), check.attributes = FALSE)

})

test_that("Pick Any returns same data", {

expect_equal(CombineVariableSetsAsBinary(list(Q4.binary)), Q4.binary, check.attributes = FALSE)
expect_equal(CombineVariableSetsAsBinary(list(Q4.binary, Q4.binary)), Q4.binary, check.attributes = FALSE)
expect_equal(CombineVariableSetsAsBinary(Q4.binary), Q4.binary, check.attributes = FALSE)
expect_equal(CombineVariableSetsAsBinary(Q4.binary, Q4.binary), Q4.binary, check.attributes = FALSE)

})


test_that("Missing data", {

input.args <- list(aided, unaided)
input.args[["match.elements"]] <- "Yes"
input.args <- list(aided, unaided)
input.args[["match.elements"]] <- "Yes"
input.args[["elements.to.count"]] <- list(numeric = NA, categorical = NULL)
input.args[["ignore.missing"]] <- TRUE

n.missing <- do.call(Count, input.args)
expect_true(all(is.na(CombineVariableSetsAsBinary(list(aided, unaided))[n.missing == 2])))
expect_true(all(is.na(CombineVariableSetsAsBinary(list(aided, unaided), compute.for.incomplete = FALSE)[n.missing > 0])))
n.missing <- do.call(Count, input.args)
expect_true(all(is.na(CombineVariableSetsAsBinary(aided, unaided)[n.missing == 2])))
expect_true(all(is.na(CombineVariableSetsAsBinary(aided, unaided, compute.for.incomplete = FALSE)[n.missing > 0])))
})

test_that("Error messages", {
aided.2 <- aided
colnames(aided.2)[11] <- "Telstra"
expect_error(CombineVariableSetsAsBinary(list(aided.2, unaided)), "duplicate")
colnames(aided.2)[11] <- "Phone company"
expect_error(CombineVariableSetsAsBinary(list(aided.2, unaided)), "Unable to match")
aided.2 <- aided
colnames(aided.2)[11] <- "Telstra"
expect_error(CombineVariableSetsAsBinary(aided.2, unaided), "duplicate")
colnames(aided.2)[11] <- "Phone company"
expect_error(CombineVariableSetsAsBinary(aided.2, unaided), "Unable to match")
})

0 comments on commit c169577

Please sign in to comment.