Skip to content

Commit

Permalink
RS-10303: tolerate stacking of variable with different categories as …
Browse files Browse the repository at this point in the history
…long as they are consistent (#25)

* Accept different ordering of value attributes [revdep skip]

* Bug fix [revdep skip]

* Add tests [revdep skip]

* RS-10303: tolerate stacking of variable with different categories as long as they are consistent
  • Loading branch information
JustinCCYap authored Mar 22, 2022
1 parent d8e1cac commit 486cea6
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 5 deletions.
51 changes: 47 additions & 4 deletions R/stacking.R
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ stackingSpecifiedByVariable <- function(manual.stacking,

# Check for mismatching variable types and value attributes
if (!allIdentical(v.types[removeNA(group.ind)]) ||
!allValueAttributesIdentical(v.val.attr[removeNA(group.ind)]))
!isValueAttributesMergable(v.val.attr[removeNA(group.ind)]))
{
warning("The manual stacking input '", input.text,
"' has been ignored as it contains variables with mismatching types or value attributes.")
Expand Down Expand Up @@ -911,7 +911,7 @@ stackingSpecifiedByObservation <- function(manual.stacking,
{
group.ind <- removeNA(manual.stacking.groups[i, ])
if (!allIdentical(v.types[group.ind]) ||
!allValueAttributesIdentical(v.val.attr[group.ind]))
!isValueAttributesMergable(v.val.attr[group.ind]))
{
warning("No manual stacking was conducted as the following variables to be stacked ",
"have mismatching types or value attributes: ",
Expand All @@ -922,6 +922,36 @@ stackingSpecifiedByObservation <- function(manual.stacking,
manual.stacking.groups
}

# Value attributes are mergable if they are consistent with each other
# (i.e. the value labels of their common values match).
isValueAttributesMergable <- function(v.val.attrs)
{
combined.val.attr <- do.call('c', v.val.attrs)
names(combined.val.attr) <- unlist(lapply(v.val.attrs,
function(v.val.attr) names(v.val.attr)),
use.names = FALSE)

# Check that there aren't multiple names per value
unique.vals <- unique(combined.val.attr)
for (v in unique.vals) {
nms <- unique(names(combined.val.attr)[v == combined.val.attr])
if (length(nms) > 1) {
return(FALSE)
}
}

# Check that there aren't multiple values per name
unique.names <- unique(names(combined.val.attr))
for (nm in unique.names) {
vals <- unique(combined.val.attr[nm == names(combined.val.attr)])
if (length(vals) > 1) {
return(FALSE)
}
}

return(TRUE)
}

# See unit tests for mergeCommonLabelAndManualStackingGroups in test-stacking.R
mergeCommonLabelAndManualStackingGroups <- function(common.label.stacking.groups,
manual.stacking.groups)
Expand Down Expand Up @@ -1060,8 +1090,8 @@ stackedDataSet <- function(input.data.set, input.data.set.metadata,
attr(v, "stacking.input.variable.names") <- input.v.names[group.ind]
attr(v, "stacking.input.variable.labels") <- input.v.labels[group.ind]
attr(v, "label") <- stackedVariableLabel(group.ind, input.v.labels, nm)
val.attr <- attr(input.data.set[[removeNA(group.ind)[1]]],
"labels", exact = TRUE)
val.attr <- stackedValueAttributes(group.ind,
input.data.set.metadata$variable.value.attributes)
if (!is.null(val.attr))
{
if (is.integer(v))
Expand Down Expand Up @@ -1181,6 +1211,19 @@ stackedVariableLabel <- function(group.ind, input.variable.labels, stacked.varia
trimws(paste(common.prefix, common.suffix))
}

# Assume that value attributes are mergable (isValueAttributesMergable or
# allValueAttributesIdentical has been run and returned TRUE)
stackedValueAttributes <- function(group.ind, input.value.attributes)
{
v.val.attrs <- input.value.attributes[removeNA(group.ind)]
combined.val.attrs <- do.call('c', v.val.attrs)
names(combined.val.attrs) <- unlist(lapply(v.val.attrs,
function(v.val.attr) names(v.val.attr)),
use.names = FALSE)
is.dup <- duplicated(combined.val.attrs)
sort(combined.val.attrs[!is.dup])
}

# Common prefix from a character vector of names.
# If whole.words is TRUE, the prefix is truncated so that it does not end
# halfway into a word or number.
Expand Down
32 changes: 31 additions & 1 deletion tests/testthat/test-stacking.R
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ test_that("stackingSpecifiedByVariable", {

# Incompatible value attributes
val.attr.2 <- 4:6
names(val.attr.2) <- letters[4:6]
names(val.attr.2) <- letters[1:3]
v.val.attr.2 <- v.val.attr
v.val.attr.2[[2]] <- val.attr.2
expect_warning(stacking.groups <- stackingSpecifiedByVariable(c("Q2_A-Q2_D", "Q3_*"),
Expand Down Expand Up @@ -663,3 +663,33 @@ test_that("parseVariableWildcard", {
"wildcard variable name has been correctly specified. ",
"Warning expected"), fixed = TRUE)
})

test_that("isValueAttributesMergable", {
val.attrs <- list(structure(1:3, .Names = c("A", "B", "C")),
structure(4:6, .Names = c("D", "E", "F")))
expect_true(isValueAttributesMergable(val.attrs))

val.attrs <- list(structure(1:3, .Names = c("A", "B", "C")),
structure(1:3, .Names = c("A", "B", "C")))
expect_true(isValueAttributesMergable(val.attrs))

val.attrs <- list(structure(1:3, .Names = c("A", "B", "C")),
structure(1:3, .Names = c("D", "E", "F")))
expect_false(isValueAttributesMergable(val.attrs))

val.attrs <- list(structure(1:3, .Names = c("A", "B", "C")),
structure(4:6, .Names = c("A", "B", "C")))
expect_false(isValueAttributesMergable(val.attrs))
})

test_that("stackedValueAttributes", {
val.attrs <- list(structure(4:6, .Names = c("D", "E", "F")),
structure(1:3, .Names = c("A", "B", "C")))
expect_equal(stackedValueAttributes(1:2, val.attrs),
structure(1:6, .Names = c("A", "B", "C", "D", "E", "F")))

val.attrs <- list(structure(1:3, .Names = c("A", "B", "C")),
structure(1:3, .Names = c("A", "B", "C")))
expect_equal(stackedValueAttributes(1:2, val.attrs),
structure(1:3, .Names = c("A", "B", "C")))
})

0 comments on commit 486cea6

Please sign in to comment.