Skip to content

Commit

Permalink
RS-9210: fix bug with merging similar value labels (#19)
Browse files Browse the repository at this point in the history
* Fix merging of values when labels are similar

* Bump version [ci skip]
  • Loading branch information
JustinCCYap committed Sep 29, 2021
1 parent 611a2eb commit cbace5f
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: flipData
Type: Package
Title: Functions for extracting and describing data
Version: 1.5.2
Version: 1.5.3
Author: Displayr <opensource@displayr.com>
Maintainer: Displayr <opensource@displayr.com>
Description: Functions for extracting data from formulas and
Expand Down
32 changes: 23 additions & 9 deletions R/mergedatasetsbycase.R
Original file line number Diff line number Diff line change
Expand Up @@ -2024,6 +2024,7 @@ mergeValueAttributes <- function(value.attributes.list,
match.parameters)
{
merged.val.attr <- numeric(0)
original.val.attr <- numeric(0)
value.map.list <- vector("list", length = length(value.attributes.list))
for (i in seq_along(value.attributes.list))
{
Expand All @@ -2038,13 +2039,15 @@ mergeValueAttributes <- function(value.attributes.list,
val <- labelValue(val.attr, lbl)
merged.val.attr <- mergeValueAndLabelIntoValueAttributes(val, lbl,
merged.val.attr,
original.val.attr,
map,
when.multiple.labels.for.one.value,
match.parameters)
map <- attr(merged.val.attr, "map")
}
if (nrow(map) > 0)
value.map.list[[i]] <- map
original.val.attr <- merged.val.attr
}
attr(merged.val.attr, "map") <- NULL

Expand Down Expand Up @@ -2077,6 +2080,10 @@ mergeValueAttributes <- function(value.attributes.list,
#' @param merged.val.attr Named numeric vector of (incomplete) value attributes
#' (values and labels) of the merged categorical variable. This is iteratively
#' added to with each call of this function and it starts out empty.
#' @param original.val.attr Named numeric vector of (incomplete) value attributes
#' (values and labels) of the merged categorical variable. This is the state of
#' the merged value attributes before the value attributes of the variable from
#' which val and lbl originate are merged in.
#' @param map Numeric matrix where each row represents a mapping from one value
#' to another. The first column contains the original values and the second column
#' contains the new values. The contents of this matrix are not used in the function,
Expand All @@ -2088,7 +2095,8 @@ mergeValueAttributes <- function(value.attributes.list,
#' @return Returns a possibly augmented merged.val.attr, with the attribute "map"
#' containing the matrix map.
#' @noRd
mergeValueAndLabelIntoValueAttributes <- function(val, lbl, merged.val.attr, map,
mergeValueAndLabelIntoValueAttributes <- function(val, lbl, merged.val.attr,
original.val.attr, map,
when.multiple.labels.for.one.value,
match.parameters)
{
Expand All @@ -2105,18 +2113,24 @@ mergeValueAndLabelIntoValueAttributes <- function(val, lbl, merged.val.attr, map
}
else
{
lbls.to.compare.against <- names(merged.val.attr)
match.percentages <- matchPercentagesForValueLabels(lbl = lbl,
lbls.to.compare.against = lbls.to.compare.against,
match.parameters = match.parameters)
is.fuzzy.match <- max(match.percentages) >= match.parameters$min.value.label.match.percentage &&
isNumbersPreserved(lbl, lbls.to.compare.against[which.max(match.percentages)])
if (length(original.val.attr) > 0)
{
lbls.to.compare.against <- names(original.val.attr)
match.percentages <- matchPercentagesForValueLabels(lbl = lbl,
lbls.to.compare.against = lbls.to.compare.against,
match.parameters = match.parameters)
is.fuzzy.match <- max(match.percentages) >= match.parameters$min.value.label.match.percentage &&
isNumbersPreserved(lbl, lbls.to.compare.against[which.max(match.percentages)])
}
else
is.fuzzy.match <- FALSE

if (is.fuzzy.match)
{
merged.val <- unname(merged.val.attr[which.max(match.percentages)])
merged.val <- unname(original.val.attr[which.max(match.percentages)])
if (merged.val != val)
{
map <- rbind(map, c(val, merged.val), deparse.level = 0) # use the value in merged.val.attr
map <- rbind(map, c(val, merged.val), deparse.level = 0) # use the value in original.val.attr
}
# else: similar label, same value, no action required as we treat
# them as the same and one of them is already in merged.val.attr
Expand Down
33 changes: 33 additions & 0 deletions tests/testthat/test-mergedatasetsbycase.R
Original file line number Diff line number Diff line change
Expand Up @@ -1141,3 +1141,36 @@ test_that("mergeIndicesList (indices to keep together specified)", {
c(2L, 7L)))
expect_equal(merged.indices, c(1L, 8L, 2L, 7L, 3L:6L))
})

test_that("RS-9210: check that values merge correctly", {
var.list.val.attr <- list(c(`I've heard only positive things` = 1,
`I've heard mainly positive things` = 2,
`I've heard a few positive and a few negative things` = 3,
`I've heard mainly negative things` = 4,
`I've heard only negative things` = 5),
c(`I've heard only positive things` = 1,
`I've heard mainly positive things` = 2,
`I've heard a few positive and a few negative things` = 3,
`I've heard mainly negative things` = 4,
`I've heard only negative things` = 5))
when.multiple.labels.for.one.value <- "Create new values for the labels"
match.parameters <- list(auto.select.what.to.match.by = TRUE,
match.by.variable.names = TRUE,
match.by.variable.labels = TRUE,
match.by.value.labels = TRUE,
ignore.case = TRUE,
ignore.non.alphanumeric = TRUE,
min.match.percentage = 90,
min.value.label.match.percentage = 90)

result <- mergeValueAttributes(var.list.val.attr,
when.multiple.labels.for.one.value,
match.parameters)
expect_equal(result$merged.value.attributes,
c(`I've heard only positive things` = 1,
`I've heard mainly positive things` = 2,
`I've heard a few positive and a few negative things` = 3,
`I've heard mainly negative things` = 4,
`I've heard only negative things` = 5))
expect_equal(result$value.map.list, list(NULL, NULL))
})

0 comments on commit cbace5f

Please sign in to comment.