Skip to content

Commit

Permalink
Remove warning for duplicate names
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisfacer committed Jan 9, 2023
1 parent 096e190 commit 1197563
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 25 deletions.
2 changes: 0 additions & 2 deletions R/mergingandstackingutilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,6 @@ sanitizeSPSSVariableNames <- function(variable.names) {
dupes <- duplicated(tolower(variable.names))
if (any(dupes)) {
dupe.ind <- which(dupes)
- warning("Some variable names were duplicated after cleaning and have been renamed: ",
- paste0(unique(variable.names[dupes]), collapse = ", "))
for (i in dupe.ind) {
variable.names[i] <- uniqueName(variable.names[i],
existing.names = variable.names,
Expand Down
37 changes: 15 additions & 22 deletions tests/testthat/test-mergedatasetsbycase.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,30 +115,28 @@ test_that("Automatically determine match", {
})

test_that("Matching by variable labels", {
- expect_warning(result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
+ result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
findInstDirFile("cola7.sav"),
findInstDirFile("cola8.sav")),
auto.select.what.to.match.by = FALSE,
match.by.variable.names = FALSE,
match.by.variable.labels = TRUE,
match.by.value.labels = FALSE,
- include.merged.data.set.in.output = TRUE),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ include.merged.data.set.in.output = TRUE)
# Q1_F_c matched despite variable names being different
expect_true(all(!is.na(result$merged.data.set$Q1_F_c)))
})

test_that("Matching by value labels", {
- expect_warning(result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
+ result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
findInstDirFile("cola7.sav"),
findInstDirFile("cola8.sav")),
auto.select.what.to.match.by = FALSE,
match.by.variable.names = FALSE,
match.by.variable.labels = FALSE,
match.by.value.labels = TRUE,
include.merged.data.set.in.output = TRUE,
- variables.to.omit = "Q1_E_c1-Q1_B_c1"),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ variables.to.omit = "Q1_E_c1-Q1_B_c1")
# Q1_F_c matched despite variable names being different
expect_true(all(!is.na(result$merged.data.set$Q1_F_c)))
})
Expand Down Expand Up @@ -166,15 +164,14 @@ test_that("Ignore non-alphanumeric characters when matching", {
})

test_that("Minimum match percentage", {
- expect_warning(result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola10.sav"),
+ result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola10.sav"),
findInstDirFile("cola12.sav")),
auto.select.what.to.match.by = FALSE,
match.by.variable.names = FALSE,
match.by.variable.labels = TRUE,
match.by.value.labels = FALSE,
min.match.percentage = 90,
- include.merged.data.set.in.output = TRUE),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ include.merged.data.set.in.output = TRUE)
# Q4_A_3 not merged together since "Coke" differs from "Coca Cola" beyond
# the min match percentage
expect_true("Q4_A_3_1" %in% names(result$merged.data.set))
Expand Down Expand Up @@ -244,12 +241,11 @@ test_that("Manually combine variables by specifying names of variables to combin
})

test_that("Manually combine variables by specifying names of variables (with data set index) to combine", {
- expect_warning(result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
+ result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
findInstDirFile("cola2.sav"),
findInstDirFile("cola4.sav")),
- include.merged.data.set.in.output = TRUE,
- variables.to.combine = "Q3_3_new_name(3),Q3_3(2)"),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ include.merged.data.set.in.output = TRUE,
+ variables.to.combine = "Q3_3_new_name(3),Q3_3(2)")
merged.data.set <- result$merged.data.set
expect_true("Q3_3" %in% names(merged.data.set))
# new variable created with Q3_3 from data set 1 since only Q3_3 from
Expand Down Expand Up @@ -366,11 +362,10 @@ test_that("Error when two variables to be combined are found in the same data se
})

test_that("Variables to not combine", {
- expect_warning(result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
+ result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola1.sav"),
findInstDirFile("cola2.sav"),
findInstDirFile("cola3.sav")),
- variables.to.not.combine = "Q3"),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ variables.to.not.combine = "Q3")
expect_true(all(c("Q3", "Q3_1", "Q3_2") %in% result$merged.data.set.metadata$variable.names))
})

Expand Down Expand Up @@ -461,12 +456,11 @@ test_that("Variable type conversion (text to date)", {
})

test_that("Non-combinable variables", {
- expect_warning(result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola3.sav"),
+ result <- MergeDataSetsByCase(data.set.names = c(findInstDirFile("cola3.sav"),
findInstDirFile("cola7.sav")),
match.by.variable.names = TRUE,
match.by.variable.labels = FALSE,
- match.by.value.labels = FALSE),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ match.by.value.labels = FALSE)
# Q3_3 in the two datasets cannot be combined as the latter is a text
# variable with many different values. So a new variable Q3_3_1 is created
# immediately below Q3_3
Expand Down Expand Up @@ -1102,9 +1096,8 @@ test_that("mergedVariableNames (variable renamed due to name conflict)", {
# Q1 appears in 2 rows so the second one gets renamed
matched.names <- matrix(c("Q1", NA_character_, NA_character_, "Q3",
NA_character_, "Q1", "Q2", "Q3B"), ncol = 2)
- expect_warning(merged.names <- mergedVariableNames(matched.names = matched.names,
- use.names.and.labels.from = "First data set"),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ merged.names <- mergedVariableNames(matched.names = matched.names,
+ use.names.and.labels.from = "First data set")
expect_equal(merged.names,
structure(c("Q1", "Q1_1", "Q2", "Q3"),
renamed.variables = structure(c("Q1", "Q1_1"), .Dim = 1:2,
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-mergingandstackingutilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,6 @@ test_that("DS-4210: SPSS variable names sanitized before attempting to save", {
# Prevent duplicates
bad.names <- c("A", "B", "WITH", "A", "B", "WITH")
expect_warning(z <- sanitizeSPSSVariableNames(bad.names),
- "Some variable names were duplicated after cleaning and have been renamed:")
+ "Cannot save variables whose names are SPSS reserved keywords")
expect_equal(z, c("A", "B", "WITH_r", "A_1", "B_1", "WITH_r_1"))
})

0 comments on commit 1197563

Please sign in to comment.