From 92c995e335eeb8b1efa8644752a29d3c640ed3ab Mon Sep 17 00:00:00 2001 From: chrisfacer Date: Fri, 20 May 2022 14:46:52 +1000 Subject: [PATCH] DS-3646: Catching empty levels when Calibrate called outside widget (#26) * Empty levels should always be dropped [revdep skip] Empty levels should always be dropped when checking the categorical target variables. Previously, levels were not being dropped when the subset argument is NULL. However, when Calibrate calls categoricalTargets, the default value of subset is NULL, so that any empty categories would not be dropped. Empty categories cause errors when calling rake from the survey package. Users using the widget would never have encountered this error because the widget always passes a non-null value for subset. * Test error case * Ensure weights normalized prior to checking for trims Ensure weights normalized prior to checking for trims --- R/calibrate.R | 11 ++++++++++- tests/testthat/test-calibrate.R | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/R/calibrate.R b/R/calibrate.R index 3065654..e61ae9e 100755 --- a/R/calibrate.R +++ b/R/calibrate.R @@ -135,7 +135,7 @@ categoricalTargets <- function(adjustment.variables, categorical.targets, subset tgt = categorical.targets[[i]] targets[[i]] = suppressWarnings(as.numeric(str_trim(tgt[, 2]))) names(targets[[i]]) = tgt[, 1] - adj.variable = if(is.null(subset)) adjustment.variables[[i]] else droplevels(adjustment.variables[[i]][subset]) + adj.variable = if(is.null(subset)) droplevels(adjustment.variables[[i]]) else droplevels(adjustment.variables[[i]][subset]) adj.unique = levels(adj.variable) missing.targets = ! adj.unique %in% tgt[, 1] varname = names(adjustment.variables)[i] @@ -323,6 +323,13 @@ trimWeight = function(weight, lower, upper) trimmedCalibrate <- function(adjustment.variables, margins, input.weight, lower, upper, trim.iterations, raking, package) { weight = computeCalibrate(adjustment.variables, margins, input.weight, raking, package) + # DS-3682: computeCalibrate produces weights which are not normalized to a mean of 1. + # As a result, the calculations below which compare the weight to upper and lower, + # which are user-specified bounds for a weight with a mean value of 1, do not produce + # the desired effect (typically the weight is either not trimmed, or trimming produces + # a weight where all values are identical because the original values were all below + # the lower bound. + weight = weight / mean(weight) trims = 0 prev_diff = Inf dif = diffCalculation(weight, lower, upper) @@ -334,6 +341,8 @@ trimmedCalibrate <- function(adjustment.variables, margins, input.weight, lower, trims = trims + 1 weight = trimWeight(weight, lower, upper) weight = computeCalibrate(adjustment.variables, margins, weight, raking, package) + # DS-3682 see above + weight = weight / mean(weight) prev_diff = dif dif = diffCalculation(weight, lower, upper) } diff --git a/tests/testthat/test-calibrate.R b/tests/testthat/test-calibrate.R index 34dd263..aa2acb2 100644 --- a/tests/testthat/test-calibrate.R +++ b/tests/testthat/test-calibrate.R @@ -306,3 +306,25 @@ test_that("DS-3458: Catch CVXR solver errors from bad input data", numeric.targets = targets), "check that the supplied targets are appropriate for your data.") }) + +test_that("DS-3646: Always drop empty levels when checking validity of targets", +{ + categorical.vars <- list(Gender = factor(c("Male", "Female", "Male", "Female")), + Age = factor(c("Old", "Old", "Young", "Young"), levels = c("Old", "Young", "Middle-aged"))) + targets <- list(Gender = rbind(c("Male", 0.5), c("Female", 0.5)), + Age = rbind(c("Old", 0.25), c("Young", 0.25), c("Middle-aged",0.5))) + expect_error(Calibrate(categorical.vars, targets), + "does not appear") + +}) + +test_that("DS-3682: Normalize rake weight before trimming", { + upper = 2 + lower = 0.3 + x = Calibrate(list(Age = input.age, Gender = input.gender), + list(Age = variable.targets.age, Gender = variable.targets.gender), + upper = upper, + lower = lower) + expect_equal(round(min(x), 7), 0.4089635) + expect_equal(round(max(x), 6), 2.148377) +})