diff --git a/R/calibrate.R b/R/calibrate.R index 3065654..e61ae9e 100755 --- a/R/calibrate.R +++ b/R/calibrate.R @@ -135,7 +135,7 @@ categoricalTargets <- function(adjustment.variables, categorical.targets, subset tgt = categorical.targets[[i]] targets[[i]] = suppressWarnings(as.numeric(str_trim(tgt[, 2]))) names(targets[[i]]) = tgt[, 1] - adj.variable = if(is.null(subset)) adjustment.variables[[i]] else droplevels(adjustment.variables[[i]][subset]) + adj.variable = if(is.null(subset)) droplevels(adjustment.variables[[i]]) else droplevels(adjustment.variables[[i]][subset]) adj.unique = levels(adj.variable) missing.targets = ! adj.unique %in% tgt[, 1] varname = names(adjustment.variables)[i] @@ -323,6 +323,13 @@ trimWeight = function(weight, lower, upper) trimmedCalibrate <- function(adjustment.variables, margins, input.weight, lower, upper, trim.iterations, raking, package) { weight = computeCalibrate(adjustment.variables, margins, input.weight, raking, package) + # DS-3682: computeCalibrate produces weights which are not normalized to a mean of 1. + # As a result, the calculations below which compare the weight to upper and lower, + # which are user-specified bounds for a weight with a mean value of 1, do not produce + # the desired effect (typically the weight is either not trimmed, or trimming produces + # a weight where all values are identical because the original values were all below + # the lower bound. + weight = weight / mean(weight) trims = 0 prev_diff = Inf dif = diffCalculation(weight, lower, upper) @@ -334,6 +341,8 @@ trimmedCalibrate <- function(adjustment.variables, margins, input.weight, lower, trims = trims + 1 weight = trimWeight(weight, lower, upper) weight = computeCalibrate(adjustment.variables, margins, weight, raking, package) + # DS-3682 see above + weight = weight / mean(weight) prev_diff = dif dif = diffCalculation(weight, lower, upper) } diff --git a/tests/testthat/test-calibrate.R b/tests/testthat/test-calibrate.R index 34dd263..aa2acb2 100644 --- a/tests/testthat/test-calibrate.R +++ b/tests/testthat/test-calibrate.R @@ -306,3 +306,25 @@ test_that("DS-3458: Catch CVXR solver errors from bad input data", numeric.targets = targets), "check that the supplied targets are appropriate for your data.") }) + +test_that("DS-3646: Always drop empty levels when checking validity of targets", +{ + categorical.vars <- list(Gender = factor(c("Male", "Female", "Male", "Female")), + Age = factor(c("Old", "Old", "Young", "Young"), levels = c("Old", "Young", "Middle-aged"))) + targets <- list(Gender = rbind(c("Male", 0.5), c("Female", 0.5)), + Age = rbind(c("Old", 0.25), c("Young", 0.25), c("Middle-aged",0.5))) + expect_error(Calibrate(categorical.vars, targets), + "does not appear") + +}) + +test_that("DS-3682: Normalize rake weight before trimming", { + upper = 2 + lower = 0.3 + x = Calibrate(list(Age = input.age, Gender = input.gender), + list(Age = variable.targets.age, Gender = variable.targets.gender), + upper = upper, + lower = lower) + expect_equal(round(min(x), 7), 0.4089635) + expect_equal(round(max(x), 6), 2.148377) +})