Displayr · mwmclean · May 20, 2022 · May 17, 2022 · May 17, 2022 · May 20, 2022
diff --git a/R/calibrate.R b/R/calibrate.R
@@ -135,7 +135,7 @@ categoricalTargets <- function(adjustment.variables, categorical.targets, subset
         tgt = categorical.targets[[i]]
         targets[[i]] = suppressWarnings(as.numeric(str_trim(tgt[, 2])))
         names(targets[[i]]) = tgt[, 1]
-        adj.variable = if(is.null(subset)) adjustment.variables[[i]] else droplevels(adjustment.variables[[i]][subset])
+        adj.variable = if(is.null(subset)) droplevels(adjustment.variables[[i]]) else droplevels(adjustment.variables[[i]][subset])
         adj.unique = levels(adj.variable)
         missing.targets = ! adj.unique %in% tgt[, 1]
         varname = names(adjustment.variables)[i]
@@ -323,6 +323,13 @@ trimWeight = function(weight, lower, upper)
 trimmedCalibrate <- function(adjustment.variables, margins, input.weight, lower, upper, trim.iterations, raking, package)
 {
     weight = computeCalibrate(adjustment.variables, margins, input.weight, raking, package)
+    # DS-3682: computeCalibrate produces weights which are not normalized to a mean of 1.
+    # As a result, the calculations below which compare the weight to upper and lower,
+    # which are user-specified bounds for a weight with a mean value of 1, do not produce
+    # the desired effect (typically the weight is either not trimmed, or trimming produces
+    # a weight where all values are identical because the original values were all below
+    # the lower bound.
+    weight = weight / mean(weight)
     trims = 0
     prev_diff = Inf
     dif = diffCalculation(weight, lower, upper)
@@ -334,6 +341,8 @@ trimmedCalibrate <- function(adjustment.variables, margins, input.weight, lower,
         trims = trims + 1
         weight = trimWeight(weight, lower, upper)
         weight = computeCalibrate(adjustment.variables, margins, weight, raking, package)
+        # DS-3682 see above
+        weight = weight / mean(weight)
         prev_diff = dif
         dif = diffCalculation(weight, lower, upper)
     }

diff --git a/tests/testthat/test-calibrate.R b/tests/testthat/test-calibrate.R
@@ -306,3 +306,25 @@ test_that("DS-3458: Catch CVXR solver errors from bad input data",
                            numeric.targets = targets),
                  "check that the supplied targets are appropriate for your data.")
 })
+
+test_that("DS-3646: Always drop empty levels when checking validity of targets",
+{
+    categorical.vars <- list(Gender = factor(c("Male", "Female", "Male", "Female")),
+                             Age = factor(c("Old", "Old", "Young", "Young"), levels = c("Old", "Young", "Middle-aged")))
+    targets <- list(Gender = rbind(c("Male", 0.5), c("Female", 0.5)),
+                    Age = rbind(c("Old", 0.25), c("Young", 0.25), c("Middle-aged",0.5)))
+    expect_error(Calibrate(categorical.vars, targets),
+                 "does not appear")
+
+})
+
+test_that("DS-3682: Normalize rake weight before trimming", {
+    upper = 2
+    lower = 0.3
+    x = Calibrate(list(Age = input.age, Gender = input.gender), 
+                  list(Age = variable.targets.age, Gender = variable.targets.gender), 
+                  upper = upper, 
+                  lower = lower)
+    expect_equal(round(min(x), 7), 0.4089635)
+    expect_equal(round(max(x), 6), 2.148377)
+})