DS-3465: R Code for new Displayr Weighting dialog box (RS-8146)

* Merge pull request #16 from Displayr/DS-3465
Displayr · Aug 10, 2021 · 4c92b84 · 4c92b84
2 parents 8e44894 + e131eaf
commit 4c92b84
Show file tree

Hide file tree

Showing 8 changed files with 406 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,5 @@ inst/doc
 # testthat artifact
 tests/testthat/Rplots.pdf
 tests/testthat/Combined data set.sav
+inst/testdata/Cola stacked.sav
 *.Rhistory
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: flipData
 Type: Package
 Title: Functions for extracting and describing data
-Version: 1.4.0
+Version: 1.5.0
 Author: Displayr <opensource@displayr.com>
 Maintainer: Displayr <opensource@displayr.com>
 Description: Functions for extracting data from formulas and

diff --git a/NAMESPACE b/NAMESPACE
@@ -44,6 +44,7 @@ export(SplitFormQuestions)
 export(StackData)
 export(TidyRawData)
 export(WeightedSurveyDesign)
+export(WeightingDialog)
 importFrom(CVXR,Minimize)
 importFrom(CVXR,Problem)
 importFrom(CVXR,Variable)

diff --git a/R/calibrate.R b/R/calibrate.R
@@ -8,9 +8,9 @@
 #' @param lower A lower bound weight value (not guaranteed to be achieved).
 #' @param upper An upper bound weight value (not guaranteed to be achieved).
 #' @param trim.iterations The number of times to run the trim loop over the final weightings
-#' @param always.calibrate If \code{FALSE}, whcih is the default,
+#' @param always.calibrate If \code{FALSE}, which is the default,
 #' problems with only categorical adjustment variables are solved via
-#' iterative-proprtional fitting (raking). Otherwise, they are solved via calibration.
+#' iterative-proportional fitting (raking). Otherwise, they are solved via calibration.
 #' @param package The R package used to calibrate the model when raking is not conducted.
 #' Defaults to \code{CVXR} (see https://cvxr.rbind.io/cvxr_examples/cvxr_survey_calibration/). Other options
 #' are \code{icarus} and \code{survey}. .
@@ -124,6 +124,8 @@ convertToDataFrame <- function(x)
 categoricalTargets <- function(adjustment.variables, categorical.targets, subset)
 {
     targets = list()
+    if (missing(subset))
+        subset <- rep(TRUE, nrow(adjustment.variables))
     n.categorical = length(adjustment.variables)
     if (n.categorical != length(categorical.targets)) {
         stop("The number of categorical adjustment variables needs to be the same as the number of sets of targets (it isn't)")
@@ -175,6 +177,8 @@ numericTargets <- function(targets, adjustment.variables, numeric.targets, subse
 {
     n.categorical = length(targets)
     n = NROW(adjustment.variables)
+    if (missing(subset))
+        subset <- rep(TRUE, n)
     if (length(adjustment.variables) - n.categorical != length(numeric.targets))
     {
         stop("The number of numeric adjustment variables needs to be the same as the number of sets of targets (it isn't)")
@@ -358,7 +362,7 @@ print.Calibrate <- function (x, ...)
     if (!is.null(product))
         instruction.for.getting.variable <- "\n\nTo save the variable, click SAVE VARIABLE(S) > Save Weight Variable from Configuration"
     ess = EffectiveSampleSize(x)
-    ess.percent = round(ess / length(x) * 100)
+    ess.percent = round(ess / length(x) * 100, 1)
     n = length(x)
     rng = range(x)
 
@@ -368,6 +372,6 @@ print.Calibrate <- function (x, ...)
               FormatAsReal(ess, decimals = 0),
               " (",  ess.percent, "%)\n",
               "Smallest weight is ", FormatAsReal(rng[1], decimals = 3), "\n",
-              "Largest weight is ", FormatAsReal(rng[2], decimals = 3), " (", FormatAsReal(rng[2] / rng[1], decimals = 1), " times the smallest weight)",
+              "Largest weight is ", FormatAsReal(rng[2], decimals = 3), " (", FormatAsReal(rng[2] / rng[1], decimals = 3), " times the smallest weight)",
               instruction.for.getting.variable))
 }
diff --git a/R/weightingdialog.R b/R/weightingdialog.R
@@ -0,0 +1,123 @@
+#' \code{WeightsDialog}
+#' @description Code for the Displayr Weighting dialog box.
+#' @param categorical.variables An optional list or data frame of categorical adjustment variables.
+#' @param categorical.targets The target probabilities for each category listed in \code{categorical.variables}
+#' @param numeric.variables An optional list or data frame of categorical adjustment variables.
+#' @param numeric.targets the target mean for each numeric variable in numeric.variables
+#' @param lower A lower bound weight value (not guaranteed to be achieved).
+#' @param upper An upper bound weight value (not guaranteed to be achieved).
+#' @param calfun The calibration function: \code{"Raking"} (Default), \code{"Linear"}, or \code{"Logit"}.
+#' @param input.weight An optional weight variable; if supplied, the created weight is created to be as close
+#' to this input.weight as possible
+#' @param force.to.n Force the sum of weights to equal the sample size.
+#' @return numeric A vector of weights
+#' @export
+WeightingDialog <- function(categorical.variables = NULL,
+                      categorical.targets = NULL,
+                      numeric.variables = NULL,
+                      numeric.targets = NULL,
+                      lower = "",
+                      upper = "",
+                      calfun = c("Raking", "Linear", "Logit")[1],
+                      force.to.n = TRUE,
+                      input.weight = NULL)
+{
+
+    # Preparing inputs
+    adjustment.variables = NULL
+    calfun = tolower(calfun)
+    targets = list()
+
+    if ((is.null(categorical.variables) || length(categorical.variables) == 0) && (is.null(numeric.variables) || length(numeric.variables) == 0)) {
+        stop("Nothing to do! At least one categorical OR numeric variable required.")
+    }
+
+    # Categorical inputs
+    n.categorical = if(is.null(categorical.variables)) 0 else NCOL(categorical.variables)
+    if (n.categorical > 0 )
+    {
+        adjustment.variables = convertToDataFrame(categorical.variables)
+        categorical.targets = if (is.null(categorical.targets) || is.list(categorical.targets)) categorical.targets else list(categorical.targets)
+        gross <- rep(NA, n.categorical)
+        for (i in 1:n.categorical)
+        {
+            nm <- as.numeric(categorical.targets[[i]][, 2])
+            gross[i] <- sum(nm)
+            categorical.targets[[i]][,2] <- prop.table(nm)
+        }
+        targets = categoricalTargets(adjustment.variables, categorical.targets)
+    }
+
+    # Numeric inputs
+    has.numerics <- !is.null(numeric.variables)
+    if (has.numerics)
+    {
+        num.adjustment.variables = convertToDataFrame(numeric.variables)
+        adjustment.variables = if (is.null(categorical.variables)) num.adjustment.variables else cbind(adjustment.variables, num.adjustment.variables)
+        targets = numericTargets(targets, adjustment.variables, numeric.targets)
+    }
+
+    # Adding input.weight or a proxy (and normalizing to a mean of Total / n)
+    n = NROW(adjustment.variables)
+    weight = if (is.null(input.weight)) rep(1, n) else input.weight / mean(input.weight)
+
+    # Removing empty factor levels
+    if (n.categorical > 0)
+        for (i in 1:n.categorical) # ordered = FALSE needed for weirdness in survey package
+            adjustment.variables[[i]] = factor(adjustment.variables[[i]], ordered = FALSE)
+
+    # Creating the table of margins/targets in the desired format
+    marg = createMargins(targets, adjustment.variables, n.categorical, FALSE, "survey")
+    # Calculating/updating the weight
+    wgt = computeWeightsDialog(adjustment.variables, has.numerics, marg, weight, lower, upper, calfun)
+    wgt / mean(wgt)
+    if (!force.to.n)
+        wgt <- wgt * gross[[1]] / n
+    class(wgt) <- "Calibrate"
+    wgt
+}
+
+
+
+# Calibration function
+#' @importFrom survey calibrate rake
+#' @importFrom stats model.matrix weights terms.formula
+#' @importFrom CVXR Variable Minimize Problem entr solve
+#' @importFrom verbs Sum
+computeWeightsDialog <- function(adjustment.variables, has.numerics, margins, input.weight, lower, upper, calfun)
+{
+    if (lower == "" & upper == "" & !has.numerics)
+        stop("This should be processed via the existing Q algorithm and this code should not have been called.")
+    # Bounds
+    lower = if (lower == "") lower = 0 else as.numeric(lower)
+    upper = if (upper == "") upper = Inf else as.numeric(upper)
+    if (calfun == "logit")
+    {
+        upper = min(1000, upper)# Avoiding having a denominator if Inf in Phi_R
+        # We are using CVXR to do logit just in case survey package doesn't scale well, so there is a fallback
+        # That is, we could have done it all in survey, but
+            formula = createFormula(adjustment.variables)
+            # wrapping formula with terms.formula fixes the
+            # "argument "frml" is missing, with no default"
+            # bug on the R server
+            X <- model.matrix(object = terms.formula(formula),
+                              data = adjustment.variables)
+            A <- input.weight * X
+            n <- NROW(X)
+            g <- Variable(n)
+            constraints = list(t(A) %*% g == margins)
+            Phi_R = Minimize(sum(input.weight * (-entr((g - lower) / (upper - lower)) - (entr((upper - g) / (upper - lower))))))
+            p = Problem(Phi_R, constraints)
+            res = solve(p)
+            as.numeric(input.weight * res$getValue(g))
+    } else {
+        weights(calibrate(svydesign(ids = ~1, weights = ~input.weight, data = adjustment.variables),
+                          createFormula(adjustment.variables),
+                          maxit = 1000,
+                          epsilon = 1e-8,
+                          bounds = c(lower, upper),
+                          population = margins,
+                          calfun = calfun))
+    }
+}
+
diff --git a/man/Calibrate.Rd b/man/Calibrate.Rd
diff --git a/man/WeightingDialog.Rd b/man/WeightingDialog.Rd