Skip to content

Commit

Permalink
DS-3465: R Code for new Displayr Weighting dialog box (RS-8146)
Browse files Browse the repository at this point in the history
* Merge pull request #16 from Displayr/DS-3465
  • Loading branch information
mwmclean committed Aug 10, 2021
2 parents 8e44894 + e131eaf commit 4c92b84
Show file tree
Hide file tree
Showing 8 changed files with 406 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ inst/doc
# testthat artifact
tests/testthat/Rplots.pdf
tests/testthat/Combined data set.sav
inst/testdata/Cola stacked.sav
*.Rhistory
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: flipData
Type: Package
Title: Functions for extracting and describing data
Version: 1.4.0
Version: 1.5.0
Author: Displayr <opensource@displayr.com>
Maintainer: Displayr <opensource@displayr.com>
Description: Functions for extracting data from formulas and
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export(SplitFormQuestions)
export(StackData)
export(TidyRawData)
export(WeightedSurveyDesign)
export(WeightingDialog)
importFrom(CVXR,Minimize)
importFrom(CVXR,Problem)
importFrom(CVXR,Variable)
Expand Down
12 changes: 8 additions & 4 deletions R/calibrate.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#' @param lower A lower bound weight value (not guaranteed to be achieved).
#' @param upper An upper bound weight value (not guaranteed to be achieved).
#' @param trim.iterations The number of times to run the trim loop over the final weightings
#' @param always.calibrate If \code{FALSE}, whcih is the default,
#' @param always.calibrate If \code{FALSE}, which is the default,
#' problems with only categorical adjustment variables are solved via
#' iterative-proprtional fitting (raking). Otherwise, they are solved via calibration.
#' iterative-proportional fitting (raking). Otherwise, they are solved via calibration.
#' @param package The R package used to calibrate the model when raking is not conducted.
#' Defaults to \code{CVXR} (see https://cvxr.rbind.io/cvxr_examples/cvxr_survey_calibration/). Other options
#' are \code{icarus} and \code{survey}. .
Expand Down Expand Up @@ -124,6 +124,8 @@ convertToDataFrame <- function(x)
categoricalTargets <- function(adjustment.variables, categorical.targets, subset)
{
targets = list()
if (missing(subset))
subset <- rep(TRUE, nrow(adjustment.variables))
n.categorical = length(adjustment.variables)
if (n.categorical != length(categorical.targets)) {
stop("The number of categorical adjustment variables needs to be the same as the number of sets of targets (it isn't)")
Expand Down Expand Up @@ -175,6 +177,8 @@ numericTargets <- function(targets, adjustment.variables, numeric.targets, subse
{
n.categorical = length(targets)
n = NROW(adjustment.variables)
if (missing(subset))
subset <- rep(TRUE, n)
if (length(adjustment.variables) - n.categorical != length(numeric.targets))
{
stop("The number of numeric adjustment variables needs to be the same as the number of sets of targets (it isn't)")
Expand Down Expand Up @@ -358,7 +362,7 @@ print.Calibrate <- function (x, ...)
if (!is.null(product))
instruction.for.getting.variable <- "\n\nTo save the variable, click SAVE VARIABLE(S) > Save Weight Variable from Configuration"
ess = EffectiveSampleSize(x)
ess.percent = round(ess / length(x) * 100)
ess.percent = round(ess / length(x) * 100, 1)
n = length(x)
rng = range(x)

Expand All @@ -368,6 +372,6 @@ print.Calibrate <- function (x, ...)
FormatAsReal(ess, decimals = 0),
" (", ess.percent, "%)\n",
"Smallest weight is ", FormatAsReal(rng[1], decimals = 3), "\n",
"Largest weight is ", FormatAsReal(rng[2], decimals = 3), " (", FormatAsReal(rng[2] / rng[1], decimals = 1), " times the smallest weight)",
"Largest weight is ", FormatAsReal(rng[2], decimals = 3), " (", FormatAsReal(rng[2] / rng[1], decimals = 3), " times the smallest weight)",
instruction.for.getting.variable))
}
123 changes: 123 additions & 0 deletions R/weightingdialog.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#' \code{WeightsDialog}
#' @description Code for the Displayr Weighting dialog box.
#' @param categorical.variables An optional list or data frame of categorical adjustment variables.
#' @param categorical.targets The target probabilities for each category listed in \code{categorical.variables}
#' @param numeric.variables An optional list or data frame of categorical adjustment variables.
#' @param numeric.targets the target mean for each numeric variable in numeric.variables
#' @param lower A lower bound weight value (not guaranteed to be achieved).
#' @param upper An upper bound weight value (not guaranteed to be achieved).
#' @param calfun The calibration function: \code{"Raking"} (Default), \code{"Linear"}, or \code{"Logit"}.
#' @param input.weight An optional weight variable; if supplied, the created weight is created to be as close
#' to this input.weight as possible
#' @param force.to.n Force the sum of weights to equal the sample size.
#' @return numeric A vector of weights
#' @export
WeightingDialog <- function(categorical.variables = NULL,
categorical.targets = NULL,
numeric.variables = NULL,
numeric.targets = NULL,
lower = "",
upper = "",
calfun = c("Raking", "Linear", "Logit")[1],
force.to.n = TRUE,
input.weight = NULL)
{

# Preparing inputs
adjustment.variables = NULL
calfun = tolower(calfun)
targets = list()

if ((is.null(categorical.variables) || length(categorical.variables) == 0) && (is.null(numeric.variables) || length(numeric.variables) == 0)) {
stop("Nothing to do! At least one categorical OR numeric variable required.")
}

# Categorical inputs
n.categorical = if(is.null(categorical.variables)) 0 else NCOL(categorical.variables)
if (n.categorical > 0 )
{
adjustment.variables = convertToDataFrame(categorical.variables)
categorical.targets = if (is.null(categorical.targets) || is.list(categorical.targets)) categorical.targets else list(categorical.targets)
gross <- rep(NA, n.categorical)
for (i in 1:n.categorical)
{
nm <- as.numeric(categorical.targets[[i]][, 2])
gross[i] <- sum(nm)
categorical.targets[[i]][,2] <- prop.table(nm)
}
targets = categoricalTargets(adjustment.variables, categorical.targets)
}

# Numeric inputs
has.numerics <- !is.null(numeric.variables)
if (has.numerics)
{
num.adjustment.variables = convertToDataFrame(numeric.variables)
adjustment.variables = if (is.null(categorical.variables)) num.adjustment.variables else cbind(adjustment.variables, num.adjustment.variables)
targets = numericTargets(targets, adjustment.variables, numeric.targets)
}

# Adding input.weight or a proxy (and normalizing to a mean of Total / n)
n = NROW(adjustment.variables)
weight = if (is.null(input.weight)) rep(1, n) else input.weight / mean(input.weight)

# Removing empty factor levels
if (n.categorical > 0)
for (i in 1:n.categorical) # ordered = FALSE needed for weirdness in survey package
adjustment.variables[[i]] = factor(adjustment.variables[[i]], ordered = FALSE)

# Creating the table of margins/targets in the desired format
marg = createMargins(targets, adjustment.variables, n.categorical, FALSE, "survey")
# Calculating/updating the weight
wgt = computeWeightsDialog(adjustment.variables, has.numerics, marg, weight, lower, upper, calfun)
wgt / mean(wgt)
if (!force.to.n)
wgt <- wgt * gross[[1]] / n
class(wgt) <- "Calibrate"
wgt
}



# Calibration function
#' @importFrom survey calibrate rake
#' @importFrom stats model.matrix weights terms.formula
#' @importFrom CVXR Variable Minimize Problem entr solve
#' @importFrom verbs Sum
computeWeightsDialog <- function(adjustment.variables, has.numerics, margins, input.weight, lower, upper, calfun)
{
if (lower == "" & upper == "" & !has.numerics)
stop("This should be processed via the existing Q algorithm and this code should not have been called.")
# Bounds
lower = if (lower == "") lower = 0 else as.numeric(lower)
upper = if (upper == "") upper = Inf else as.numeric(upper)
if (calfun == "logit")
{
upper = min(1000, upper)# Avoiding having a denominator if Inf in Phi_R
# We are using CVXR to do logit just in case survey package doesn't scale well, so there is a fallback
# That is, we could have done it all in survey, but
formula = createFormula(adjustment.variables)
# wrapping formula with terms.formula fixes the
# "argument "frml" is missing, with no default"
# bug on the R server
X <- model.matrix(object = terms.formula(formula),
data = adjustment.variables)
A <- input.weight * X
n <- NROW(X)
g <- Variable(n)
constraints = list(t(A) %*% g == margins)
Phi_R = Minimize(sum(input.weight * (-entr((g - lower) / (upper - lower)) - (entr((upper - g) / (upper - lower))))))
p = Problem(Phi_R, constraints)
res = solve(p)
as.numeric(input.weight * res$getValue(g))
} else {
weights(calibrate(svydesign(ids = ~1, weights = ~input.weight, data = adjustment.variables),
createFormula(adjustment.variables),
maxit = 1000,
epsilon = 1e-8,
bounds = c(lower, upper),
population = margins,
calfun = calfun))
}
}

4 changes: 2 additions & 2 deletions man/Calibrate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/WeightingDialog.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4c92b84

Please sign in to comment.