
Commit

Simplify propensity calculation
Remove "inverted" argument.
robinvanemden committed Jul 25, 2020
1 parent 0424c54 commit a2ffe17
Showing 4 changed files with 28 additions and 41 deletions.
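
In practice, the simplification means logged propensities are always treated as plain probabilities p, clipped from below by `threshold` and inverted exactly once into a weight w = 1 / max(p, threshold); the old `inverted` switch is gone. A minimal sketch of that calculation (the helper name is illustrative, not part of the package):

```r
# Illustrative only: `p` and `threshold` stand in for the bandit's
# private$p[index] and self$threshold.
propensity_weight <- function(p, threshold = 0) {
  p <- max(p, threshold)  # with the default threshold of 0, p is unchanged
  1 / p                   # propensities are plain probabilities, inverted once
}

propensity_weight(0.02)                    # 50
propensity_weight(0.02, threshold = 0.05)  # clipped: 1 / 0.05 = 20
```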
5 changes: 3 additions & 2 deletions NEWS.md
@@ -2,7 +2,8 @@ contextual 0.9.8.4
==================

* Minor documentation updates.
* Fix for Exp3 bug (thanks, @leferrad !)
* Fix for Exp3 bug (thanks, @leferrad)
* Cleanup of propensity score related code (thanks again, @leferrad)
* Updated tests.


@@ -11,7 +12,7 @@ contextual 0.9.8.3

* Tested and confirmed to be R 4.0.0 proof.
* Minor documentation updates.
* Now correctly restores global seed on completing a simulation (thanks, @pstansell !)
* Now correctly restores global seed on completing a simulation (thanks, @pstansell)


contextual 0.9.8.2
23 changes: 7 additions & 16 deletions R/bandit_offline_doubly_robust.R
@@ -9,15 +9,13 @@ OfflineDoublyRobustBandit <- R6::R6Class(
n = NULL
),
public = list(
inverted = NULL,
threshold = NULL,
class_name = "OfflineDoublyRobustBandit",
initialize = function(formula,
data, k = NULL, d = NULL,
unique = NULL, shared = NULL,
inverted = FALSE, threshold = 0,
threshold = 0,
randomize = TRUE) {
self$inverted <- inverted
self$threshold <- threshold
super$initialize(formula,
data, k, d,
@@ -44,15 +42,11 @@ OfflineDoublyRobustBandit <- R6::R6Class(
p <- private$p[index]
indicator <- ind(private$z[index] == choice)
if (indicator) {
p <- private$p[index]
if (self$inverted) p <- 1 / p
if (self$threshold > 0) {
if (isTRUE(self$inverted)) p <- 1 / p
p <- 1 / max(p,self$threshold)
} else {
if (!isTRUE(self$inverted)) p <- 1 / p
}
prop_reward <- (data_reward - model_reward) * p

p <- max(private$p[index], self$threshold) # when threshold is 0 (the default),
                                           # p = private$p[index]
w <- 1 / p
prop_reward <- (data_reward - model_reward) * w
} else {
prop_reward <- 0
}
@@ -77,7 +71,7 @@ OfflineDoublyRobustBandit <- R6::R6Class(
#' bandit <- OfflineDoublyRobustBandit(formula,
#' data, k = NULL, d = NULL,
#' unique = NULL, shared = NULL,
#' inverted = FALSE, randomize = TRUE)
#' randomize = TRUE)
#' }
#'
#' @section Arguments:
@@ -121,9 +115,6 @@ OfflineDoublyRobustBandit <- R6::R6Class(
#' \item{\code{shared}}{
#' integer vector; index of shared features (optional)
#' }
#' \item{\code{inverted}}{
#' logical; have the propensities been inverted (1/p) or not (p)?
#' }
#' \item{\code{threshold}}{
#' float (0,1); lower threshold, or Tau, on propensity score values. A smaller Tau makes for less biased
#' estimates with more variance, and vice versa. For more information, see the paper by Strehl et al. (2010).
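
For context, the correction computed above follows the usual doubly robust form: the model's reward estimate plus an importance-weighted residual whenever the logged arm matches the policy's choice. A hedged sketch, assuming the standard combination of model estimate and correction (the diff only shows the correction term; names mirror the diff but this is not the package's exact code):

```r
doubly_robust_reward <- function(data_reward, model_reward, p,
                                 choice_matches_logged, threshold = 0) {
  w <- 1 / max(p, threshold)          # clipped inverse propensity weight
  correction <- if (choice_matches_logged) (data_reward - model_reward) * w else 0
  model_reward + correction
}

# Logged arm matches the chosen arm, logged propensity 0.25:
doubly_robust_reward(data_reward = 1, model_reward = 0.4, p = 0.25,
                     choice_matches_logged = TRUE)   # 0.4 + 0.6 * 4 = 2.8
```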
37 changes: 16 additions & 21 deletions R/bandit_offline_propensity_weighting.R
@@ -10,18 +10,15 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
),
public = list(
class_name = "OfflinePropensityWeightingBandit",
inverted = NULL,
threshold = NULL,
drop_value = NULL,
stabilized = NULL,
initialize = function(formula,
data, k = NULL, d = NULL,
unique = NULL, shared = NULL,
randomize = TRUE, replacement = FALSE,
jitter = FALSE, arm_multiply = FALSE,
inverted = FALSE, threshold = 0,
jitter = FALSE, arm_multiply = FALSE, threshold = 0,
stabilized = TRUE, drop_unequal_arm = TRUE) {
self$inverted <- inverted
self$threshold <- threshold
self$stabilized <- stabilized
if(isTRUE(drop_unequal_arm)) {
@@ -47,20 +44,22 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
},
get_reward = function(index, context, action) {
if (private$z[[index]] == action$choice) {
p <- private$p[index]
if (self$threshold > 0) {
if (isTRUE(self$inverted)) p <- 1 / p
p <- 1 / max(p,self$threshold)
} else {
if (!isTRUE(self$inverted)) p <- 1 / p
}

p <- max(private$p[index], self$threshold) # when threshold is 0 (the default),
                                           # p = private$p[index]

w <- 1 / p

if (self$stabilized) {

inc(private$n) <- 1
inc(private$p_hat) <- (p - private$p_hat) / private$n
prop_reward <- as.double((private$y[index]*p)/private$p_hat)
inc(private$p_hat) <- (w - private$p_hat) / private$n
prop_reward <- as.double((private$y[index]*w)/private$p_hat)

} else {
prop_reward <- as.double(private$y[index]*p)
prop_reward <- as.double(private$y[index]*w)
}

list(
reward = prop_reward,
optimal_reward = ifelse(private$or, as.double(private$S$optimal_reward[[index]]), NA),
@@ -94,8 +93,7 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
#' data, k = NULL, d = NULL,
#' unique = NULL, shared = NULL,
#' randomize = TRUE, replacement = TRUE,
#' jitter = TRUE, arm_multiply = TRUE,
#' inverted = FALSE)
#' jitter = TRUE, arm_multiply = TRUE)
#' }
#'
#' @section Arguments:
@@ -134,10 +132,7 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
#' \item{\code{arm_multiply}}{
#' logical; multiply the horizon by the number of arms (optional, default: TRUE)
#' }
#' \item{\code{inverted}}{
#' logical; have the propensity scores been weighted (optional, default: FALSE)
#' }
#' \item{\code{threshold}}{
#' \item{\code{threshold}}{
#' float (0,1); lower threshold, or Tau, on propensity score values. A smaller Tau makes for less biased
#' estimates with more variance, and vice versa. For more information, see the paper by Strehl et al. (2010).
#' Values between 0.01 and 0.05 are known to work well.
@@ -168,7 +163,7 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
#' \describe{
#'
#' \item{\code{new(formula, data, k = NULL, d = NULL, unique = NULL, shared = NULL, randomize = TRUE,
#' replacement = TRUE, jitter = TRUE, arm_multiply = TRUE, inverted = FALSE)}}{
#' replacement = TRUE, jitter = TRUE, arm_multiply = TRUE)}}{
#' generates and instantiates a new \code{OfflinePropensityWeightingBandit} instance. }
#'
#' \item{\code{get_context(t)}}{
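
The `stabilized` branch above implements a self-normalized importance-weighted estimate: each reward is scaled by w divided by a running mean of all weights observed so far. A hedged sketch of that update, with illustrative names rather than the package's API:

```r
make_stabilized_ipw <- function(threshold = 0) {
  n <- 0; w_bar <- 0
  function(y, p) {
    w <- 1 / max(p, threshold)          # clipped inverse propensity weight
    n <<- n + 1
    w_bar <<- w_bar + (w - w_bar) / n   # incremental mean of the weights seen so far
    y * w / w_bar                       # self-normalized ("stabilized") reward
  }
}

ipw <- make_stabilized_ipw(threshold = 0.05)
ipw(y = 1, p = 0.5)   # w = 2,  w_bar = 2, reward = 1
ipw(y = 1, p = 0.1)   # w = 10, w_bar = 6, reward = 10 / 6 ≈ 1.67
```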
4 changes: 2 additions & 2 deletions demo/demo_simpsons_paradox_propensity.R
@@ -214,13 +214,13 @@ print(paste("Movie:",round(sum(prop_dt[choice==2]$reward)/nrow(prop_dt[choice==2
# stop.method = "es.mean", verbose=FALSE)
# b_dt$choice <- b_dt$choice + 1
#
# weights <- get.weights(ip, stop.method = "es.mean") # already inverted
# weights <- get.weights(ip, stop.method = "es.mean")
# b_dt$p <- weights
#
# f <- formula("reward ~ choice | X.1 + X.2 | p")
#
# bandit <- OfflinePropensityWeightingBandit$new(formula = f, data = b_dt,
# k = 2 , d = 2, inverted = TRUE)
# k = 2 , d = 2)
# policy <- EpsilonGreedyPolicy$new(0.1)
# agent <- Agent$new(policy, bandit, "prop")
#
