
Commit

Simplify propensity calculation
Remove "inverted" argument.
robinvanemden committed Jul 25, 2020
1 parent 0424c54 commit a2ffe17
Showing 4 changed files with 28 additions and 41 deletions.
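
In practice, the simplification means logged propensities are always treated as plain probabilities p, clipped from below by `threshold` and inverted exactly once into a weight w = 1 / max(p, threshold); the old `inverted` switch is gone. A minimal sketch of that calculation (the helper name is illustrative, not part of the package):

```r
# Illustrative only: `p` and `threshold` stand in for the bandit's
# private$p[index] and self$threshold.
propensity_weight <- function(p, threshold = 0) {
  p <- max(p, threshold)  # with the default threshold of 0, p is unchanged
  1 / p                   # propensities are plain probabilities, inverted once
}

propensity_weight(0.02)                    # 50
propensity_weight(0.02, threshold = 0.05)  # clipped: 1 / 0.05 = 20
```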
5 changes: 3 additions & 2 deletions NEWS.md
@@ -2,7 +2,8 @@ contextual 0.9.8.4
==================

* Minor documentation updates.
* Fix for Exp3 bug (thanks, @leferrad !)
* Fix for Exp3 bug (thanks, @leferrad)
* Cleanup of propensity score related code (thanks again, @leferrad)
* Updated tests.


@@ -11,7 +12,7 @@ contextual 0.9.8.3

* Tested and confirmed to be R 4.0.0 proof.
* Minor documentation updates.
* Now correctly restores global seed on completing a simulation (thanks, @pstansell !)
* Now correctly restores global seed on completing a simulation (thanks, @pstansell)


contextual 0.9.8.2
23 changes: 7 additions & 16 deletions R/bandit_offline_doubly_robust.R
@@ -9,15 +9,13 @@ OfflineDoublyRobustBandit <- R6::R6Class(
n = NULL
),
public = list(
inverted = NULL,
threshold = NULL,
class_name = "OfflineDoublyRobustBandit",
initialize = function(formula,
data, k = NULL, d = NULL,
unique = NULL, shared = NULL,
inverted = FALSE, threshold = 0,
threshold = 0,
randomize = TRUE) {
self$inverted <- inverted
self$threshold <- threshold
super$initialize(formula,
data, k, d,
@@ -44,15 +42,11 @@ OfflineDoublyRobustBandit <- R6::R6Class(
p <- private$p[index]
indicator <- ind(private$z[index] == choice)
if (indicator) {
p <- private$p[index]
if (self$inverted) p <- 1 / p
if (self$threshold > 0) {
if (isTRUE(self$inverted)) p <- 1 / p
p <- 1 / max(p,self$threshold)
} else {
if (!isTRUE(self$inverted)) p <- 1 / p
}
prop_reward <- (data_reward - model_reward) * p

p <- max(private$p[index], self$threshold) # when threshold is 0 (the default),
                                           # p = private$p[index]
w <- 1 / p
prop_reward <- (data_reward - model_reward) * w
} else {
prop_reward <- 0
}
@@ -77,7 +71,7 @@ OfflineDoublyRobustBandit <- R6::R6Class(
#' bandit <- OfflineDoublyRobustBandit(formula,
#' data, k = NULL, d = NULL,
#' unique = NULL, shared = NULL,
#' inverted = FALSE, randomize = TRUE)
#' randomize = TRUE)
#' }
#'
#' @section Arguments:
@@ -121,9 +115,6 @@ OfflineDoublyRobustBandit <- R6::R6Class(
#' \item{\code{shared}}{
#' integer vector; index of shared features (optional)
#' }
#' \item{\code{inverted}}{
#' logical; have the propensities been inverted (1/p) or not (p)?
#' }
#' \item{\code{threshold}}{
#' float (0,1); lower threshold, or Tau, on propensity score values. A smaller Tau makes for less biased
#' estimates with more variance, and vice versa. For more information, see the paper by Strehl et al. (2010).
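
For context, the correction computed above follows the usual doubly robust form: the model's reward estimate plus an importance-weighted residual whenever the logged arm matches the policy's choice. A hedged sketch, assuming the standard combination of model estimate and correction (the diff only shows the correction term; names mirror the diff but this is not the package's exact code):

```r
doubly_robust_reward <- function(data_reward, model_reward, p,
                                 choice_matches_logged, threshold = 0) {
  w <- 1 / max(p, threshold)          # clipped inverse propensity weight
  correction <- if (choice_matches_logged) (data_reward - model_reward) * w else 0
  model_reward + correction
}

# Logged arm matches the chosen arm, logged propensity 0.25:
doubly_robust_reward(data_reward = 1, model_reward = 0.4, p = 0.25,
                     choice_matches_logged = TRUE)   # 0.4 + 0.6 * 4 = 2.8
```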
37 changes: 16 additions & 21 deletions R/bandit_offline_propensity_weighting.R
@@ -10,18 +10,15 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
),
public = list(
class_name = "OfflinePropensityWeightingBandit",
inverted = NULL,
threshold = NULL,
drop_value = NULL,
stabilized = NULL,
initialize = function(formula,
data, k = NULL, d = NULL,
unique = NULL, shared = NULL,
randomize = TRUE, replacement = FALSE,
jitter = FALSE, arm_multiply = FALSE,
inverted = FALSE, threshold = 0,
jitter = FALSE, arm_multiply = FALSE, threshold = 0,
stabilized = TRUE, drop_unequal_arm = TRUE) {
self$inverted <- inverted
self$threshold <- threshold
self$stabilized <- stabilized
if(isTRUE(drop_unequal_arm)) {
@@ -47,20 +44,22 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
},
get_reward = function(index, context, action) {
if (private$z[[index]] == action$choice) {
p <- private$p[index]
if (self$threshold > 0) {
if (isTRUE(self$inverted)) p <- 1 / p
p <- 1 / max(p,self$threshold)
} else {
if (!isTRUE(self$inverted)) p <- 1 / p
}

p <- max(private$p[index], self$threshold) # when threshold is 0 (the default),
                                           # p = private$p[index]

w <- 1 / p

if (self$stabilized) {

inc(private$n) <- 1
inc(private$p_hat) <- (p - private$p_hat) / private$n
prop_reward <- as.double((private$y[index]*p)/private$p_hat)
inc(private$p_hat) <- (w - private$p_hat) / private$n
prop_reward <- as.double((private$y[index]*w)/private$p_hat)

} else {
prop_reward <- as.double(private$y[index]*p)
prop_reward <- as.double(private$y[index]*w)
}

list(
reward = prop_reward,
optimal_reward = ifelse(private$or, as.double(private$S$optimal_reward[[index]]), NA),
@@ -94,8 +93,7 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
#' data, k = NULL, d = NULL,
#' unique = NULL, shared = NULL,
#' randomize = TRUE, replacement = TRUE,
#' jitter = TRUE, arm_multiply = TRUE,
#' inverted = FALSE)
#' jitter = TRUE, arm_multiply = TRUE)
#' }
#'
#' @section Arguments:
@@ -134,10 +132,7 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
#' \item{\code{arm_multiply}}{
#' logical; multiply the horizon by the number of arms (optional, default: TRUE)
#' }
#' \item{\code{inverted}}{
#' logical; have the propensity scores been weighted (optional, default: FALSE)
#' }
#' \item{\code{threshold}}{
#' \item{\code{threshold}}{
#' float (0,1); lower threshold, or Tau, on propensity score values. A smaller Tau makes for less biased
#' estimates with more variance, and vice versa. For more information, see the paper by Strehl et al. (2010).
#' Values between 0.01 and 0.05 are known to work well.
@@ -168,7 +163,7 @@ OfflinePropensityWeightingBandit <- R6::R6Class(
#' \describe{
#'
#' \item{\code{new(formula, data, k = NULL, d = NULL, unique = NULL, shared = NULL, randomize = TRUE,
#' replacement = TRUE, jitter = TRUE, arm_multiply = TRUE, inverted = FALSE)}}{
#' replacement = TRUE, jitter = TRUE, arm_multiply = TRUE)}}{
#' generates and instantiates a new \code{OfflinePropensityWeightingBandit} instance. }
#'
#' \item{\code{get_context(t)}}{
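
The `stabilized` branch above implements a self-normalized importance-weighted estimate: each reward is scaled by w divided by a running mean of all weights observed so far. A hedged sketch of that update, with illustrative names rather than the package's API:

```r
make_stabilized_ipw <- function(threshold = 0) {
  n <- 0; w_bar <- 0
  function(y, p) {
    w <- 1 / max(p, threshold)          # clipped inverse propensity weight
    n <<- n + 1
    w_bar <<- w_bar + (w - w_bar) / n   # incremental mean of the weights seen so far
    y * w / w_bar                       # self-normalized ("stabilized") reward
  }
}

ipw <- make_stabilized_ipw(threshold = 0.05)
ipw(y = 1, p = 0.5)   # w = 2,  w_bar = 2, reward = 1
ipw(y = 1, p = 0.1)   # w = 10, w_bar = 6, reward = 10 / 6 ≈ 1.67
```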
4 changes: 2 additions & 2 deletions demo/demo_simpsons_paradox_propensity.R
@@ -214,13 +214,13 @@ print(paste("Movie:",round(sum(prop_dt[choice==2]$reward)/nrow(prop_dt[choice==2
# stop.method = "es.mean", verbose=FALSE)
# b_dt$choice <- b_dt$choice + 1
#
# weights <- get.weights(ip, stop.method = "es.mean") # already inverted
# weights <- get.weights(ip, stop.method = "es.mean")
# b_dt$p <- weights
#
# f <- formula("reward ~ choice | X.1 + X.2 | p")
#
# bandit <- OfflinePropensityWeightingBandit$new(formula = f, data = b_dt,
# k = 2 , d = 2, inverted = TRUE)
# k = 2 , d = 2)
# policy <- EpsilonGreedyPolicy$new(0.1)
# agent <- Agent$new(policy, bandit, "prop")
#
