#' @export
RandomPolicy <- R6::R6Class(
  portable = FALSE,
  class = FALSE,
  inherit = Policy,
  public = list(
    class_name = "RandomPolicy",
    initialize = function() {
      super$initialize()
    },
    set_parameters = function(context_params) {
      # Per-arm parameters: number of pulls 'n' and running mean reward 'mean'.
      self$theta_to_arms <- list('n' = 0, 'mean' = 0)
    },
    get_action = function(t, context) {
      # Choose one of the k arms uniformly at random; each arm's propensity is 1/k.
      action$choice     <- sample.int(context$k, 1, replace = TRUE)
      action$propensity <- 1/context$k
      action
    },
    set_reward = function(t, context, action, reward) {
      arm    <- action$choice
      reward <- reward$reward
      # Incrementally update the chosen arm's pull count and running mean reward.
      inc(self$theta$n[[arm]])    <- 1
      inc(self$theta$mean[[arm]]) <- (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]]
      self$theta
    }
  )
)
#' Policy: Random
#'
#' \code{RandomPolicy} always explores, choosing arms uniformly at random.
#' In that respect, \code{RandomPolicy} is the mirror image of a pure greedy policy,
#' which would always seek to exploit.
#'
#' @name RandomPolicy
#'
#' @section Usage:
#' \preformatted{
#' policy <- RandomPolicy$new()
#' }
#'
#' @section Arguments:
#'
#' \describe{
#'   \item{\code{name}}{
#'     character string specifying this policy. \code{name}
#'     is, among others, saved to the History log and displayed in summaries and plots.
#'   }
#' }
#'
#' @section Methods:
#'
#' \describe{
#'   \item{\code{new()}}{ Generates a new \code{RandomPolicy} object. Arguments are defined in the
#'   Arguments section above.}
#' }
#'
#' \describe{
#'   \item{\code{set_parameters()}}{
#'     Each policy needs to assign the parameters it wants to keep track of per arm
#'     to the list \code{self$theta_to_arms}, which has to be defined in the body of
#'     \code{set_parameters()}. The parameters defined here can later be accessed per arm as
#'     \code{theta$parameter_name[[index_of_arm]]}, for instance \code{self$theta$mean[[arm]]}
#'     in this class's \code{set_reward()} (see the sketch below).
#'   }
#' }
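#'
#' Roughly speaking, the \code{Policy} superclass expands this per-arm template for each
#' of the bandit's \code{k} arms. For a bandit with, say, three arms, the resulting
#' \code{theta} structure looks something like the following sketch:
#'
#' \preformatted{
#' self$theta$n    <- list(0, 0, 0)   # pull count for each of the three arms
#' self$theta$mean <- list(0, 0, 0)   # running mean reward for each of the three arms
#' }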
#'
#' \describe{
#'   \item{\code{get_action(t, context)}}{
#'     Here, a policy decides which arm to choose, based on the current values
#'     of its parameters and, potentially, the current context
#'     (a contrasting greedy sketch follows below).
#'   }
#' }
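#'
#' For contrast with \code{RandomPolicy}'s uniformly random choice, a pure greedy
#' counterpart could implement the same hook along the lines of the sketch below.
#' This assumes per-arm means are tracked in \code{theta$mean}, as in this class;
#' it is an illustrative sketch, not the package's \code{EpsilonGreedyPolicy} implementation.
#'
#' \preformatted{
#' get_action = function(t, context) {
#'   # exploit: always pick the arm with the highest estimated mean reward
#'   action$choice     <- which.max(unlist(self$theta$mean))
#'   action$propensity <- 1
#'   action
#' }
#' }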
#'
#' \describe{
#'   \item{\code{set_reward(t, context, action, reward)}}{
#'     In \code{set_reward(t, context, action, reward)}, a policy updates its parameter values
#'     based on the reward received and, potentially, the current context,
#'     as sketched below for \code{RandomPolicy}.
#'   }
#' }
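#'
#' In \code{RandomPolicy} itself, for example, the update amounts to incrementing the chosen
#' arm's pull count and its running mean reward. Here \code{inc(x) <- value} is, in effect,
#' shorthand for \code{x <- x + value}:
#'
#' \preformatted{
#' inc(self$theta$n[[arm]])    <- 1
#' inc(self$theta$mean[[arm]]) <- (reward - self$theta$mean[[arm]]) / self$theta$n[[arm]]
#' }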
#'
#' @references
#'
#' Gittins, J., Glazebrook, K., & Weber, R. (2011). Multi-armed bandit allocation indices. John Wiley & Sons.
#' (Original work published 1989)
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
#'
#' @examples
#'
#' horizon     <- 100L
#' simulations <- 100L
#' weights     <- c(0.9, 0.1, 0.1)
#'
#' policy      <- RandomPolicy$new()
#' bandit      <- BasicBernoulliBandit$new(weights = weights)
#' agent       <- Agent$new(policy, bandit)
#'
#' history     <- Simulator$new(agent, horizon, simulations, do_parallel = FALSE)$run()
#'
#' plot(history, type = "arms")
NULL