#' @importFrom R6 R6Class
#' @export
Bandit <- R6::R6Class(
  class = FALSE,
  public = list(
    k = NULL,      # Number of arms (integer, required)
    d = NULL,      # Dimension of context feature vector (integer, required)
    unique = NULL, # Vector of arm indices of unique context features (vector, optional)
    shared = NULL, # Vector of arm indices of context features shared between arms (vector, optional)
    class_name = "Bandit",
    initialize = function() {
      # Is called before the Bandit instance has been cloned.
      # Initialize Bandit. Set self$d and self$k here.
    },
    post_initialization = function() {
      # Is called after a Simulator has cloned the Bandit instance [number_of_simulations] times.
      # Do simulation-level random generation here.
      invisible(self)
    },
    get_context = function(t) {
      stop("Bandit subclass needs to implement bandit$get_context()", call. = FALSE)
      # Return a list with the number of arms self$k, the number of feature dimensions self$d and,
      # where applicable, a self$d dimensional context vector or self$d x self$k dimensional
      # context matrix X.
      list(X = context, k = arms, d = features) # nocov
    },
    get_reward = function(t, context, action) {
      stop("Bandit subclass needs to implement bandit$get_reward()", call. = FALSE)
      # Return a list with the reward of the chosen arm and, if available, the optimal arm's
      # reward and index.
      list(reward = reward_for_choice_made, optimal_reward = optimal_reward, optimal_arm = optimal_arm) # nocov
    },
    generate_bandit_data = function(n) {
      # Optionally pregenerate n contexts and rewards here.
    },
    final = function() {
      # Is called on object destruction.
    }
  )
)
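
# The sketch below illustrates how a Bandit subclass might look: a minimal context-free
# Bernoulli bandit with a fixed success probability per arm. It is an illustrative
# assumption, not part of the package itself; the name MyBernoulliBandit and its
# weights field are hypothetical (see BasicBernoulliBandit for the package's own
# implementation of this idea).

MyBernoulliBandit <- R6::R6Class(
  inherit = Bandit,
  class = FALSE,
  public = list(
    weights = NULL, # Success probability per arm (numeric vector)
    class_name = "MyBernoulliBandit",
    initialize = function(weights) {
      self$weights <- weights
      self$k <- length(weights) # Number of arms
      self$d <- 1               # Context-free: a single constant feature dimension
    },
    post_initialization = function() {
      # Called once per cloned simulation; per-simulation random generation
      # (for instance, redrawing self$weights) would go here.
      invisible(self)
    },
    get_context = function(t) {
      # Context-free scenario, so context$X is omitted.
      list(k = self$k, d = self$d)
    },
    get_reward = function(t, context, action) {
      # Draw a Bernoulli reward for every arm, then return the chosen arm's reward
      # together with the optimal arm's reward and index (computable here, since
      # all arm probabilities are known).
      rewards     <- as.double(runif(self$k) < self$weights)
      optimal_arm <- which.max(self$weights)
      list(reward         = rewards[action$choice],
           optimal_reward = rewards[optimal_arm],
           optimal_arm    = optimal_arm)
    }
  )
)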
#' Bandit: Superclass
#'
#' Parent or superclass of all \code{\{contextual\}} \code{Bandit} subclasses.
#'
#' In \code{\{contextual\}}, \code{Bandits} are responsible for the generation of (either
#' synthetic or offline) contexts and rewards.
#'
#' On initialisation, a \code{Bandit} subclass has to define the number of arms \code{self$k}
#' and the number of contextual feature dimensions \code{self$d}.
#'
#' For each \emph{t} = \{1, \ldots, T\} a \code{Bandit} then generates a \code{list} containing
#' the current context as a \code{d x k} dimensional matrix \code{context$X},
#' the number of arms in \code{context$k} and the number of features in \code{context$d}.
#'
#' Note: in context-free scenarios, \code{context$X} can be omitted.
#'
#' 
#'
#' On receiving the index of a \code{\link{Policy}}-chosen arm through \code{action$choice},
#' a \code{Bandit} is expected to return a named \code{list} containing at least
#' \code{reward$reward} and, where computable, \code{reward$optimal_reward}.
#'
#' 
#'
#' @name Bandit
#' @aliases post_initialization get_context generate_bandit_data bandit
#'
#' @section Usage:
#' \preformatted{
#' bandit <- Bandit$new()
#' }
#'
#' @section Methods:
#'
#' \describe{
#'
#' \item{\code{new()}}{ generates and initializes a new \code{Bandit} instance. }
#'
#' \item{\code{get_context(t)}}{
#' argument:
#' \itemize{
#' \item \code{t}: integer, time step \code{t}.
#' }
#' returns a named \code{list}
#' containing the current \code{d x k} dimensional matrix \code{context$X},
#' the number of arms \code{context$k} and the number of features \code{context$d}.
#' }
#'
#' \item{\code{get_reward(t, context, action)}}{
#' arguments:
#' \itemize{
#' \item \code{t}: integer, time step \code{t}.
#' \item \code{context}: list, containing the current \code{context$X} (d x k context matrix),
#' \code{context$k} (number of arms) and \code{context$d} (number of context features)
#' (as set by \code{bandit}).
#' \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}).
#' }
#' returns a named \code{list} containing \code{reward$reward} and, where computable,
#' \code{reward$optimal_reward} and \code{reward$optimal_arm}
#' (used by "oracle" policies and to calculate regret).
#' }
#'
#' \item{\code{post_initialization()}}{
#' Is called after a Simulator has cloned the Bandit instance \code{number_of_simulations} times.
#' Do simulation-level random generation here.
#' }
#'
#' \item{\code{generate_bandit_data(n)}}{
#' Is called after the Bandit instance has been cloned \code{number_of_simulations} times.
#' It differs from \code{post_initialization()} in that it is called after the Simulator's
#' optional arm-multiplier has been applied, and in that the length of the data to be
#' generated can be set through the function's \code{n} parameter.
#' }
#' }
#'
| #' | |
| #' @seealso | |
| #' | |
| #' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}}, | |
| #' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}} | |
| #' | |
| #' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}}, | |
| #' \code{\link{OfflineReplayEvaluatorBandit}} | |
| #' | |
| #' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}} | |
| NULL |
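
# A minimal usage sketch, assuming the hypothetical MyBernoulliBandit subclass sketched
# above together with the Agent, Simulator and EpsilonGreedyPolicy classes referenced in
# the documentation; the argument values are illustrative only. Wrapped in if (FALSE) so
# it parses with the package source but is never executed on load.

if (FALSE) {
  bandit    <- MyBernoulliBandit$new(weights = c(0.6, 0.1, 0.1))
  policy    <- EpsilonGreedyPolicy$new(epsilon = 0.1)
  agent     <- Agent$new(policy, bandit)
  simulator <- Simulator$new(agents = list(agent), horizon = 100L, simulations = 10L)
  history   <- simulator$run()
  plot(history, type = "cumulative")
}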