### Demo: comparing different multi-armed bandit strategies

In [1]:
from src.strategy import UCB
from src.strategy import EpsilonGreedy
from src.strategy import BaysianUCB
from src.strategy import ThompsonBeta
from src.strategy import Softmax
from src.strategy import Exp3
from src.bandit import BernoulliBandit
from src.environment import IndEnv

In [2]:
import math
def anneal_tau(n):
    """Return the annealed temperature ``1 / ln(n + 3)``.

    This function is handed to ``Softmax`` as its ``tau`` argument in place
    of a constant temperature.

    Args:
        n: Numeric value to anneal on. NOTE(review): presumably the current
            round/step counter supplied by ``Softmax`` -- confirm against
            the strategy implementation.

    Returns:
        The temperature as a float. For ``n >= 0`` the logarithm's argument
        is at least 3, so the result is positive, finite, and shrinks as
        ``n`` grows.
    """
    # The +3 offset keeps the log argument above 1 (so the log above 0)
    # for every non-negative n, avoiding a zero or negative denominator.
    log_term = math.log(n + 3)
    return 1 / log_term

In [3]:
# Build the bandit and the environment that every strategy is compared on.
# NOTE(review): presumably 3 arms with success probabilities 0.1 / 0.2 / 0.5,
# and 3000 is presumably the number of rounds -- confirm against
# BernoulliBandit and IndEnv.
bbandit = BernoulliBandit(3, [0.1, 0.2, 0.5])
env = IndEnv(3000, bbandit)

# One object per strategy under comparison.
eg_obj = EpsilonGreedy(0.2)
ucb_obj = UCB(coef=0.5)
baysian_ucb_obj = BaysianUCB()
thompson_beta_obj = ThompsonBeta()
# Softmax is set up twice: once with the constant tau=1, once with the
# anneal_tau function so the temperature is computed by a callable instead.
soft_max_obj = Softmax(tau=1)
soft_max_annealing_obj = Softmax(tau=anneal_tau)
exp3_obj = Exp3(gamma=0.1)


# Register each object's `strategy` method with the environment under a
# name; the same names are passed to env.altair_action_history(name=...)
# in the later cells.
env.strategy_register(eg_obj.strategy, strategy_name='epsilon_greedy')
env.strategy_register(ucb_obj.strategy, strategy_name='ucb')
env.strategy_register(baysian_ucb_obj.strategy, strategy_name='bayesian_ucb')
env.strategy_register(thompson_beta_obj.strategy, strategy_name='thompson_beta')
env.strategy_register(soft_max_obj.strategy, strategy_name='softmax')
env.strategy_register(soft_max_annealing_obj.strategy, strategy_name='softmax_annealing')
env.strategy_register(exp3_obj.strategy, strategy_name='exp3')


In [4]:
# Run the environment built above.
# NOTE(review): presumably plays every registered strategy against the
# bandit and records the results used by the charts -- confirm in IndEnv.run.
env.run()

In [5]:
# Reward chart for the run; left as the cell's final expression so the
# notebook displays whatever the call returns.
# NOTE(review): presumably an Altair chart comparing all registered
# strategies -- confirm.
env.altair_chart_reward()

In [6]:
# Regret chart for the run; left as the cell's final expression so the
# notebook displays whatever the call returns.
# NOTE(review): presumably an Altair chart comparing all registered
# strategies -- confirm.
env.altair_chart_regret()

In [7]:
# Action history for the strategy registered under the name 'ucb'.
env.altair_action_history(name='ucb')

In [8]:
# Action history for the strategy registered under the name 'epsilon_greedy'.
env.altair_action_history(name='epsilon_greedy')

In [9]:
# Action history for the strategy registered under the name 'softmax'
# (the Softmax instance created with the constant tau=1).
env.altair_action_history(name='softmax')

In [10]:
# Action history for the strategy registered under the name
# 'softmax_annealing' (the Softmax instance created with tau=anneal_tau).
env.altair_action_history(name='softmax_annealing')

In [11]:
# Action history for the strategy registered under the name 'exp3'.
env.altair_action_history(name='exp3')