In [None]:
import numpy as np
import matplotlib.pyplot as plt
%pylab inline

from MultiArmedBandit import Arm, bernoulliArm, evolvingBernoulliArm
from Exp3 import exp3_Bianchi, exp3P_Bianchi, exp3_IX
from OtherBanditAlgorithms import UCB1, Random

In [None]:
### Simple vicious adversary

At each round, exactly one arm yields a reward of 1 and all the others yield 0; the rewarding arm cycles deterministically through the arms.

The Exp3 variants perform much better than UCB, but they behave more or less like a random algorithm.

In [None]:
# Horizon (number of rounds) and number of arms of the bandit problem.
T = 10000
nb_arms = 10

# Rewards[a][t] is 1 when round t belongs to arm a (t % nb_arms == a) and 0
# otherwise, so exactly one arm pays off at every round.
Rewards = [
    [int(t % nb_arms == arm) for t in range(T)]
    for arm in range(nb_arms)
]

# One Arm per pre-computed reward sequence.
MAB = [Arm(rewards_sequence=sequence) for sequence in Rewards]

In [None]:
# Sweep over multiples of a reference learning rate and record, for each of the
# three Exp3 variants, the cumulative reward collected after T2 and T rounds.

# Horizons are defined first so that base_parameter below never depends on a
# value of T left over from another cell. Keep T equal to the horizon used
# when the reward sequences of MAB were built.
T = 10000
T2 = 5000  # intermediate horizon at which the reward is also recorded

K = len(MAB)
# Reference learning rate sqrt(ln K / (T K)); the sweep rescales it.
base_parameter = np.sqrt(np.log(K) / (T * K))

n_iter = 50          # repetitions per multiplier
nb_multipliers = 10  # number of points in the sweep

# Axis 0: algorithm (0 = Exp3, 1 = Exp3.P, 2 = Exp3-IX),
# axis 1: multiplier index, axis 2: repetition index.
# Note: despite the R_ prefix these arrays store cumulative rewards.
R_T = np.zeros((3, nb_multipliers, n_iter))
R_T2 = np.zeros((3, nb_multipliers, n_iter))

for j, multiplier in enumerate(np.linspace(1e-3, 5e1, nb_multipliers)):
    print(multiplier)
    # The learning rate only depends on the multiplier, not on the repetition.
    eta = multiplier * base_parameter

    # Iterate over n_iter (not a hard-coded 50) so the loop always matches
    # the last dimension of R_T / R_T2.
    for i in range(n_iter):
        rew, _ = exp3_Bianchi(MAB, T, eta=[eta] * T)
        rew_P, _ = exp3P_Bianchi(MAB, T, beta=eta, gamma=K * eta, eta=eta)
        rew_IX, _ = exp3_IX(MAB, T, eta, eta / 2)

        # Cumulative reward over the first T2 rounds.
        exp3_T2 = np.sum(rew[:T2])
        exp3_P_T2 = np.sum(rew_P[:T2])
        exp3_IX_T2 = np.sum(rew_IX[:T2])

        # Cumulative reward over all T rounds, reusing the first-half sums.
        exp3_T = exp3_T2 + np.sum(rew[T2:])
        exp3_P_T = exp3_P_T2 + np.sum(rew_P[T2:])
        exp3_IX_T = exp3_IX_T2 + np.sum(rew_IX[T2:])

        R_T2[:, j, i] = [exp3_T2, exp3_P_T2, exp3_IX_T2]
        R_T[:, j, i] = [exp3_T, exp3_P_T, exp3_IX_T]

In [None]:
# Mean and standard deviation over the repetitions (last axis of R_T / R_T2).
R_T_mean = np.mean(R_T, axis=2)
R_T2_mean = np.mean(R_T2, axis=2)
R_T_std = np.std(R_T, axis=2)
R_T2_std = np.std(R_T2, axis=2)

# x-axis: the same multiplier grid as the sweep (1e-3 .. 50), sized from the
# data so that it always matches the number of recorded multipliers.
multiplier_grid = np.linspace(1e-3, 5e1, R_T_mean.shape[1])

# Row order of R_T / R_T2 along axis 0.
algo_labels = ["Exp3", "Exp3.P", "Exp3-IX"]

# Left panel: cumulative reward after the first T2 rounds.
plt.subplot(1, 2, 1)
for k, label in enumerate(algo_labels):
    plt.errorbar(multiplier_grid, R_T2_mean[k], R_T2_std[k], label=label)
plt.xlabel("learning-rate multiplier")
plt.title("Cumulative reward after T2 rounds")
plt.legend()

# Right panel: cumulative reward after all T rounds.
plt.subplot(1, 2, 2)
for k, label in enumerate(algo_labels):
    plt.errorbar(multiplier_grid, R_T_mean[k], R_T_std[k], label=label)
plt.xlabel("learning-rate multiplier")
plt.title("Cumulative reward after T rounds")
plt.legend()

In [None]:
rew = exp3_Bianchi(MAB, T, eta=etas)
rew_P = exp3P_Bianchi(MAB, T, beta=beta, gamma=gamma, eta=eta)
rew_IX, _ = exp3_IX(MAB, T, eta=eta, gamma=0.5)
rew_UCB, _ = UCB1(MAB, T, rho=0.7)
rew_random = Random(MAB, T)

In [None]:
#set up the parameters

K = len(MAB)
eta = np.sqrt(2 * np.log(K) / T * K)
etas_exp3 = [eta for _ in range(T)]

delta = 0.05
eta = 0.95 * np.sqrt(np.log(K) / (T * K))
gamma = 1.05 * np.sqrt(np.log(K) * K / T)
beta = np.sqrt(np.log(K / delta) /  (T * K))

In [None]:
# Running total of the rewards collected by each algorithm.
exp3_cumsum, exp3_P_cumsum, exp3_IX_cumsum, UCB_cumsum, rew_cumsum = (
    np.cumsum(rewards)
    for rewards in (rew, rew_P, rew_IX, rew_UCB, rew_random)
)

# (legend label, curve) pairs, in drawing order.
curves = [
    ("Exp3", exp3_cumsum),
    ("Exp3.P", exp3_P_cumsum),
    ("Exp3-IX", exp3_IX_cumsum),
    ("UCB", UCB_cumsum),
    ("random", rew_cumsum),
]

# Same dashed style for every curve, with a marker every 1500 rounds.
for name, curve in curves:
    plt.plot(curve, '--o', markevery=1500, label=name)

plt.legend(loc="upper left")
plt.title("Cumulative reward")