In [1]:
import datetime
import itertools
import os
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Let imports resolve against the parent directory (presumably where the
# `bandits` package used by the next cell lives -- confirm against repo layout).
sys.path.append("..")

# Start from matplotlib's "classic" style sheet, then override selected
# settings in a single batch.
mpl.style.use("classic")
mpl.rcParams.update({
    # Default figure size.
    "figure.figsize": [5, 3],
    # Uniform stroke width for axes, grid, lines and patches.
    "axes.linewidth": 0.75,
    "grid.linewidth": 0.75,
    "lines.linewidth": 0.75,
    "patch.linewidth": 0.75,
    # Major tick lengths.
    "xtick.major.size": 3,
    "ytick.major.size": 3,
    # Font type used by the PDF/PS backends, and text sizes.
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "font.size": 9,
    "axes.titlesize": 10,
    "legend.fontsize": "medium",
})

# Report the matplotlib version in use and confirm the setup cell completed.
print("matplotlib %s" % mpl.__version__)
print('Successful!')

matplotlib 2.0.0
Successful!


In [2]:
from bandits.simulator import *
from bandits.algorithms import *
from bandits.phe import *
from bandits.hse import *

def linestyle2dashes(style):
  """Translate a line-style string into a dash pattern tuple.

  Args:
    style: line-style spec; "--" and ":" are recognised explicitly.

  Returns:
    (3, 3) for "--", (0.5, 2.5) for ":", and (None, None) for anything else.
  """
  known_patterns = (
      ("--", (3, 3)),
      (":", (0.5, 2.5)),
  )
  for spec, pattern in known_patterns:
    if style == spec:
      return pattern
  # Any other style falls through to the (None, None) pair.
  return (None, None)

In [None]:
# Algorithms to compare. Each entry is
#   (class name, constructor kwargs, plot color, line style, legend label).
# The class name is resolved through globals() below, so it must have been
# brought into the notebook namespace by an earlier cell.
algs = [
    ("UCB1", {}, "cyan", "-", "UCB1"),
    ("UCBV", {}, "gray", "-", "UCB-V"),
    ("NonParaTS", {}, "blue", "-", r"NP-TS"),
    ("SSMC", {}, "black", "-", r"SSMC"),
    # ("TS", {}, "red", "-", r"Ber-TS"),
    ("GaussTS", {"sigma": 0.5}, "red", "-", r"Gauss-TS ($\sigma = 0.5$)"),
    # ("PHE", {"a": 1.1, "pseudo_reward": "bernoulli"}, "darkorange", "-", 
         # r"Ber-PHE ($a = 1.1$)"),
    ("PHE", {"a": 0.5, "pseudo_reward": "Gauss"}, "darkorange", "-", 
           r"Gauss-PHE ($a = 0.5$)"),
    ("HS_SWR_scale", {"sample_portion": 0.6, 'z': 0.6}, "darkgreen", "-", 
           r"CORe ($\alpha = z = 0.6$)"),
]

num_runs = 5     # number of bandit environments that are generated
n = 10000        # number of rounds on the x-axis; passed to evaluate_parallel
K = 10           # length of each randomly drawn mean vector
noise = 'gauss'  # reward model: 'ber', 'beta' or 'gauss'

step = np.arange(1, n + 1)
# Zero-based indices of 10 evenly spaced rounds at which error bars are drawn.
sube = (step.size // 10) * np.arange(1, 11) - 1

# Environment constructors keyed by noise model. Lambdas defer the name lookup
# so that only the selected bandit class has to exist in the namespace.
bandit_factories = {
    'ber': lambda means: BerBandit(means),
    'beta': lambda means: BetaBandit(means),
    'gauss': lambda means: GaussBandit(means, sigma=0.5),
}
plot_titles = {
    'ber': "Bernoulli bandit",
    'beta': "Beta bandit",
    'gauss': r"Gaussian bandit, $\sigma = 0.5$",
}

# Fail fast on a mistyped noise model instead of silently building an empty
# environment list and an untitled plot.
if noise not in bandit_factories:
    raise ValueError("unknown noise model {!r}; expected one of {}".format(
        noise, sorted(bandit_factories)))

# Make sure the output directory exists *before* the simulations run, so a
# missing folder cannot discard the results at save time.
out_dir = "results"
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# One environment per run, with means drawn uniformly from [0.25, 0.75).
# NOTE(review): the means come from the unseeded global NumPy RNG, so the
# figure is not reproducible across kernel restarts -- consider seeding.
env = []
for run in range(num_runs):
    mu = 0.5 * np.random.rand(K) + 0.25
    env.append(bandit_factories[noise](mu))

plt.figure(figsize=(3.8, 2.6))
plt.subplot(1, 1, 1)
for alg_name, alg_params, color, linestyle, label in algs:
    alg_class = globals()[alg_name]
    regret, _ = evaluate_parallel(alg_class, alg_params, env, n, num_process=5)

    # Assumes regret is indexed (round, run): cumulate over rounds (axis 0),
    # then average over axis 1 -- TODO confirm against evaluate_parallel.
    cum_regret = regret.cumsum(axis=0)
    plt.plot(step, cum_regret.mean(axis=1),
             color, dashes=linestyle2dashes(linestyle),
             label=label)
    # Error bars: standard deviation across axis 1 divided by sqrt of its size.
    plt.errorbar(step[sube], cum_regret[sube, :].mean(axis=1),
                 cum_regret[sube, :].std(axis=1) / np.sqrt(cum_regret.shape[1]),
                 fmt="none", ecolor=color, capsize=2.5)

plt.title(plot_titles[noise])
plt.xlabel("Round n",size=10)
plt.ylim(bottom=1, top=300)
#   plt.yscale("log")
plt.ylabel("Regret (average {} runs)".format(num_runs),size=10)
plt.legend(loc="upper right", ncol=2, frameon=False)

plt.tight_layout()

# Timestamped file name (month_day_hour_minute) so successive runs within
# different minutes do not overwrite each other.
now = datetime.datetime.now()
out_path = "results/compare_baselines_meanrange05_{}_{}_{}_{}_{}.pdf".format(
    noise, now.month, now.day, now.hour, now.minute)
with open(out_path, "wb") as f:
    # bbox_inches is left at its default; the previous value 0 was not a
    # documented option and only acted as a falsy no-op.
    plt.savefig(f, format="pdf", dpi=1200)


Evaluating UCB1..... 0.8 seconds
Regret: 445.12 +/- 36.43 (median: 462.73, max: 546.88, min: 322.24)
Evaluating UCB-V..... 1.0 seconds
Regret: 763.11 +/- 54.57 (median: 773.10, max: 935.15, min: 581.20)
Evaluating Non-Parametric TS..... 36.3 seconds
Regret: 131.58 +/- 19.24 (median: 108.53, max: 214.44, min: 95.84)
Evaluating SSMC..... 12.5 seconds
Regret: 117.68 +/- 15.13 (median: 135.95, max: 155.95, min: 69.43)
Evaluating Gaussian TS..... 0.5 seconds
Regret: 136.69 +/- 13.84 (median: 129.69, max: 173.21, min: 99.25)
Evaluating PHE..... 1.1 seconds
Regret: 86.41 +/- 14.93 (median: 75.47, max: 143.40, min: 49.40)
Evaluating HS-SampleWithReplacement