In [3]:
import datetime
import itertools
import os
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(0)

# Extend the import search path with the parent directory
# (presumably where the `bandits` package imported below lives -- confirm).
sys.path.append("..")

# Start from the "classic" style sheet, then override individual settings.
mpl.style.use("classic")
mpl.rcParams["figure.figsize"] = [5, 3]

# One common stroke width for axes, grid, lines and patches.
for stroke_key in ("axes.linewidth", "grid.linewidth",
                   "lines.linewidth", "patch.linewidth"):
  mpl.rcParams[stroke_key] = 0.75

# Same major tick length on both axes.
for tick_key in ("xtick.major.size", "ytick.major.size"):
  mpl.rcParams[tick_key] = 3

# Font embedding type for the PDF and PostScript backends.
for fonttype_key in ("pdf.fonttype", "ps.fonttype"):
  mpl.rcParams[fonttype_key] = 42

# Text sizes.
mpl.rcParams["font.size"] = 9
mpl.rcParams["axes.titlesize"] = 10
mpl.rcParams["legend.fontsize"] = "medium"

# Report the matplotlib version in use and confirm the cell ran to the end.
print("matplotlib %s" % mpl.__version__)
print('Successful!')

matplotlib 2.0.0
Successful!


In [4]:
from bandits.simulator import *
from bandits.algorithms import *
from bandits.phe import *
from bandits.hse import *

def linestyle2dashes(style):
  """Translate a line-style string into an (on, off) dash tuple.

  Args:
    style: line-style specifier such as "--" or ":".

  Returns:
    (3, 3) for "--", (0.5, 2.5) for ":", and (None, None) for any other
    value. The result is meant to be passed as the `dashes=` argument of
    a plotting call.
  """
  known_patterns = (
    ("--", (3, 3)),
    (":", (0.5, 2.5)),
  )
  # Compare with == (rather than hashing) so that any input type is accepted.
  for spec, dash_pair in known_patterns:
    if style == spec:
      return dash_pair
  return (None, None)

In [5]:
# Reward-noise setting handed to LinBandit and embedded in the output file name.
reward_noise = "normal"

# One entry per algorithm to compare:
#   (class name, constructor kwargs, plot colour, line style, legend label)
algs = [
  ("LinUCB", dict(), "cyan", "-", r"LinUCB"),
  ("LinTS", dict(sigma=0.5), "red", "-", r"LinTS ($\sigma=0.5$)"),
  ("LinPHE", dict(a=0.5), "darkorange", "-", r"Gauss-LinPHE ($a = 0.5$)"),
  (
    "LinHS_SWR_scale",
    dict(sample_portion=0.6, z=0.6),
    "darkgreen",
    "-",
    r"LinCORe ($\alpha = z = 0.6$)",
  ),
]

# num_runs: independent environments; n: rounds per run; K: number of arms.
num_runs, n, K = 5, 10000, 50

# Round numbers 1..n for the x-axis.
step = np.arange(n) + 1
# Zero-based index of the last round in each tenth of the horizon;
# these are the positions where error bars are drawn.
sube = np.arange(1, 11) * (step.size // 10) - 1

plt.figure(figsize=(3.8, 2.6))
# Only one panel (one feature dimension) is drawn; the loop and the
# `fig`-based conditions below are kept so more panels can be added.
for fig in range(1):
  d = 10

  # Draw one random problem instance per run. Every algorithm is evaluated
  # on the same list of environments.
  env = []
  for run in range(num_runs):
    # standard d-dimensional basis (with a bias term)
    basis = np.eye(d)
    basis[:, -1] = 1

    # arm features in a unit (d - 2)-sphere
    X = np.random.randn(K, d - 1)
    X /= np.sqrt(np.square(X).sum(axis=1))[:, np.newaxis]
    X = np.hstack((X, np.ones((K, 1))))  # bias term
    # the first d arms are replaced by the basis vectors built above
    X[: basis.shape[0], :] = basis

    # parameter vector in a (d - 2)-sphere with radius 0.5
    theta = np.random.randn(d - 1)
    theta *= 0.5 / np.sqrt(np.square(theta).sum())
    theta = np.append(theta, [0.5])

    # create environment
    env.append(LinBandit(X, theta, reward_noise, sigma=0.5))
    # progress row: best arm, then smallest and largest value of `mu`
    print("%3d: %.2f %.2f | " % (env[-1].best_arm,
      env[-1].mu.min(), env[-1].mu.max()), end="")
    if (run + 1) % 10 == 0:
      print()
  if num_runs % 10 != 0:
    # Close the last, partially filled progress row so that whatever is
    # printed next starts on a fresh line.
    print()

  plt.subplot(1, 1, fig + 1)
  for alg in algs:
    alg_name, alg_params, color, style, label = alg
    # Resolve the class by name from the notebook namespace; a plain lookup
    # is enough for a bare identifier and avoids eval().
    alg_class = globals()[alg_name]

    # evaluate / evaluate_parallel come from the bandits package (not shown
    # here). NOTE(review): num_process=5 happens to equal num_runs -- confirm
    # whether the two are meant to be tied together.
    if d < 14:
      regret, _ = evaluate_parallel(alg_class, alg_params, env, n, num_process=5)
    else:
      regret, _ = evaluate(alg_class, alg_params, env, n)

    # Axis 0 is accumulated and plotted against `step` (rounds); axis 1 is
    # averaged over (presumably one column per run -- confirm).
    cum_regret = regret.cumsum(axis=0)
    mean_regret = cum_regret.mean(axis=1)
    # standard error of the mean across axis 1 (name kept from the original)
    std_regret = cum_regret.std(axis=1) / np.sqrt(cum_regret.shape[1])

    plt.plot(step, mean_regret,
      color, dashes=linestyle2dashes(style),
      label=label)

    # error bars only at the `sube` checkpoints
    plt.errorbar(step[sube], mean_regret[sube], std_regret[sube],
      fmt="none", ecolor=color, capsize=2)

  plt.title(r"d = %d, Gaussian linear bandit ($\sigma = 0.5$) " % d)
  plt.xlabel("Round n",size=10)
  if not fig:
    plt.ylabel("Regret (average {} runs)".format(num_runs), size=10)
  plt.ylim(bottom=10,top=1000)
  plt.yscale("log")
  if fig <= 1:
    plt.legend(loc="best", ncol=1, frameon=False, fontsize=8)

plt.tight_layout()

# Save the current figure as a PDF whose name carries the noise setting and
# the current month, day, hour and minute.
now = datetime.datetime.now()
out_dir = "results"
# Create the output folder if needed, so a finished run is not lost to a
# FileNotFoundError at the very last step.
os.makedirs(out_dir, exist_ok=True)
out_path = os.path.join(
  out_dir,
  "compare_baselines_linear_{}_{}_{}_{}_{}.pdf".format(
    reward_noise, now.month, now.day, now.hour, now.minute))
# bbox_inches=None is the documented spelling of "no cropping override"
# (the setting then comes from rcParams["savefig.bbox"]); the previous
# value 0 was not a documented option and only worked because it is falsy.
plt.savefig(out_path, format="pdf", dpi=1200, bbox_inches=None)

 23: 0.17 0.91 |  49: 0.11 0.89 |   2: 0.29 0.86 |  42: 0.18 0.72 |   6: 0.16 0.83 | Evaluating LinUCB..... 2.4 seconds
Regret: 602.42 +/- 44.62 (median: 634.06, max: 702.27, min: 412.99)
Evaluating LinTS..... 5.5 seconds
Regret: 142.81 +/- 15.03 (median: 132.36, max: 199.26, min: 110.29)
Evaluating LinPHE..... 3.1 seconds
Regret: 122.86 +/- 7.07 (median: 129.92, max: 141.30, min: 100.73)
Evaluating Lin HS-SWR-scale..... 60.4 seconds
Regret: 107.42 +/- 9.34 (median: 98.89, max: 140.01, min: 79.46)
