# Exp 26 analysis

See `./infomercial/Makefile` for experimental
details.

In [5]:
import os
import numpy as np

from IPython.display import Image
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set_style('ticks')

matplotlib.rcParams.update({'font.size': 16})
matplotlib.rc('axes', titlesize=16)

from infomercial.exp import meta_bandit
from infomercial.local_gym import bandit
from infomercial.exp.meta_bandit import load_checkpoint

import gym

In [6]:
# Uncomment to list the experiment data files matching exp2* in ../data:
# ls ../data/exp2*

# Load and process data

In [7]:
# Directory holding the experiment's `.pkl` checkpoint files. The default is
# the path this notebook was originally run with; set the
# INFOMERCIAL_DATA_PATH environment variable to point at a different copy of
# the data without editing this cell.
data_path = os.environ.get(
    "INFOMERCIAL_DATA_PATH", "/Users/qualia/Code/infomercial/data/"
)
exp_name = "exp26"

# Two checkpoints are read for the experiment:
#   <exp_name>_best.pkl   -> best_params   (indexed below by "lr" / "tie_threshold")
#   <exp_name>_sorted.pkl -> sorted_params (one entry per trial)
best_params = load_checkpoint(os.path.join(data_path, f"{exp_name}_best.pkl"))
sorted_params = load_checkpoint(os.path.join(data_path, f"{exp_name}_sorted.pkl"))

In [8]:
# Inspect the loaded trials. Per the recorded output, this is a dict keyed by
# integer index whose entries hold 'lr', 'tie_threshold' and 'total_R'.
sorted_params

{0: {'lr': 0.09797562139042901,
  'tie_threshold': 0.09034497661925166,
  'total_R': 1453.0},
 1: {'lr': 0.08897838666023337,
  'tie_threshold': 0.08647412795011832,
  'total_R': 1445.0},
 2: {'lr': 0.0856615104180137,
  'tie_threshold': 0.08047369624659799,
  'total_R': 1294.0},
 3: {'lr': 0.060444695805847305,
  'tie_threshold': 0.09980171523727559,
  'total_R': 1148.0},
 4: {'lr': 0.08881757972259673,
  'tie_threshold': 0.09777114477450259,
  'total_R': 1111.0},
 5: {'lr': 0.08634195279950234,
  'tie_threshold': 0.00041671112725527614,
  'total_R': 1091.0},
 6: {'lr': 0.08893359866174975,
  'tie_threshold': 0.09767676819132176,
  'total_R': 1043.0},
 7: {'lr': 0.063997375896676,
  'tie_threshold': 0.09630838485943594,
  'total_R': 1021.0},
 8: {'lr': 0.08946997927876622,
  'tie_threshold': 0.08699644429433895,
  'total_R': 1019.0},
 9: {'lr': 0.07512472748345514,
  'tie_threshold': 0.09665553797230741,
  'total_R': 1011.0},
 10: {'lr': 0.09392386204832667,
  'tie_threshold': 0.09775

# Performance

of best parameters

In [None]:
# Gym environment id used for this run (also reused by the plotting cell).
env_name = 'BanditOneHigh1000-v0'

# Run w/ best params: gather the call arguments first so they are easy to
# inspect, then hand them to meta_bandit in a single call.
best_run_kwargs = dict(
    env_name=env_name,
    num_episodes=3000,
    lr=best_params["lr"],
    tie_threshold=best_params["tie_threshold"],
    seed_value=19,
    save="exp26_best_model.pkl",
)
result = meta_bandit(**best_run_kwargs)

  result = entry_point.load(False)


In [None]:
# Plot run
episodes = result["episodes"]
actions =result["actions"]
scores_R = result["scores_R"]
values_R = result["values_R"]

# Get some data from the gym...
env = gym.make(env_name)
best = env.env.best
print(f"Best arm: {best}, last arm: {actions[-1]}")

# Init plot
fig = plt.figure(figsize=(6, 14))
grid = plt.GridSpec(5, 1, wspace=0.3, hspace=0.8)

# Do plots:
# Arm
plt.subplot(grid[0, 0])
plt.scatter(episodes, actions, color="black", alpha=.5, s=2, label="Bandit")
plt.plot(episodes, np.repeat(best, np.max(episodes)+1), 
         color="red", alpha=0.8, ls='--', linewidth=2)
plt.ylim(-.1, np.max(actions)+1.1)
plt.ylabel("Arm choice")
plt.xlabel("Episode")

# score
plt.subplot(grid[1, 0])
plt.scatter(episodes, scores_R, color="grey", alpha=0.4, s=2, label="R")
plt.ylabel("log score")
plt.xlabel("Episode")
plt.semilogy()
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
_ = sns.despine()

# Q
plt.subplot(grid[2, 0])
plt.scatter(episodes, values_R, color="grey", alpha=0.4, s=2, label="R")
plt.ylabel("log Q(s,a)")
plt.xlabel("Episode")
plt.semilogy()
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
_ = sns.despine()

# -
plt.savefig("figures/epsilon_bandit.pdf",  bbox_inches='tight')
plt.savefig("figures/epsilon_bandit.eps",  bbox_inches='tight')

# Sensitivity

to parameter choices

In [None]:
# Collect total reward and the two hyperparameters for every trial.
# The entries of `sorted_params` hold 'lr', 'tie_threshold' and 'total_R'
# (see the recorded output of the cell that displays it); there is no 'beta'
# key, so the second hyperparameter is read from 'tie_threshold'.
trials = list(sorted_params.keys())
total_Rs = [sorted_params[t]['total_R'] for t in trials]
tie_thresholds = [sorted_params[t]['tie_threshold'] for t in trials]
lrs = [sorted_params[t]['lr'] for t in trials]

# Init plot
fig = plt.figure(figsize=(10, 18))
grid = plt.GridSpec(4, 1, wspace=0.3, hspace=0.8)

# Do plots:
# total R for each trial key
ax = fig.add_subplot(grid[0, 0])
ax.scatter(trials, total_Rs, color="black", alpha=.5, s=6, label="total R")
ax.set_xlabel("Sorted params")
ax.set_ylabel("total R")

# total R as a function of tie_threshold
ax = fig.add_subplot(grid[1, 0])
ax.scatter(tie_thresholds, total_Rs, color="black", alpha=.3, s=6, label="total R")
ax.set_xlabel("tie_threshold")
ax.set_ylabel("total R")

# total R as a function of lr
ax = fig.add_subplot(grid[2, 0])
ax.scatter(lrs, total_Rs, color="black", alpha=.5, s=6, label="total R")
ax.set_xlabel("lr")
ax.set_ylabel("total R")

# Joint view of both hyperparameters; marker area scales with total R.
ax = fig.add_subplot(grid[3, 0])
ax.scatter(tie_thresholds, lrs, color="black", alpha=.5,
           s=np.asarray(total_Rs) / 50, label="total R")
ax.set_xlabel("tie_threshold")
ax.set_ylabel("lr")

# Remove the top/right spines from every axes in the figure.
_ = sns.despine(fig=fig)