# Figure 3

Combine results for all agents and bandits, creating Fig 3 for the draft at [https://www.biorxiv.org/content/10.1101/671362v2](https://www.biorxiv.org/content/10.1101/671362v2)

See `./infomercial/Makefile` for experimental details.

In [2]:
import os
import numpy as np
import pandas as pd
from pprint import pprint

from IPython.display import Image
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set_style('ticks')

matplotlib.rcParams.update({'font.size': 16})
matplotlib.rc('axes', titlesize=16)

from infomercial.exp import meta_bandit
from infomercial.exp import epsilon_bandit
from infomercial.exp import beta_bandit
from infomercial.exp import softbeta_bandit
from infomercial.local_gym import bandit
from infomercial.exp.meta_bandit import load_checkpoint

import gym

# Load and process data

Save results to `.csv` files.

In [3]:
# Directory the experiment checkpoints (`<exp_name>_<trial>.pkl`) are read from.
# NOTE: absolute, machine-specific location; adjust it before running elsewhere.
data_path = "/Users/qualia/Code/infomercial/data/"

## BanditOneHigh10

## Example trace 

In [8]:
env_name = "BanditOneHigh10-v0"
exp_names = ["exp128",]
agent_names = ["eta",]
num_trials = 100

# --------------------------------
# Re-save traces
# One CSV is written per agent, so each agent gets its own fresh table.
for agent, exp_name in zip(agent_names, exp_names):
    # Reset here, not before the loop: `table` is rebound to an ndarray once
    # the runs are stacked, so a list shared across agents would break on the
    # second agent (no `.append`) and would mix agents in one file.
    table = []
    for n in range(num_trials):
        # Checkpoint files are numbered from 1; the `n` column stays 0-based.
        result = load_checkpoint(os.path.join(data_path, f"{exp_name}_{n+1}.pkl"))

        num_episodes = len(result["episodes"])
        rows = [
            result["episodes"],
            result["p_bests"],
            result["values_R"],
            result["values_E"],
            result["scores_R"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
        ]

        # Stack the fields as rows, then transpose: one row per episode.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(
        table,
        columns=[
            "episodes",
            "p_bests",
            "values_R",
            "values_E",
            "scores_R",
            "regrets",
            "arms",
            "exps",
            "agents",
            "n",
        ],
    )
    df.to_csv(f"../data/example_traces_{agent}_{env_name}.csv", index=False)
    
# -
# Earlier run, kept for reference:
# exp_names = ["exp131",]
exp_names = ['exp158',]
agent_names = ["epsilon",]
num_trials = 100

# --------------------------------
# Re-save traces
# One CSV is written per agent, so each agent gets its own fresh table.
for agent, exp_name in zip(agent_names, exp_names):
    # Reset here, not before the loop: `table` is rebound to an ndarray once
    # the runs are stacked, so a list shared across agents would break on the
    # second agent (no `.append`) and would mix agents in one file.
    table = []
    for n in range(num_trials):
        # Checkpoint files are numbered from 1; the `n` column stays 0-based.
        result = load_checkpoint(os.path.join(data_path, f"{exp_name}_{n+1}.pkl"))

        num_episodes = len(result["episodes"])
        rows = [
            result["episodes"],
            result["p_bests"],
            result["values_R"],
            # value_E doesn't exist here; zero-fill so the columns still line
            # up with the other example-trace file.
            np.zeros_like(result["values_R"]),
            result["scores_R"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
        ]

        # Stack the fields as rows, then transpose: one row per episode.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(
        table,
        columns=[
            "episodes",
            "p_bests",
            "values_R",
            "values_E",
            "scores_R",
            "regrets",
            "arms",
            "exps",
            "agents",
            "n",
        ],
    )
    df.to_csv(f"../data/example_traces_{agent}_{env_name}.csv", index=False)

## All

In [3]:
env_name = "BanditOneHigh10-v0"

# Original
# exp_names = ["exp128", "exp129", "exp130", "exp131", "exp149", "exp153"]

# Actor() rerun
exp_names = ["exp128", "exp129", "exp157", "exp158", "exp165", "exp169"]

agent_names = ["eta", "beta", "softbeta", "epsilon", "anneal-epsilon", "random"]
num_trials = 100

# Checkpoint key holding each agent's final critic, in `agent_names` order.
critic_names = ["critic_R", "critic", "critic", "critic_R", "critic_R", "critic_R"]

# --------------------------------
# Each checkpoint is read only once; the rows of all three per-run tables
# (traces, totals, critic) are filled from that same `result`.
trace_rows = []
total_rows = []
critic_rows = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n in range(num_trials):
        # Checkpoint files are numbered from 1; the `n` column stays 0-based.
        result = load_checkpoint(os.path.join(data_path, f"{exp_name}_{n+1}.pkl"))

        # Traces: stack the fields as rows, then transpose to one row per episode.
        num_episodes = len(result["episodes"])
        trace_rows.append(
            np.vstack([
                result["episodes"],
                result["p_bests"],
                result["values_R"],
                result["actions"],  # saved under the "arms" column
                [exp_name] * num_episodes,
                [agent] * num_episodes,
                [n] * num_episodes,
            ]).T
        )

        # Totals: a single row per run.
        total_rows.append(
            np.vstack([
                result['total_R'],
                result['p_bests'][-1],      # last p_best of the run
                np.sum(result['regrets']),  # regret summed over the run
                exp_name,
                agent,
                n,
            ]).T
        )

        # Final critic: one row per (arm, value) entry.
        critic = result[critic_name]
        arms = list(critic.keys())
        critic_rows.append(
            np.vstack([
                arms,
                list(critic.values()),
                [exp_name] * len(arms),
                [agent] * len(arms),
                [n] * len(arms),
            ]).T
        )

# --------------------------------
# Re-save traces
df = pd.DataFrame(
    np.vstack(trace_rows),
    columns=["episodes", "p_bests", "values_R", "arms", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_traces_{env_name}.csv", index=False)

# --------------------------------
# Save total regret and reward
df = pd.DataFrame(
    np.vstack(total_rows),
    columns=["total_R", "p_bests", "regrets", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_total_{env_name}.csv", index=False)

# --------------------------------
# Save final critic weights/values
df = pd.DataFrame(
    np.vstack(critic_rows),
    columns=["arms", "critic_values", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_critic_{env_name}.csv", index=False)

# --------------------------------
# Save bandit p(R)
env = gym.make(env_name)

# One row per arm: its index and the matching entry of the env's `p_dist`.
table = np.vstack([
    list(range(env.env.n_bandits)),
    env.env.p_dist,
]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])
df.to_csv(f"../data/table_bandit_{env_name}.csv", index=False)

In [26]:
# Inspect which fields the checkpoint currently bound to `result` provides.
result.keys()

dict_keys(['best', 'episodes', 'policies', 'actions', 'p_bests', 'ties', 'critic_E', 'critic_R', 'total_E', 'total_R', 'total_E_R', 'scores_E', 'scores_R', 'values_E', 'values_R', 'regrets', 'env_name', 'num_episodes', 'tie_break', 'tie_threshold', 'lr_R'])

# BanditTwoHigh10

In [28]:
env_name = "BanditTwoHigh10-v0"
# Original
# exp_names = ["exp132", "exp133", "exp134", "exp135", "exp150", "exp154"]
exp_names = ["exp132", "exp133", "exp159", "exp160", "exp166", "exp170"]
agent_names = ["eta", "beta", "softbeta", "epsilon", "anneal-epsilon", "random"]
num_trials = 100

# Checkpoint key holding each agent's final critic, in `agent_names` order.
critic_names = ["critic_R", "critic", "critic", "critic_R", "critic_R", "critic_R"]

# --------------------------------
# Each checkpoint is read only once; the rows of all three per-run tables
# (traces, totals, critic) are filled from that same `result`.
trace_rows = []
total_rows = []
critic_rows = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n in range(num_trials):
        # Checkpoint files are numbered from 1; the `n` column stays 0-based.
        result = load_checkpoint(os.path.join(data_path, f"{exp_name}_{n+1}.pkl"))

        # Traces: stack the fields as rows, then transpose to one row per episode.
        num_episodes = len(result["episodes"])
        trace_rows.append(
            np.vstack([
                result["episodes"],
                result["p_bests"],
                result["values_R"],
                result["actions"],  # saved under the "arms" column
                [exp_name] * num_episodes,
                [agent] * num_episodes,
                [n] * num_episodes,
            ]).T
        )

        # Totals: a single row per run.
        total_rows.append(
            np.vstack([
                result['total_R'],
                result['p_bests'][-1],      # last p_best of the run
                np.sum(result['regrets']),  # regret summed over the run
                exp_name,
                agent,
                n,
            ]).T
        )

        # Final critic: one row per (arm, value) entry.
        critic = result[critic_name]
        arms = list(critic.keys())
        critic_rows.append(
            np.vstack([
                arms,
                list(critic.values()),
                [exp_name] * len(arms),
                [agent] * len(arms),
                [n] * len(arms),
            ]).T
        )

# --------------------------------
# Re-save traces
df = pd.DataFrame(
    np.vstack(trace_rows),
    columns=["episodes", "p_bests", "values_R", "arms", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_traces_{env_name}.csv", index=False)

# --------------------------------
# Save total regret and reward
df = pd.DataFrame(
    np.vstack(total_rows),
    columns=["total_R", "p_bests", "regrets", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_total_{env_name}.csv", index=False)

# --------------------------------
# Save final critic weights/values
df = pd.DataFrame(
    np.vstack(critic_rows),
    columns=["arms", "critic_values", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_critic_{env_name}.csv", index=False)

# --------------------------------
# Save bandit p(R)
env = gym.make(env_name)

# One row per arm: its index and the matching entry of the env's `p_dist`.
table = np.vstack([
    list(range(env.env.n_bandits)),
    env.env.p_dist,
]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])
df.to_csv(f"../data/table_bandit_{env_name}.csv", index=False)

# BanditUniform121

In [29]:
env_name = "BanditUniform121-v0"
# Original
# exp_names = ["exp136", "exp137", "exp138", "exp139", "exp151", "exp155"]
exp_names = ["exp136", "exp137", "exp161", "exp162", "exp167", "exp171"]
agent_names = ["eta", "beta", "softbeta", "epsilon", "anneal-epsilon", "random"]
num_trials = 100

# Checkpoint key holding each agent's final critic, in `agent_names` order.
critic_names = ["critic_R", "critic", "critic", "critic_R", "critic_R", "critic_R"]

# Too much data to save. Keep every Nth point of the traces.
Nth = 100

# --------------------------------
# Each checkpoint is read only once; the rows of all three per-run tables
# (traces, totals, critic) are filled from that same `result`.
trace_rows = []
total_rows = []
critic_rows = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n in range(num_trials):
        # Checkpoint files are numbered from 1; the `n` column stays 0-based.
        result = load_checkpoint(os.path.join(data_path, f"{exp_name}_{n+1}.pkl"))

        # Traces: positions 0, Nth, 2*Nth, ... of each per-episode series.
        index = np.arange(0, len(result["episodes"]), Nth)
        trace_rows.append(
            np.vstack([
                [result["episodes"][i] for i in index],
                [result["p_bests"][i] for i in index],
                [result["values_R"][i] for i in index],
                [result["actions"][i] for i in index],  # saved as "arms"
                [exp_name] * len(index),
                [agent] * len(index),
                [n] * len(index),
            ]).T
        )

        # Totals: a single row per run, computed from the full series.
        total_rows.append(
            np.vstack([
                result['total_R'],
                result['p_bests'][-1],      # last p_best of the run
                np.sum(result['regrets']),  # regret summed over the run
                exp_name,
                agent,
                n,
            ]).T
        )

        # Final critic: one row per (arm, value) entry.
        critic = result[critic_name]
        arms = list(critic.keys())
        critic_rows.append(
            np.vstack([
                arms,
                list(critic.values()),
                [exp_name] * len(arms),
                [agent] * len(arms),
                [n] * len(arms),
            ]).T
        )

# --------------------------------
# Re-save traces
df = pd.DataFrame(
    np.vstack(trace_rows),
    columns=["episodes", "p_bests", "values_R", "arms", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_traces_{env_name}.csv", index=False)

# --------------------------------
# Save total regret and reward
df = pd.DataFrame(
    np.vstack(total_rows),
    columns=["total_R", "p_bests", "regrets", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_total_{env_name}.csv", index=False)

# --------------------------------
# Save final critic weights/values
df = pd.DataFrame(
    np.vstack(critic_rows),
    columns=["arms", "critic_values", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_critic_{env_name}.csv", index=False)

# --------------------------------
# Save bandit p(R)
env = gym.make(env_name)

# One row per arm: its index and the matching entry of the env's `p_dist`.
table = np.vstack([
    list(range(env.env.n_bandits)),
    env.env.p_dist,
]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])
df.to_csv(f"../data/table_bandit_{env_name}.csv", index=False)

# BanditHardAndSparse10

In [30]:
env_name = "BanditHardAndSparse10-v0"
# Original
# exp_names = ["exp140", "exp141", "exp142", "exp143", "exp152", "exp156"]
exp_names = ["exp140", "exp141", "exp163", "exp164", "exp168", "exp172"]
agent_names = ["eta", "beta", "softbeta", "epsilon", "anneal-epsilon", "random"]
num_trials = 100

# Checkpoint key holding each agent's final critic, in `agent_names` order.
critic_names = ["critic_R", "critic", "critic", "critic_R", "critic_R", "critic_R"]

# Too much data to save. Keep every Nth point of the traces.
Nth = 100

# --------------------------------
# Each checkpoint is read only once; the rows of all three per-run tables
# (traces, totals, critic) are filled from that same `result`.
trace_rows = []
total_rows = []
critic_rows = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n in range(num_trials):
        # Checkpoint files are numbered from 1; the `n` column stays 0-based.
        result = load_checkpoint(os.path.join(data_path, f"{exp_name}_{n+1}.pkl"))

        # Traces: positions 0, Nth, 2*Nth, ... of each per-episode series.
        index = np.arange(0, len(result["episodes"]), Nth)
        trace_rows.append(
            np.vstack([
                [result["episodes"][i] for i in index],
                [result["p_bests"][i] for i in index],
                [result["values_R"][i] for i in index],
                [result["actions"][i] for i in index],  # saved as "arms"
                [exp_name] * len(index),
                [agent] * len(index),
                [n] * len(index),
            ]).T
        )

        # Totals: a single row per run, computed from the full series.
        total_rows.append(
            np.vstack([
                result['total_R'],
                result['p_bests'][-1],      # last p_best of the run
                np.sum(result['regrets']),  # regret summed over the run
                exp_name,
                agent,
                n,
            ]).T
        )

        # Final critic: one row per (arm, value) entry.
        critic = result[critic_name]
        arms = list(critic.keys())
        critic_rows.append(
            np.vstack([
                arms,
                list(critic.values()),
                [exp_name] * len(arms),
                [agent] * len(arms),
                [n] * len(arms),
            ]).T
        )

# --------------------------------
# Re-save traces
df = pd.DataFrame(
    np.vstack(trace_rows),
    columns=["episodes", "p_bests", "values_R", "arms", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_traces_{env_name}.csv", index=False)

# --------------------------------
# Save total regret and reward
df = pd.DataFrame(
    np.vstack(total_rows),
    columns=["total_R", "p_bests", "regrets", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_total_{env_name}.csv", index=False)

# --------------------------------
# Save final critic weights/values
df = pd.DataFrame(
    np.vstack(critic_rows),
    columns=["arms", "critic_values", "exps", "agents", "n"],
)
df.to_csv(f"../data/table_critic_{env_name}.csv", index=False)

# --------------------------------
# Save bandit p(R)
env = gym.make(env_name)

# One row per arm: its index and the matching entry of the env's `p_dist`.
table = np.vstack([
    list(range(env.env.n_bandits)),
    env.env.p_dist,
]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])
df.to_csv(f"../data/table_bandit_{env_name}.csv", index=False)