In [11]:
import os
import numpy as np
import pandas as pd
from pprint import pprint
from itertools import product

from IPython.display import Image
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set_style('ticks')

matplotlib.rcParams.update({'font.size': 16})
matplotlib.rc('axes', titlesize=16)

from infomercial.exp import wsls_bandit
from infomercial.exp import epsilon_bandit
from infomercial.exp import beta_bandit
from infomercial.exp import softbeta_bandit
from infomercial.local_gym import bandit
from infomercial.exp.wsls_bandit import load_checkpoint

import gym

# Set the shared data source path

In [12]:
# Directory holding the pickled experiment checkpoints that the cells below
# read. The default is the original hard-coded location; set the
# INFOMERCIAL_DATA_PATH environment variable to point the notebook at another
# copy of the data without editing the code.
data_path = os.environ.get(
    "INFOMERCIAL_DATA_PATH", "/Users/qualia/Code/infomercial/data/")

# 1. Extract examples

First we extract example timecourses for value traces and exploratory behavior for select agents and envs. No need to show everything to get the main point across.

There are two loops: one for values and one for choice behavior.

### BanditOneHigh10

Extract value dynamics, but for dual value only

In [13]:
env_name = "BanditOneHigh10-v0"
exp_names = ["exp310",]
agent_names = ["eta",]
num_trials = 10
num_hp = 10

# Column labels, in the same order as the per-checkpoint rows built below.
value_trace_columns = [
    "episodes",
    "values_R",
    "values_E",
    "regrets",
    "arms",
    "exps",
    "agents",
    "n",
    "rank",
]

# --------------------------------
# Re-save traces: one CSV per agent, holding the per-episode traces from every
# (n, m) checkpoint of that agent's experiment.
for agent, exp_name in zip(agent_names, exp_names):
    # Fresh accumulator per agent. `table` is rebound to an ndarray below, so
    # a list created once outside this loop would break on `.append` as soon
    # as a second agent is added to `agent_names`.
    table = []
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        num_episodes = len(result["episodes"])

        rows = [
            result["episodes"],
            result["values_R"],
            result["values_E"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
            [m] * num_episodes,  # saved under the "rank" column
        ]

        # np.vstack needs every field to have the same length; transposing
        # turns each episode into one table row.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(table, columns=value_trace_columns)
    df.to_csv(
        f"../data/example_value_traces_{agent}_{env_name}.csv",
        index=False)

Extract model behavior, and reward value, for all models on `BanditOneHigh10`

In [14]:
env_name = "BanditOneHigh10-v0"
num_trials = 10
num_hp = 10

# One (agent, experiment) pair per exported CSV; the two lists are aligned
# position-by-position.
agent_names = ["eta", "epsilon", "softbeta"]
exp_names = ["exp310", "exp311", "exp313"]

# Column labels, in the same order as the per-checkpoint rows built below.
trace_columns = [
    "episodes",
    "values_R",
    "regrets",
    "arms",
    "exps",
    "agents",
    "n",
    "rank",
]

for agent, exp_name in zip(agent_names, exp_names):
    # Fresh accumulator per agent: each agent goes to its own file, and
    # `table` is rebound to an ndarray below, so it cannot be appended to on
    # the next pass.
    table = []
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        num_episodes = len(result["episodes"])

        rows = [
            result["episodes"],
            result["values_R"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
            [m] * num_episodes,  # saved under the "rank" column
        ]

        # np.vstack needs every field to have the same length; transposing
        # turns each episode into one table row.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(table, columns=trace_columns)
    df.to_csv(
        f"../data/example_traces_{agent}_{env_name}.csv",
        index=False)

### DeceptiveBanditOneHigh10

In [15]:
env_name = "DeceptiveBanditOneHigh10-v0"
num_trials = 10
num_hp = 10

# One (agent, experiment) pair per exported CSV; the two lists are aligned
# position-by-position.
agent_names = ["eta", "epsilon", "softbeta"]
exp_names = ["exp322", "exp323", "exp325"]

# Column labels, in the same order as the per-checkpoint rows built below.
trace_columns = [
    "episodes",
    "values_R",
    "regrets",
    "arms",
    "exps",
    "agents",
    "n",
    "rank",
]

for agent, exp_name in zip(agent_names, exp_names):
    # Fresh accumulator per agent: each agent goes to its own file, and
    # `table` is rebound to an ndarray below, so it cannot be appended to on
    # the next pass.
    table = []
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        num_episodes = len(result["episodes"])

        rows = [
            result["episodes"],
            result["values_R"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
            [m] * num_episodes,  # saved under the "rank" column
        ]

        # np.vstack needs every field to have the same length; transposing
        # turns each episode into one table row.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(table, columns=trace_columns)
    df.to_csv(
        f"../data/example_traces_{agent}_{env_name}.csv",
        index=False)

### BanditHardAndSparse10

In [16]:
env_name = "BanditHardAndSparse10-v0"
num_trials = 10
num_hp = 10

# One (agent, experiment) pair per exported CSV; the two lists are aligned
# position-by-position.
agent_names = ["eta", "epsilon", "softbeta"]
exp_names = ["exp314", "exp315", "exp317"]

# Column labels, in the same order as the per-checkpoint rows built below.
trace_columns = [
    "episodes",
    "values_R",
    "regrets",
    "arms",
    "exps",
    "agents",
    "n",
    "rank",
]

for agent, exp_name in zip(agent_names, exp_names):
    # Fresh accumulator per agent: each agent goes to its own file, and
    # `table` is rebound to an ndarray below, so it cannot be appended to on
    # the next pass.
    table = []
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        num_episodes = len(result["episodes"])

        rows = [
            result["episodes"],
            result["values_R"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
            [m] * num_episodes,  # saved under the "rank" column
        ]

        # np.vstack needs every field to have the same length; transposing
        # turns each episode into one table row.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(table, columns=trace_columns)
    df.to_csv(
        f"../data/example_traces_{agent}_{env_name}.csv",
        index=False)

### BanditUniform121

In [17]:
env_name = "BanditUniform121-v0"
num_trials = 10
num_hp = 10

# One (agent, experiment) pair per exported CSV; the two lists are aligned
# position-by-position.
agent_names = ["eta", "epsilon", "softbeta"]
exp_names = ["exp318", "exp319", "exp321"]

# Column labels, in the same order as the per-checkpoint rows built below.
trace_columns = [
    "episodes",
    "values_R",
    "regrets",
    "arms",
    "exps",
    "agents",
    "n",
    "rank",
]

for agent, exp_name in zip(agent_names, exp_names):
    # Fresh accumulator per agent: each agent goes to its own file, and
    # `table` is rebound to an ndarray below, so it cannot be appended to on
    # the next pass.
    table = []
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        num_episodes = len(result["episodes"])

        rows = [
            result["episodes"],
            result["values_R"],
            result['regrets'],
            result["actions"],  # saved under the "arms" column
            [exp_name] * num_episodes,
            [agent] * num_episodes,
            [n] * num_episodes,
            [m] * num_episodes,  # saved under the "rank" column
        ]

        # np.vstack needs every field to have the same length; transposing
        # turns each episode into one table row.
        table.append(np.vstack(rows).T)

    table = np.vstack(table)
    df = pd.DataFrame(table, columns=trace_columns)
    df.to_csv(
        f"../data/example_traces_{agent}_{env_name}.csv",
        index=False)

# 2. Extract final performance 

For all envs and agents.

## BanditOneHigh10

Extract final performance: reward, regret, p_best

In [18]:
env_name = "BanditOneHigh10-v0"
table = []

# Learning agents: one summary row per (agent, n, m) checkpoint.
# `agent_names` and `exp_names` are aligned position-by-position.
num_trials = 10
num_hp = 10
exp_names = ["exp310",
             "exp313",
             "exp311",
             "exp312"]
agent_names = ["eta",
               "softbeta",
               "epsilon",
               "anneal-epsilon"]
for agent, exp_name in zip(agent_names, exp_names):
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        rows = [
            result['total_R'],
            result['p_bests'][-1],  # last recorded p_best of the run
            np.sum(result['regrets']),  # regret summed over the whole run
            exp_name,
            agent,
            n,
            m
        ]

        # Seven stacked scalars transposed into a single 1 x 7 table row.
        rows = np.vstack(rows).T
        table.append(rows)

# Random agent
# NOTE(review): every environment section of this notebook reads its random
# baseline from "exp254" -- confirm that experiment was run on this env.
random_name = "exp254"
random_agent = "random"
num_random = 100
for n in range(num_random):
    # Random-agent checkpoints are named <exp>_<n+1>.pkl (no `m` part).
    result = load_checkpoint(
            os.path.join(data_path,
                         f"{random_name}_{n+1}.pkl"))
    rows = [
        result['total_R'],
        result['p_bests'][-1],
        np.sum(result['regrets']),
        # Label the rows with the random experiment and agent. The loop
        # variables `exp_name` / `agent` left over from the loop above would
        # file these runs under the last learning agent ("anneal-epsilon").
        random_name,
        random_agent,
        n,
        1  # rank is fixed to 1 for every random run
    ]
    table.append(rows)

# Save all
table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "total_R",
                      "p_bests",
                      "regrets",
                      "exps",
                      "agents",
                      "n",
                      "rank"]
                 )
df.to_csv(
    f"../data/table_total_{env_name}.csv",
    index=False)

Extract the critic values (learned only)

In [19]:
# Key under which each agent's checkpoint stores its learned critic; aligned
# position-by-position with `agent_names` / `exp_names` from the cell above.
critic_names = ["critic_R",
                "critic",
                "critic_R",
                "critic_R"]

table = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n, m in product(range(num_trials), range(num_hp)):
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        # The checkpoint is the full result dictionary (the totals table reads
        # 'total_R', 'p_bests' and 'regrets' from these same files), so the
        # critic has to be picked out by key; using the whole checkpoint would
        # put result field names in the "arms" column.
        # NOTE(review): assumes the entry is an arm -> value mapping -- confirm
        # against the code that writes the checkpoints.
        critic = result[critic_name]
        num_arms = len(critic.keys())
        rows = [
            list(critic.keys()), # arms
            list(critic.values()),  # values
            [exp_name] * num_arms,
            [agent] * num_arms,
            [n] * num_arms,
            [m] * num_arms
        ]
        # Transpose so that each arm becomes one table row.
        rows = np.vstack(rows).T
        table.append(rows)

table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "arms",
                      "critic_values",
                      "exps",
                      "agents",
                      "n",
                      "rank"
                  ]
                 )
df.to_csv(
    f"../data/table_critic_{env_name}.csv",
    index=False)

Extract the gym data

In [20]:
# Rebuild the environment named by `env_name` and export one row per arm:
# the arm index and the matching entry of the environment's `p_dist`.
env = gym.make(env_name)
arm_ids = list(range(env.n_bandits))

# Two stacked rows (arm index, p_dist), transposed to one row per arm.
table = np.vstack([arm_ids, env.p_dist]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])

bandit_csv = f"../data/table_bandit_{env_name}.csv"
df.to_csv(bandit_csv, index=False)

## DeceptiveBanditOneHigh10

In [21]:
env_name = "DeceptiveBanditOneHigh10-v0"
table = []

# Learning agents: one summary row per (agent, n, m) checkpoint.
# `agent_names` and `exp_names` are aligned position-by-position.
num_trials = 10
num_hp = 10
exp_names = ["exp322",
             "exp325",
             "exp323",
             "exp324"]
agent_names = ["eta",
               "softbeta",
               "epsilon",
               "anneal-epsilon"]
for agent, exp_name in zip(agent_names, exp_names):
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        rows = [
            result['total_R'],
            result['p_bests'][-1],  # last recorded p_best of the run
            np.sum(result['regrets']),  # regret summed over the whole run
            exp_name,
            agent,
            n,
            m
        ]

        # Seven stacked scalars transposed into a single 1 x 7 table row.
        rows = np.vstack(rows).T
        table.append(rows)

# Random agent
# NOTE(review): every environment section of this notebook reads its random
# baseline from "exp254" -- confirm that experiment was run on this env.
random_name = "exp254"
random_agent = "random"
num_random = 100
for n in range(num_random):
    # Random-agent checkpoints are named <exp>_<n+1>.pkl (no `m` part).
    result = load_checkpoint(
            os.path.join(data_path,
                         f"{random_name}_{n+1}.pkl"))
    rows = [
        result['total_R'],
        result['p_bests'][-1],
        np.sum(result['regrets']),
        # Label the rows with the random experiment and agent. The loop
        # variables `exp_name` / `agent` left over from the loop above would
        # file these runs under the last learning agent ("anneal-epsilon").
        random_name,
        random_agent,
        n,
        1  # rank is fixed to 1 for every random run
    ]
    table.append(rows)

# Save all
table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "total_R",
                      "p_bests",
                      "regrets",
                      "exps",
                      "agents",
                      "n",
                      "rank"]
                 )
df.to_csv(
    f"../data/table_total_{env_name}.csv",
    index=False)

Extract critic values (learned only)

In [22]:
# Key under which each agent's checkpoint stores its learned critic; aligned
# position-by-position with `agent_names` / `exp_names` from the cell above.
critic_names = ["critic_R",
                "critic",
                "critic_R",
                "critic_R"]

table = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n, m in product(range(num_trials), range(num_hp)):
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        # The checkpoint is the full result dictionary (the totals table reads
        # 'total_R', 'p_bests' and 'regrets' from these same files), so the
        # critic has to be picked out by key; using the whole checkpoint would
        # put result field names in the "arms" column.
        # NOTE(review): assumes the entry is an arm -> value mapping -- confirm
        # against the code that writes the checkpoints.
        critic = result[critic_name]
        num_arms = len(critic.keys())
        rows = [
            list(critic.keys()), # arms
            list(critic.values()),  # values
            [exp_name] * num_arms,
            [agent] * num_arms,
            [n] * num_arms,
            [m] * num_arms
        ]
        # Transpose so that each arm becomes one table row.
        rows = np.vstack(rows).T
        table.append(rows)

table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "arms",
                      "critic_values",
                      "exps",
                      "agents",
                      "n",
                      "rank"
                  ]
                 )
df.to_csv(
    f"../data/table_critic_{env_name}.csv",
    index=False)

Extract gym data

In [23]:
# Rebuild the environment named by `env_name` and export one row per arm:
# the arm index and the matching entry of the environment's `p_dist`.
env = gym.make(env_name)
arm_ids = list(range(env.n_bandits))

# Two stacked rows (arm index, p_dist), transposed to one row per arm.
table = np.vstack([arm_ids, env.p_dist]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])

bandit_csv = f"../data/table_bandit_{env_name}.csv"
df.to_csv(bandit_csv, index=False)

## BanditUniform121

In [24]:
env_name = "BanditUniform121-v0"
table = []

# Learning agents: one summary row per (agent, n, m) checkpoint.
# `agent_names` and `exp_names` are aligned position-by-position.
num_trials = 10
num_hp = 10
exp_names = ["exp318",
             "exp321",
             "exp319",
             "exp320"]
agent_names = ["eta",
               "softbeta",
               "epsilon",
               "anneal-epsilon"]
for agent, exp_name in zip(agent_names, exp_names):
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        rows = [
            result['total_R'],
            result['p_bests'][-1],  # last recorded p_best of the run
            np.sum(result['regrets']),  # regret summed over the whole run
            exp_name,
            agent,
            n,
            m
        ]

        # Seven stacked scalars transposed into a single 1 x 7 table row.
        rows = np.vstack(rows).T
        table.append(rows)

# Random agent
# NOTE(review): every environment section of this notebook reads its random
# baseline from "exp254" -- confirm that experiment was run on this env.
random_name = "exp254"
random_agent = "random"
num_random = 100
for n in range(num_random):
    # Random-agent checkpoints are named <exp>_<n+1>.pkl (no `m` part).
    result = load_checkpoint(
            os.path.join(data_path,
                         f"{random_name}_{n+1}.pkl"))
    rows = [
        result['total_R'],
        result['p_bests'][-1],
        np.sum(result['regrets']),
        # Label the rows with the random experiment and agent. The loop
        # variables `exp_name` / `agent` left over from the loop above would
        # file these runs under the last learning agent ("anneal-epsilon").
        random_name,
        random_agent,
        n,
        1  # rank is fixed to 1 for every random run
    ]
    table.append(rows)

# Save all
table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "total_R",
                      "p_bests",
                      "regrets",
                      "exps",
                      "agents",
                      "n",
                      "rank"]
                 )
df.to_csv(
    f"../data/table_total_{env_name}.csv",
    index=False)

In [25]:
# Key under which each agent's checkpoint stores its learned critic; aligned
# position-by-position with `agent_names` / `exp_names` from the cell above.
critic_names = ["critic_R",
                "critic",
                "critic_R",
                "critic_R"]

table = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n, m in product(range(num_trials), range(num_hp)):
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        # The checkpoint is the full result dictionary (the totals table reads
        # 'total_R', 'p_bests' and 'regrets' from these same files), so the
        # critic has to be picked out by key; using the whole checkpoint would
        # put result field names in the "arms" column.
        # NOTE(review): assumes the entry is an arm -> value mapping -- confirm
        # against the code that writes the checkpoints.
        critic = result[critic_name]
        num_arms = len(critic.keys())
        rows = [
            list(critic.keys()), # arms
            list(critic.values()),  # values
            [exp_name] * num_arms,
            [agent] * num_arms,
            [n] * num_arms,
            [m] * num_arms
        ]
        # Transpose so that each arm becomes one table row.
        rows = np.vstack(rows).T
        table.append(rows)

table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "arms",
                      "critic_values",
                      "exps",
                      "agents",
                      "n",
                      "rank"
                  ]
                 )
df.to_csv(
    f"../data/table_critic_{env_name}.csv",
    index=False)

In [26]:
# Rebuild the environment named by `env_name` and export one row per arm:
# the arm index and the matching entry of the environment's `p_dist`.
env = gym.make(env_name)
arm_ids = list(range(env.n_bandits))

# Two stacked rows (arm index, p_dist), transposed to one row per arm.
table = np.vstack([arm_ids, env.p_dist]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])

bandit_csv = f"../data/table_bandit_{env_name}.csv"
df.to_csv(bandit_csv, index=False)

## BanditHardAndSparse10

In [27]:
env_name = "BanditHardAndSparse10-v0"
table = []

# Learning agents: one summary row per (agent, n, m) checkpoint.
# `agent_names` and `exp_names` are aligned position-by-position.
num_trials = 10
num_hp = 10
exp_names = ["exp314",
             "exp317",
             "exp315",
             "exp316"]
agent_names = ["eta",
               "softbeta",
               "epsilon",
               "anneal-epsilon"]
for agent, exp_name in zip(agent_names, exp_names):
    for n, m in product(range(num_trials), range(num_hp)):
        # Checkpoints are named <exp>_<m>_<n+1>.pkl (the `n` part of the file
        # name is 1-based).
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        rows = [
            result['total_R'],
            result['p_bests'][-1],  # last recorded p_best of the run
            np.sum(result['regrets']),  # regret summed over the whole run
            exp_name,
            agent,
            n,
            m
        ]

        # Seven stacked scalars transposed into a single 1 x 7 table row.
        rows = np.vstack(rows).T
        table.append(rows)

# Random agent
# NOTE(review): every environment section of this notebook reads its random
# baseline from "exp254" -- confirm that experiment was run on this env.
random_name = "exp254"
random_agent = "random"
num_random = 100
for n in range(num_random):
    # Random-agent checkpoints are named <exp>_<n+1>.pkl (no `m` part).
    result = load_checkpoint(
            os.path.join(data_path,
                         f"{random_name}_{n+1}.pkl"))
    rows = [
        result['total_R'],
        result['p_bests'][-1],
        np.sum(result['regrets']),
        # Label the rows with the random experiment and agent. The loop
        # variables `exp_name` / `agent` left over from the loop above would
        # file these runs under the last learning agent ("anneal-epsilon").
        random_name,
        random_agent,
        n,
        1  # rank is fixed to 1 for every random run
    ]
    table.append(rows)

# Save all
table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "total_R",
                      "p_bests",
                      "regrets",
                      "exps",
                      "agents",
                      "n",
                      "rank"]
                 )
df.to_csv(
    f"../data/table_total_{env_name}.csv",
    index=False)

In [28]:
# Key under which each agent's checkpoint stores its learned critic; aligned
# position-by-position with `agent_names` / `exp_names` from the cell above.
critic_names = ["critic_R",
                "critic",
                "critic_R",
                "critic_R"]

table = []
for agent, exp_name, critic_name in zip(agent_names, exp_names, critic_names):
    for n, m in product(range(num_trials), range(num_hp)):
        result = load_checkpoint(
            os.path.join(data_path,
                         f"{exp_name}_{m}_{n+1}.pkl"))
        # The checkpoint is the full result dictionary (the totals table reads
        # 'total_R', 'p_bests' and 'regrets' from these same files), so the
        # critic has to be picked out by key; using the whole checkpoint would
        # put result field names in the "arms" column.
        # NOTE(review): assumes the entry is an arm -> value mapping -- confirm
        # against the code that writes the checkpoints.
        critic = result[critic_name]
        num_arms = len(critic.keys())
        rows = [
            list(critic.keys()), # arms
            list(critic.values()),  # values
            [exp_name] * num_arms,
            [agent] * num_arms,
            [n] * num_arms,
            [m] * num_arms
        ]
        # Transpose so that each arm becomes one table row.
        rows = np.vstack(rows).T
        table.append(rows)

table = np.vstack(table)
df = pd.DataFrame(table,
                  columns=[
                      "arms",
                      "critic_values",
                      "exps",
                      "agents",
                      "n",
                      "rank"
                  ]
                 )
df.to_csv(
    f"../data/table_critic_{env_name}.csv",
    index=False)

In [29]:
# Rebuild the environment named by `env_name` and export one row per arm:
# the arm index and the matching entry of the environment's `p_dist`.
env = gym.make(env_name)
arm_ids = list(range(env.n_bandits))

# Two stacked rows (arm index, p_dist), transposed to one row per arm.
table = np.vstack([arm_ids, env.p_dist]).T
df = pd.DataFrame(table, columns=["arms", "p_reward"])

bandit_csv = f"../data/table_bandit_{env_name}.csv"
df.to_csv(bandit_csv, index=False)