In [None]:
from copy import deepcopy

# Training
import numpy as np
import torch
from hydra import initialize, compose
from tqdm import tqdm

# Evaluation
import seaborn as sns
sns.set_theme()
from matplotlib import pyplot as plt

from cats.evaluation import *
from cats.run import run

# All runs are pinned to the CPU. To pick up a GPU when one is present, swap in:
# DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE = "cpu"

# Single source of randomness from which every per-run seed is derived.
MASTER_SEED = 235790


def generate_random_seeds(n: int):
    """Return ``n`` reproducible integer seeds derived from ``MASTER_SEED``.

    A fresh generator is built on every call, so calling this twice with the
    same ``n`` yields the same list.

    Args:
        n: Number of seeds to draw.

    Returns:
        A list of ``n`` Python ``int`` values drawn from ``[0, 2**32 - 1)``.
    """
    seed_stream = np.random.default_rng(MASTER_SEED)
    upper_bound = 2**32 - 1  # exclusive upper bound for Generator.integers
    return [int(draw) for draw in seed_stream.integers(0, upper_bound, size=(n,))]

In [None]:
# fig:cats-reset-policy
#
# Compose the off-policy defaults with the overrides listed below, train a
# single run, and draw a three-panel figure (memory, value estimate, reset
# policy) from the resulting experiment.

with initialize(version_base=None, config_path="cats/config"):
    cfg = compose(
        config_name="defaults_off_policy.yaml",
        overrides=[
            "train.total_frames=2000",
            "intrinsic=rnd",
            "cats.reset_action.enable=true",
            "cats.reset_inject_critic=true",
            "cats.teleport.enable=true",
        ]
    )
    # Set after composition because the value is a list rather than a scalar.
    cfg.noise.scale = [0.1, 0.01]

experiment = run(cfg)

# Spacing is handled by constrained layout. The earlier
# `fig.subplots_adjust(wspace=0.5)` call was removed: current matplotlib does
# not apply `subplots_adjust` to a figure managed by constrained layout (it
# only emits a warning), so the call had no effect on the rendered figure.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), constrained_layout=True)

fig.suptitle("When to reset: MountainCarContinuous")
visualise_memory(experiment, fig, axs[0])
visualise_experiment_value_estimate(experiment, fig, axs[1])
visualise_reset_policy(experiment, fig, axs[2])

# NOTE(review): the ':' in the file name is not a legal character on Windows
# filesystems; kept as-is so that existing references to the PDF still resolve.
fig.savefig("fig:cats-reset-policy.pdf", format="pdf", bbox_inches="tight")

In [None]:
# Optimising exploration
#
# Build the two base configurations used by the sweeps in the following cells:
# `base_cfg_baseline` (composed from the overrides below) and
# `base_cfg_teleport` (a deep copy of it with teleportation switched on).

TOTAL_FRAMES = 5000

with initialize(version_base=None, config_path="cats/config"):
    base_cfg_baseline = compose(
        config_name="defaults_off_policy.yaml",
        overrides=[
            "env.name=MountainCarContinuous-v0", # Environment as default on gymnasium
            "env.max_episode_steps=999",
            f"train.total_frames={TOTAL_FRAMES}",
            "intrinsic=disagreement",
            "cats.fixed_reset=true",
            "cats.death_not_end=true",
            "cats.enable_policy_sampling=false",
        ]
    )

    base_cfg_teleport = deepcopy(base_cfg_baseline)
    # float("inf") instead of math.inf: `math` is not imported explicitly in
    # the visible cells, so math.inf only worked if the star import from
    # cats.evaluation happened to leak the module into the namespace.
    base_cfg_teleport.env.max_episode_steps = float("inf")
    base_cfg_teleport.cats.teleport.enable = True
    base_cfg_teleport.cats.teleport_interval_enable = True # No reset as an action
    # Teleport memory capacity matches the total number of training frames.
    base_cfg_teleport.cats.teleport.memory = {
        "type": "fifo",
        "capacity": TOTAL_FRAMES
    }

    # Optional alternative teleportation strategy, left disabled:
    # base_cfg_teleport.cats.teleport.type = "ucb"    # UCB teleportation
    # base_cfg_teleport.cats.teleport.kwargs = {"c": 1}

In [None]:
# noise
#
# Sweep over the noise scale: for every (noise, seed) pair, train one baseline
# run and then one CATS run, collecting the returned experiments in `data`.

seeds = generate_random_seeds(5)
all_noise = [0, 0.2, 0.4, 0.6 ,0.8, 1.0, 1.5, 2.0, 3.0]

# data[variant][noise] -> list of experiments, one entry per seed
data = {
    variant: {noise: [] for noise in all_noise}
    for variant in ("baseline", "cats")
}

for noise in all_noise:
    for seed in seeds:
        # Baseline first, then CATS, so the run order matches for each pair.
        for variant, template in (
            ("baseline", base_cfg_baseline),
            ("cats", base_cfg_teleport),
        ):
            cfg = deepcopy(template)  # never mutate the shared base configs
            cfg.noise.scale = float(noise)
            cfg.seed = seed
            experiment = run(cfg, save=False, device=DEVICE)
            data[variant][noise].append(experiment)

In [None]:
import os
import pickle as pkl

# Replace `data` with the noise-sweep results stored on disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
path = os.path.join("evaluate/data/cats-exploration", "exploration_noise.pkl")

# To refresh the stored results from a new sweep, uncomment:
# with open(path, "wb") as f:
#     pkl.dump(data, f)

with open(path, "rb") as f:
    data = pkl.load(f)

In [None]:
# One inner list per noise level (in the dict's insertion order), holding the
# `evaluate_disagreement` result of every experiment run at that level.
noise_results_baseline = [
    [evaluate_disagreement(experiment) for experiment in runs]
    for runs in data["baseline"].values()
]

noise_results_cats = [
    [evaluate_disagreement(experiment) for experiment in runs]
    for runs in data["cats"].values()
]

In [None]:
# Plot mean ± one standard deviation of the disagreement results against the
# noise scale, for the baseline and for CATS, and save the figure as a PDF.
fig, ax = plt.subplots()
colours = sns.color_palette()

# Per-noise-level statistics taken over the runs at that level.
mu_baseline = np.array([np.array(x).mean() for x in noise_results_baseline])
mu_cats = np.array([np.array(x).mean() for x in noise_results_cats])
std_baseline = np.array([np.array(x).std() for x in noise_results_baseline])
std_cats = np.array([np.array(x).std() for x in noise_results_cats])

# Each series gets a line, markers at the sampled points, and a ±std band.
for label, mu, std, colour in (
    ("baseline", mu_baseline, std_baseline, colours[0]),
    ("cats", mu_cats, std_cats, colours[1]),
):
    ax.plot(all_noise, mu, label=label, color=colour)
    ax.scatter(all_noise, mu, color=colour)
    ax.fill_between(all_noise, mu - std, mu + std, alpha=0.2, color=colour)

# Label typo fixed ("Avaible" -> "Available").
ax.set_ylabel("Disagreement (Available Reward)")
ax.set_xlabel("Noise Scale σ")

ax.set_title("MountainCarContinuous")
ax.legend()
fig.savefig("mcc-cats-noise.pdf", format="pdf", bbox_inches="tight")

In [None]:
# gamma
#
# Sweep over the discount factor: for every (gamma, seed) pair, train one
# baseline run and then one CATS run, collecting the experiments in
# `data_gamma`.
seeds = generate_random_seeds(5)
discount_factors = [0.0, 0.25, 0.5, 0.7, 0.9, 0.95, 0.99, 0.999]

# data_gamma[variant][gamma] -> list of experiments, one entry per seed
data_gamma = {
    variant: {gamma: [] for gamma in discount_factors}
    for variant in ("baseline", "cats")
}

for gamma in discount_factors:
    for seed in seeds:
        # Baseline first, then CATS, so the run order matches for each pair.
        for variant, template in (
            ("baseline", base_cfg_baseline),
            ("cats", base_cfg_teleport),
        ):
            cfg = deepcopy(template)  # never mutate the shared base configs
            cfg.algorithm.gamma = gamma
            cfg.seed = seed
            experiment = run(cfg, save=False, device=DEVICE)
            data_gamma[variant][gamma].append(experiment)

In [None]:
import os
import pickle as pkl

# Load the discount-factor sweep results stored on disk.
# Note that the result is bound to `data` (not `data_gamma`), so it replaces
# whatever `data` held before this cell ran.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
path = os.path.join("evaluate/data/cats-exploration", "discount_factor.pkl")

# To refresh the stored results from a new sweep, uncomment:
# with open(path, "wb") as f:
#     pkl.dump(data_gamma, f)

with open(path, "rb") as f:
    data = pkl.load(f)

In [None]:
# One inner list per discount factor (in the dict's insertion order), holding
# the `evaluate_disagreement` result of every experiment run at that value.
gamma_results_baseline = [
    [evaluate_disagreement(experiment) for experiment in runs]
    for runs in data["baseline"].values()
]

gamma_results_cats = [
    [evaluate_disagreement(experiment) for experiment in runs]
    for runs in data["cats"].values()
]

In [None]:
# Plot mean ± one standard deviation of the disagreement results against the
# discount factor, for the baseline and for CATS, and save the figure as a PDF.
fig, ax = plt.subplots()
colours = sns.color_palette()

# Per-discount-factor statistics taken over the runs at that value.
mu_baseline = np.array([np.array(x).mean() for x in gamma_results_baseline])
mu_cats = np.array([np.array(x).mean() for x in gamma_results_cats])
std_baseline = np.array([np.array(x).std() for x in gamma_results_baseline])
std_cats = np.array([np.array(x).std() for x in gamma_results_cats])

# Each series gets a line, markers at the sampled points, and a ±std band.
for label, mu, std, colour in (
    ("baseline", mu_baseline, std_baseline, colours[0]),
    ("cats", mu_cats, std_cats, colours[1]),
):
    ax.plot(discount_factors, mu, label=label, color=colour)
    ax.scatter(discount_factors, mu, color=colour)
    ax.fill_between(discount_factors, mu - std, mu + std, alpha=0.2, color=colour)

# Label typo fixed ("Avaible" -> "Available").
ax.set_ylabel("Disagreement (Available Reward)")
ax.set_xlabel("Discount Factor γ")

ax.set_title("MountainCarContinuous")
ax.legend()
fig.savefig("mcc-cats-discount.pdf", format="pdf", bbox_inches="tight")

In [None]:
# Ratio of the baseline mean at the last entry to the CATS mean at the
# second-to-last entry. When the cells are run in order, mu_baseline / mu_cats
# are the arrays computed in the discount-factor plot cell.
# NOTE(review): the two indices differ (-1 vs -2) — confirm this is intended.
mu_baseline[-1] / mu_cats[-2]

In [None]:
# Ratio of the CATS mean at the last entry to the CATS mean at the
# second-to-last entry. When the cells are run in order, mu_cats is the array
# computed in the discount-factor plot cell.
mu_cats[-1] / mu_cats[-2]