In [None]:
from copy import deepcopy

# Training
import numpy as np
import torch
from hydra import initialize, compose
from tqdm import tqdm

# Evaluation
import seaborn as sns

sns.set_theme()
from matplotlib import pyplot as plt

from cats.evaluation import *
from cats.run import run

import gym_continuous_maze

# Use the CUDA device when torch reports one is available; otherwise use the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# DEVICE = "cpu"  # manual override, left disabled
# Root seed from which every per-run seed is derived.
MASTER_SEED = 235790


def generate_random_seeds(n: int):
    """Return ``n`` integer seeds drawn from a generator seeded with ``MASTER_SEED``.

    A new generator is built on every call, so repeated calls with the same
    ``n`` return the same list.

    Args:
        n: Number of seeds to draw.

    Returns:
        A list of ``n`` Python ``int`` values in ``[0, 2**32 - 2]`` (the upper
        bound handed to ``integers`` is exclusive).
    """
    seed_rng = np.random.default_rng(MASTER_SEED)
    drawn = seed_rng.integers(0, 2**32 - 1, size=(n,))
    return [int(value) for value in drawn]

In [None]:
# Frame budget for every run. The baseline's episode step limit is set to the
# same value through the overrides below.
TOTAL_FRAMES = 10000

# The Hydra context is only required while the config is being composed.
with initialize(version_base=None, config_path="cats/config"):
    base_cfg = compose(
        config_name="defaults_off_policy.yaml",
        overrides=[
            "env.name=ContinuousLidarMaze-v0",
            f"env.max_episode_steps={TOTAL_FRAMES}",
            f"train.total_frames={TOTAL_FRAMES}",
            "intrinsic=disagreement",
            "cats.fixed_reset=true",
            "cats.death_not_end=true",
            "noise.scale=0.1",
        ],
    )

# CATS variant: identical to the baseline config except that teleporting is
# enabled, the episode step limit is removed, and the teleport memory is a
# FIFO buffer with capacity 5000.
cats_cfg = deepcopy(base_cfg)
cats_cfg.cats.teleport.enable = True
# float("inf") has the same value as math.inf and avoids relying on a `math`
# name that no visible import provides.
cats_cfg.env.max_episode_steps = float("inf")
# NOTE(review): this cell previously contained the bare expression
# `cats_cfg.cats.teleport_interval_enable`, which assigned nothing. It has
# been removed; if that option was meant to be set, assign it explicitly here
# (TODO confirm the intended value).
cats_cfg.cats.teleport.memory = {"type": "fifo", "capacity": 5000}

In [None]:
# One seed per repetition; each seed is used for both a baseline and a CATS run.
seeds = generate_random_seeds(5)

baseline, cats = [], []

for seed in seeds:
    # Baseline first, then CATS, each from a fresh copy of its template config
    # so that setting the seed never touches the shared templates.
    for template, results in ((base_cfg, baseline), (cats_cfg, cats)):
        cfg = deepcopy(template)
        cfg.seed = seed
        results.append(run(cfg, save=False, device=DEVICE))

In [None]:
# Visualise a single CATS run (index 3 of the seeded repetitions).
experiment = cats[3]
log = experiment.logger.engine.results  # not referenced again in this cell

# Constrained layout handles the panel spacing. It must not be combined with
# `fig.subplots_adjust`: matplotlib either ignores that call with a warning or
# switches constrained layout off, depending on the version.
fig, axs = plt.subplots(1, 3, figsize=(15, 4), constrained_layout=True)

visualise_memory(experiment, fig, axs[0])
# The last argument is presumably the action dimension (0 -> X, 1 -> Y), as
# suggested by the panel titles -- TODO confirm against cats.evaluation.
visualise_experiment_policy(experiment, fig, axs[1], 0)
axs[1].set_title("Policy Actions (X)")
visualise_experiment_policy(experiment, fig, axs[2], 1)
axs[2].set_title("Policy Actions (Y)")

fig.suptitle("ContinuousLidarMaze")
fig.savefig("maze-policy.pdf", format="pdf", bbox_inches="tight")

In [None]:
import os
import pickle as pkl

# Both groups of finished runs, keyed by method name.
data = {"baseline": baseline, "cats": cats}

# The cache file is named after the frame budget so that results produced with
# a different TOTAL_FRAMES do not collide.
path = os.path.join("evaluate/data/cats-maze", f"{TOTAL_FRAMES}.pkl")

# Optional caching, left disabled. Only unpickle files you produced yourself:
# loading a pickle can execute arbitrary code.
# with open(path, "wb") as f:
#     pkl.dump(data, f)
# with open(path, "rb") as f:
#     data = pkl.load(f)

In [None]:
# Per-method arrays holding one final disagreement value and one memory
# entropy value for each run.
final_disagreement, final_entropy = {}, {}

for group_name, experiments in data.items():
    disagreement_scores = [evaluate_disagreement(exp) for exp in experiments]
    final_disagreement[group_name] = np.array(disagreement_scores)

    memory_entropies = [entropy_memory(exp.memory.rb) for exp in experiments]
    final_entropy[group_name] = np.array(memory_entropies)

In [None]:
# Stack the logged "collected_intrinsic_reward" series of every run into one
# array per method (first axis indexes the runs).
baseline_collected = np.array(
    [
        run_result.logger.engine.results["collected_intrinsic_reward"]
        for run_result in baseline
    ]
)
cats_collected = np.array(
    [
        run_result.logger.engine.results["collected_intrinsic_reward"]
        for run_result in cats
    ]
)

fig, ax = plt.subplots()
colours = sns.color_palette()

# Frame index of each logged value; the spacing of 100 presumably mirrors the
# logging interval -- TODO confirm.
x = list(range(0, 100 * baseline_collected.shape[1], 100))

# Mean curve with a +/- one standard deviation band for each method.
for colour, label, collected in (
    (colours[0], "Baseline", baseline_collected),
    (colours[1], "CATS", cats_collected),
):
    mu = collected.mean(axis=0)
    std = collected.std(axis=0)
    ax.plot(x, mu, color=colour, label=label)
    ax.fill_between(x, mu - std, mu + std, alpha=0.2, color=colour)

ax.set(
    title="ContinuousLidarMaze - Reward Collection",
    ylabel="Collected Intrinsic Reward - Running Total",
    xlabel="Collection Frame",
)
ax.legend()

fig.savefig("maze-collected.pdf", format="pdf", bbox_inches="tight")

In [None]:
def mu_std(data: dict):
    """Print one line per entry: the key, then the mean and std of its value.

    Args:
        data: Mapping whose values expose ``mean()`` and ``std()`` methods
            (NumPy arrays at the call sites in this notebook).
    """
    for name in data:
        values = data[name]
        print(name, values.mean(), values.std())


# Report mean and standard deviation per method for each final metric.
for heading, table in (("Entropy", final_entropy), ("Disagreement", final_disagreement)):
    print(heading)
    mu_std(table)