In [None]:
from copy import deepcopy

# Training
import numpy as np
import torch
from hydra import initialize, compose
from tqdm import tqdm

# Evaluation
import seaborn as sns
from matplotlib import pyplot as plt

sns.set_theme()

from cats.evaluation import *
from cats.run import run

# Run-wide settings shared by every cell below.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
MASTER_SEED = 235790   # seeds the generator that derives the per-run seeds
TOTAL_FRAMES = 10_000  # injected into the config as train.total_frames


def generate_random_seeds(n: int):
    """Return ``n`` reproducible per-run seeds as plain Python ints.

    A fresh generator seeded with ``MASTER_SEED`` is created on every call,
    so repeated calls with the same ``n`` return the same list.
    Values are drawn from ``[0, 2**32 - 1)`` (upper bound exclusive).
    """
    generator = np.random.default_rng(MASTER_SEED)
    draws = generator.integers(0, 2**32 - 1, size=(n,))
    # Convert numpy integers to built-in ints.
    return [int(value) for value in draws]


seeds = generate_random_seeds(20)
print(seeds)



# Command-line style overrides applied on top of defaults_off_policy.yaml.
# Every experiment below starts from a deep copy of the resulting base_cfg.
base_overrides = [
    "intrinsic=disagreement",
    "env.name=MountainCarContinuous-v0",   # Environment selection
    f"train.total_frames={TOTAL_FRAMES}",  # Collection frames
    "algorithm.gamma=0.5",
]

with initialize(version_base=None, config_path="cats/config"):
    base_cfg = compose(config_name="defaults_off_policy.yaml", overrides=base_overrides)

In [None]:
# Baseline 0.5
# One run per seed; only the seed and the episode step limit (999) are
# overridden on top of base_cfg.

def _baseline_cfg(run_seed):
    """Return a private copy of base_cfg configured for one baseline run."""
    run_cfg = deepcopy(base_cfg)  # never mutate the shared base config
    run_cfg.seed = run_seed
    run_cfg.env.max_episode_steps = 999
    return run_cfg


baseline_05 = []
for run_seed in tqdm(seeds):
    # Append as we go so finished runs survive an interrupted cell.
    baseline_05.append(run(_baseline_cfg(run_seed)))

In [None]:
# CATS_EPISODE
# One run per seed with teleportation enabled and memory type "episode".
# The episode step limit is set to TOTAL_FRAMES, the same value used for
# train.total_frames.

def _episode_cfg(run_seed):
    """Return a private copy of base_cfg configured for one episodic-memory run."""
    run_cfg = deepcopy(base_cfg)  # never mutate the shared base config
    run_cfg.seed = run_seed
    run_cfg.env.max_episode_steps = TOTAL_FRAMES
    run_cfg.cats.teleport_interval_enable = True
    run_cfg.cats.teleport.enable = True
    run_cfg.cats.teleport.memory.type = "episode"
    return run_cfg


cats_episode = []
for run_seed in tqdm(seeds):
    # Append as we go so finished runs survive an interrupted cell.
    cats_episode.append(run(_episode_cfg(run_seed)))

In [None]:
# CATS_FIFO_1 .. CATS_FIFO_5
# The five FIFO experiments differ only in the teleport-memory capacity, so
# they share one helper instead of five copy-pasted loops.

def run_cats_fifo(capacity: int) -> list:
    """Run one CATS experiment per seed using a FIFO teleport memory.

    Args:
        capacity: value written to ``cfg.cats.teleport.memory.capacity``.

    Returns:
        A list with the object returned by ``run(cfg)`` for each entry of
        ``seeds``, in the same order as ``seeds``.
    """
    experiments = []
    for seed in tqdm(seeds, desc=f"FIFO {capacity}"):
        cfg = deepcopy(base_cfg)  # never mutate the shared base config
        cfg.seed = seed
        # Same value as train.total_frames in the base config.
        cfg.env.max_episode_steps = TOTAL_FRAMES
        cfg.cats.teleport_interval_enable = True
        cfg.cats.teleport.enable = True
        cfg.cats.teleport.memory.type = "fifo"
        cfg.cats.teleport.memory.capacity = capacity
        experiments.append(run(cfg))
    return experiments


# Capacities shrink from the full frame budget down to 1% of it.
cats_fifo_1 = run_cats_fifo(TOTAL_FRAMES)
cats_fifo_2 = run_cats_fifo(TOTAL_FRAMES // 4)
cats_fifo_3 = run_cats_fifo(TOTAL_FRAMES // 16)
cats_fifo_4 = run_cats_fifo(TOTAL_FRAMES // 50)
cats_fifo_5 = run_cats_fifo(TOTAL_FRAMES // 100)

In [None]:
# Collect every experiment family under the key used when storing/loading
# the results and when labelling the plots.
data = dict(
    [
        ("baseline_05", baseline_05),
        ("fifo_1", cats_fifo_1),
        ("fifo_2", cats_fifo_2),
        ("fifo_3", cats_fifo_3),
        ("fifo_4", cats_fifo_4),
        ("fifo_5", cats_fifo_5),
        ("episode", cats_episode),
    ]
)

In [None]:
# Store / load results
import os
import pickle as pkl

path = os.path.join("evaluate/data/cats-detachment", "detachment_gamma_05.pkl")

# Set to True after a fresh training run to (over)write the results on disk.
# Left off by default so that running this cell only reads the stored file.
SAVE_RESULTS = False
if SAVE_RESULTS:
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        pkl.dump(data, f)

# Load results
# NOTE(security): unpickling can execute arbitrary code; only load files that
# were produced by this notebook.
with open(path, "rb") as f:  # context manager closes the file handle
    data = pkl.load(f)

# Rebind the per-experiment names so the cells below work from the stored results.
baseline_05 = data["baseline_05"]
cats_fifo_1 = data["fifo_1"]
cats_fifo_2 = data["fifo_2"]
cats_fifo_3 = data["fifo_3"]
cats_fifo_4 = data["fifo_4"]
cats_fifo_5 = data["fifo_5"]
cats_episode = data["episode"]

In [None]:
# For every experiment family, evaluate each run with evaluate_disagreement
# and keep the per-run results as one numpy array per family.
intrinsic_quality_results = {
    family: np.array([evaluate_disagreement(experiment) for experiment in runs])
    for family, runs in data.items()
}

In [None]:
# Lower end of the mean +/- 1.96 * standard-error interval for "fifo_1".
intrinsic_quality = intrinsic_quality_results["fifo_1"]
n = len(intrinsic_quality)
mu = intrinsic_quality.mean()

squared_deviation = (intrinsic_quality - mu) ** 2
sample_std = (squared_deviation.sum() / (n - 1)) ** 0.5  # n - 1 in the denominator
standard_error = sample_std / (n ** 0.5)
confidence_bound = standard_error * 1.96
mu - confidence_bound

In [None]:
# Display names for the experiment keys (FIFO labels show the memory capacity).
key_map = {
    "fifo_1": "FIFO 10000",
    "fifo_2": "FIFO 2500",
    "fifo_3": "FIFO 625",
    "fifo_4": "FIFO 200",
    "fifo_5": "FIFO 100",
    "episode":"Episodic",
    "baseline_05": "Baseline"
}

# Order the bars by ascending mean and relabel them for display.
# A dedicated name is used on purpose: rebinding `data` here would replace the
# experiments dict and break a re-run of the evaluation cell above.
plot_data = {
    key_map[k]: v
    for k, v in sorted(intrinsic_quality_results.items(), key=lambda item: item[1].mean())
}

# Error bars: standard error scaled by 1.96.
ax = sns.barplot(plot_data, orient="h", errorbar=("se", 1.96))
ax.set_title("MountainCarContinuous, γ=0.5")
ax.set_ylabel("Teleportation Memory")
ax.set_xlabel("Expected Reward (Disagreement)")

ax.get_figure().savefig("mcc-cats-detachment-2.pdf", format="pdf", bbox_inches="tight")


In [None]:
# Two-row figure for a single run (INDEX) of four experiment families:
# top row is drawn by visualise_memory, bottom row by
# visualise_experiment_value_estimate (both presumably provided by the
# star import from cats.evaluation).
INDEX = 0
experiments = [
    baseline_05[INDEX],
    cats_fifo_1[INDEX],
    cats_fifo_3[INDEX],
    cats_fifo_5[INDEX],
]
name = ["Baseline", "FIFO 10000", "FIFO 625", "FIFO 100"]

fig = plt.figure(constrained_layout=True)
fig.suptitle("MountainCarContinuous, γ=0.5")
fig.set_size_inches(16, 7)

subfigs = fig.subfigures(2, 1)
visitation_row = subfigs[0]
value_row = subfigs[1]

visitation_row.suptitle("State Visitation")
axs = visitation_row.subplots(nrows=1, ncols=4, sharex=True, sharey=True)
for col, (panel, ex) in enumerate(zip(axs, experiments)):
    visualise_memory(ex, fig, panel)
    if col > 0:
        panel.set_ylabel("")  # keep the y label on the left-most panel only
    panel.set_xlabel("")
    panel.set_title(name[col])

value_row.suptitle("Value Function")
axs = value_row.subplots(nrows=1, ncols=4, sharex=True, sharey=True)
for col, (panel, ex) in enumerate(zip(axs, experiments)):
    visualise_experiment_value_estimate(ex, fig, panel)
    if col > 0:
        panel.set_ylabel("")  # keep the y label on the left-most panel only
    panel.set_title("")  # column titles are already shown on the top row

fig.savefig("mcc-cats-detachment.pdf", format="pdf", bbox_inches="tight")


In [None]:
# Largest logged "reset_step" value for the episodic-memory run at INDEX.
reset_steps = cats_episode[INDEX].logger.engine.results["reset_step"]
print(max(reset_steps))

In [None]:
# Logged "reset_step" series of the episodic-memory run at index 2
# (a fixed index, independent of INDEX).
episode_run = cats_episode[2]
plt.plot(episode_run.logger.engine.results["reset_step"])