### Notebook for plots and figures

In [None]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import seaborn as sns
# Apply seaborn's default theme with the "paper" plotting context.
sns.set_theme(context="paper")

# Seed used to derive the per-run random seeds generated in this notebook.
MASTER_SEED = 235790

In [None]:
# Generate Random Seeds

def generate_random_seeds(n: int):
    """Draw ``n`` integer seeds from a generator seeded with ``MASTER_SEED``.

    A fresh generator is created on every call, so repeated calls with the
    same ``n`` return the same values.

    Args:
        n: Number of seeds to draw.

    Returns:
        A 1-D NumPy array holding ``n`` integers in ``[0, 2**32 - 1)``.
    """
    upper_bound = 2**32 - 1  # exclusive: ``integers`` excludes ``high`` by default
    generator = np.random.default_rng(seed=MASTER_SEED)
    return generator.integers(low=0, high=upper_bound, size=(n,))

# Draw five seeds derived from MASTER_SEED.
seeds = generate_random_seeds(5)

# Display the drawn seeds in the cell output.
print(seeds)

In [None]:
# fig:kitten-intrinsic-experiment
#
# Two-panel figure for the Mountain Car Continuous (MCC) runs:
#   left  - per-method mean +/- std of the evaluation reward (rolling mean),
#   right - every individual ICM run (dashed) against the ICM mean.

WINDOW = 5  # rolling-mean window, in rows of the evaluation CSV

# Transpose so that every CSV column becomes a row labelled by its header.
rewards_mcc = pd.read_csv("data/kitten-im-mcc/evaluation_reward.csv").T

# Data Cleanup
im = ["baseline", "rnd", "disagreement", "icm"]
im_reg = "|".join(im)
# Keep only rows whose label contains none of MIN / MAX / Step
# (presumably the step counter and min/max envelope columns of the export).
rewards_mcc = rewards_mcc.filter(regex="^(?!.*(MIN|MAX|Step)).+", axis=0)
rewards_mcc = rewards_mcc.reset_index()
# Replace each row label by the method name embedded in it. expand=False
# returns a Series, which is the natural shape for a single-column assignment.
rewards_mcc["index"] = rewards_mcc["index"].str.extract(
    f"^.+({im_reg}).+$", expand=False
)
# Aggregate over the runs of each method; transpose back so that each method
# is a column indexed by evaluation point.
rewards_mcc_mean = rewards_mcc.groupby("index").mean().T
rewards_mcc_std = rewards_mcc.groupby("index").std().T

# Plot
fig, axs = plt.subplots(1, 2)
fig.set_size_inches(10, 4)
fig.subplots_adjust(wspace=0.3)

# Derive the label from WINDOW so the two cannot drift apart.
y_label = f"Extrinsic Reward per Episode (Rolling Mean {WINDOW})"

# MCC
ax: Axes = axs[0]
colours = sns.color_palette()
for i, col in enumerate(im):
    c = colours[i]
    mean_ma = rewards_mcc_mean[col].rolling(window=WINDOW).mean()
    std_ma = rewards_mcc_std[col].rolling(window=WINDOW).mean()

    x = list(range(len(mean_ma)))
    ax.plot(x, mean_ma, label=col, color=c)
    # Set the band colour explicitly instead of relying on the axes' fill
    # colour cycle happening to advance in step with the loop.
    ax.fill_between(x, mean_ma - std_ma, mean_ma + std_ma, facecolor=c, alpha=0.2)

ax.set_title("Mountain Car Continuous")
ax.legend()
ax.set_xlabel("Step")
ax.set_ylabel(y_label)

# ICM
# Drop the string label column *before* transposing so the per-run frame
# stays numeric; transposing with it would produce an object-dtype frame.
rewards_mcc_icm = rewards_mcc[rewards_mcc["index"] == "icm"]
rewards_mcc_icm = rewards_mcc_icm.drop(columns="index").T
rewards_mcc_icm = rewards_mcc_icm.rolling(window=WINDOW).mean()

ax: Axes = axs[1]
# Look the colour up by name so it stays in sync with the left panel even if
# the order of `im` changes.
ax.plot(
    rewards_mcc_mean["icm"].rolling(window=WINDOW).mean(),
    color=colours[im.index("icm")],
)
ax.plot(rewards_mcc_icm, "--", alpha=0.5)

ax.set_title("Mountain Car Continuous (ICM)")
ax.set_xlabel("Step")
ax.set_ylabel(y_label)

fig.savefig("mcc_kitten_intrinsic.pdf", format="pdf", bbox_inches="tight")


In [None]:
# fig: kitten-intrinsic-hopper
#
# Single-panel figure for the Hopper runs: per-method mean +/- std of the
# evaluation reward, smoothed with a rolling mean.

WINDOW = 20  # rolling-mean window, in rows of the evaluation CSV

# Transpose so that every CSV column becomes a row labelled by its header.
rewards_hopper = pd.read_csv("data/kitten-im-hopper/evaluation_reward.csv").T

# Data Cleanup
im = ["baseline", "rnd", "disagreement", "icm"]
im_reg = "|".join(im)
# Keep only rows whose label contains none of MIN / MAX / Step
# (presumably the step counter and min/max envelope columns of the export).
rewards_hopper = rewards_hopper.filter(regex="^(?!.*(MIN|MAX|Step)).+", axis=0)
rewards_hopper = rewards_hopper.reset_index()
# Replace each row label by the method name embedded in it. expand=False
# returns a Series, which is the natural shape for a single-column assignment.
rewards_hopper["index"] = rewards_hopper["index"].str.extract(
    f"^.+({im_reg}).+$", expand=False
)
# Aggregate over the runs of each method; transpose back so that each method
# is a column indexed by evaluation point.
rewards_hopper_mean = rewards_hopper.groupby("index").mean().T
rewards_hopper_std = rewards_hopper.groupby("index").std().T

# Plot
# A single axes has no neighbour to space against, so no wspace adjustment
# is needed here.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(8, 4)

# Hopper
colours = sns.color_palette()
for i, col in enumerate(im):
    c = colours[i]
    mean_ma = rewards_hopper_mean[col].rolling(window=WINDOW).mean()
    std_ma = rewards_hopper_std[col].rolling(window=WINDOW).mean()

    x = list(range(len(mean_ma)))
    ax.plot(x, mean_ma, label=col, color=c)
    # Set the band colour explicitly instead of relying on the axes' fill
    # colour cycle happening to advance in step with the loop.
    ax.fill_between(x, mean_ma - std_ma, mean_ma + std_ma, facecolor=c, alpha=0.2)

ax.set_title("Hopper")
ax.legend()
ax.set_xlabel("Step")
# Derive the label from WINDOW so the two cannot drift apart.
ax.set_ylabel(f"Extrinsic Reward per Episode (Rolling Mean {WINDOW})")

# NOTE(review): the file name carries an "mcc" prefix although this is the
# Hopper figure; kept unchanged in case other documents reference it.
fig.savefig("mcc_kitten_intrinsic_hopper.pdf", format="pdf", bbox_inches="tight")

In [None]:
# Scratch cell: reload the Hopper evaluation rewards and display the
# transposed frame exactly as loaded. Note that this rebinds
# `rewards_hopper`, replacing the cleaned frame built by the figure cell.
rewards_hopper = pd.read_csv("data/kitten-im-hopper/evaluation_reward.csv").T

# Data Cleanup
im = ["baseline", "rnd", "disagreement", "icm"]
im_reg = "|".join(im)
# The clean-up steps below are disabled, so `im` and `im_reg` are unused in
# this cell and no rows are filtered or relabelled.
# rewards_hopper = rewards_hopper.filter(regex="^(?!.*(MIN|MAX|Step)).+", axis=0)
# rewards_hopper = rewards_hopper.reset_index()
# rewards_hopper["index"] = rewards_hopper["index"].str.extract(f"^.+({im_reg}).+$")
# rewards_hopper
# Bare expression: the notebook renders the frame as the cell output.
rewards_hopper