Imports

In [1]:
import numpy as np
import os
from plotting import plot_sample_efficiency_curve, COLOR_MAPPING, LABEL_MAPPING, load_and_process_experiment_data, arange_frames

Load data

In [None]:
# Environments and run configurations whose results should be loaded
envs = ["emm", "emp", "ess"]
run_ids = ["gru", "trxl", "gru_rec", "trxl_rec"]

# Stride used when slicing the first axis of the loaded data
skip = 1

# Factor applied to the data of each environment (1 when not listed)
reward_multipliers = {"emm": 10, "emp": 1, "ess": 4}

# Visit every env/run combination and load the ones that have a results directory
raw_data_dict = {}
for env in envs:
    raw_data_dict[env] = {}
    for run_id in run_ids:
        path = f"./results/{env}/{run_id}/"
        # Combinations without a results directory are simply left out
        if not (os.path.exists(path) and os.path.isdir(path)):
            continue
        # Average over the episodes dimension
        data = load_and_process_experiment_data(path, "reward").mean(axis=2)
        multiplier = reward_multipliers.get(env, 1)
        raw_data_dict[env][run_id] = data[::skip] * multiplier


Process and aggregate data

Shape of the loaded reward data: (101, 5, 150)

101 Checkpoints
5 Runs
150 Episodes

In [3]:
# Statistics of every run configuration, reduced over axis 1 of its data
mean_dict, std_dict, min_dict, max_dict = {}, {}, {}, {}
for env, runs in raw_data_dict.items():
    mean_dict[env] = {run_id: values.mean(axis=1) for run_id, values in runs.items()}
    std_dict[env] = {run_id: values.std(axis=1) for run_id, values in runs.items()}
    min_dict[env] = {run_id: values.min(axis=1) for run_id, values in runs.items()}
    max_dict[env] = {run_id: values.max(axis=1) for run_id, values in runs.items()}

# X-axis values, sized by the length of the emm/gru mean curve
frames = arange_frames(mean_dict["emm"]["gru"].shape[0], skip)

Plot mean and std across runs

EMM

In [None]:
# EMM: mean curve with std per run configuration
emm_plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 820, 100)),
    xlabel="Steps (in millions)",
    ylabel="Commands Executed",
    marker="",
)
plot_sample_efficiency_curve(frames, mean_dict["emm"], std_dict["emm"], **emm_plot_options)

Plot mean and std across runs

EMP

In [None]:
# EMP: mean curve with std per run configuration
emp_plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 820, 100)),
    xlabel="Steps (in millions)",
    ylabel="Undiscounted Return",
    marker="",
)
plot_sample_efficiency_curve(frames, mean_dict["emp"], std_dict["emp"], **emp_plot_options)

Plot mean and std across runs

ESS

In [None]:
# ESS: mean curve with std per run configuration
ess_plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 820, 100)),
    xlabel="Steps (in millions)",
    ylabel="Coins Collected",
    marker="",
)
plot_sample_efficiency_curve(frames, mean_dict["ess"], std_dict["ess"], **ess_plot_options)

Plot individual mean

EMM

In [None]:
# EMM: un-aggregated data of every run configuration, written to emm_individuals.pdf
emm_individual_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 10),
    xticks=list(range(0, 820, 100)),
    yticks=list(range(0, 131, 10)),
    xlabel="Steps (in millions)",
    ylabel="Num. Commands",
    marker="",
    out="emm_individuals.pdf",
)
plot_sample_efficiency_curve(frames, raw_data_dict["emm"], **emm_individual_options)

Plot individual mean

EMP

In [None]:
# Scale the EMP data of every run configuration by 10 for this plot
emp_dict = {run_id: values * 10 for run_id, values in raw_data_dict["emp"].items()}

# EMP: un-aggregated (scaled) data of every run configuration, written to emp_individuals.pdf
emp_individual_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 820, 100)),
    xlabel="Steps (in millions)",
    ylabel="Tiles Visited",
    marker="",
    out="emp_individuals.pdf",
)
plot_sample_efficiency_curve(frames, emp_dict, **emp_individual_options)

Plot individual mean

ESS

In [None]:
# ESS: un-aggregated data of every run configuration, written to ess_individuals.pdf
ess_individual_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 820, 100)),
    xlabel="Steps (in millions)",
    ylabel="Num. Coins",
    marker="",
    out="ess_individuals.pdf",
)
plot_sample_efficiency_curve(frames, raw_data_dict["ess"], **ess_individual_options)

# Aggregation Plot

In [10]:
# Select data
selected_data = {}

selected_data["emm"] = {}
selected_data["emm"]["gru"] = raw_data_dict["emm"]["gru"].copy()
selected_data["emm"]["trxl"] = raw_data_dict["emm"]["trxl"].copy()

selected_data["emp"] = {}
selected_data["emp"]["gru"] = raw_data_dict["emp"]["gru"].copy()
selected_data["emp"]["trxl"] = raw_data_dict["emp"]["trxl"].copy()

selected_data["ess"] = {}
selected_data["ess"]["gru"] = raw_data_dict["ess"]["gru_rec"].copy()
selected_data["ess"]["trxl"] = raw_data_dict["ess"]["trxl_rec"].copy()


In [11]:
# Labels to aggregate. NOTE: this rebinds the notebook-level `run_ids`, which
# the loading cell defines with four entries.
run_ids = ["gru", "trxl"]
# Run configuration whose best mean score sets the scale of each environment
normalizer = "trxl"

agg_mean_dict = {}
agg_std_dict = {}
max_mean_return = {}
num_checkpoints = selected_data["emm"]["gru"].shape[0]

# Best mean score of every run configuration per environment: mean over axis 1,
# then the maximum of those means, rounded to two decimals. Every environment
# is treated the same way, so no per-environment branching is needed.
for env in envs:
    max_mean_return[env] = {
        run_id: round(selected_data[env][run_id].mean(axis=1).max(), 2)
        for run_id in run_ids
    }

# Divide each environment's data by the normalizer's best mean score and
# collect the normalized arrays per run configuration
accumulated_data = {run_id: [] for run_id in run_ids}
for env in envs:
    for run_id in run_ids:
        data = selected_data[env][run_id]
        data = data / max_mean_return[env][normalizer]
        accumulated_data[run_id].append(data)

# Join the environments with np.hstack, then reduce over axis 1 of the joined array
for run_id in run_ids:
    stacked_data = np.hstack(accumulated_data[run_id])
    agg_mean_dict[run_id] = stacked_data.mean(axis=1)
    agg_std_dict[run_id] = stacked_data.std(axis=1)

In [None]:
# Aggregated, normalized mean curve with std for the two labels; written to
# aggregation_endless.pdf. The y-axis label spelling is corrected here because
# it is rendered into the exported figure.
plot_sample_efficiency_curve(frames,
                             agg_mean_dict,
                             agg_std_dict,
                             algorithms=["gru", "trxl"],
                             colors=COLOR_MAPPING,
                             label_mapping=LABEL_MAPPING,
                             figsize=(16.5,3.5),
                             xticks=list(range(0, 820, 100)),
                             yticks=[0, 1, 2, 3, 4, 5],
                             xlabel="Steps (in millions)",
                             ylabel="Normalized Score",
                             marker="",
                             out="aggregation_endless.pdf")