Imports

In [1]:
import numpy as np
import os
from plotting import plot_sample_efficiency_curve, COLOR_MAPPING, LABEL_MAPPING, load_and_process_experiment_data, arange_frames

Load data

In [None]:
# Determine tiles visited
# Derives a per-entry normaliser (one value per index of axis 2) from the
# trxl_rec results of the dense environment. The loaded arrays are viewed as
# (checkpoints, 5, 50, 3); the checkpoint count is inferred with -1 so the
# cell keeps working when the number of stored checkpoints is not 51.
path = "./results/mp_off_dense/trxl_rec/"
rewards = load_and_process_experiment_data(path, "reward").reshape(-1, 5, 50, 3)
# Concrete shape (including the inferred checkpoint count), reused below.
original_shape = rewards.shape
successes = load_and_process_experiment_data(path, "success").reshape(original_shape)
seeds = load_and_process_experiment_data(path, "seed").reshape(original_shape)
# Reduce over checkpoints, runs and the last axis; axis 2 (length 50) remains.
successes_agg = successes.max(axis=(0, 1, 3))
# 0.9 is the same constant the loading cell uses to scale the success data
# before subtracting it from the reward — presumably the success bonus; confirm.
tiles_visited = rewards.max(axis=(0, 1, 3)) - 0.9
# NOTE(review): the factor 10 presumably converts reward to a tile count
# (0.1 per tile) — confirm against the environment definition.
print(f"Tiles visited: {tiles_visited * 10}")

In [None]:
# Environments and run configurations to look for under ./results/.
envs = ["mm_act_grid", "mm_grid", "mm10", "mp_grid_on", "mp_grid_off", "mp_off_dense", "ss"]
run_ids = ["gru", "trxl", "gru_rec", "trxl_rec", "gru_25", "trxl_25", "gru_rec_25", "trxl_rec_25"]

# Keep every `skip`-th checkpoint.
skip = 1
# Scaling applied to the stored values (currently a no-op).
multiplier = 1

# Walk every env/run combination and load the data of those that exist on disk.
raw_data_dict = {}
for env in envs:
    raw_data_dict[env] = {}
    for run_id in run_ids:
        path = f"./results/{env}/{run_id}/"
        # Combinations without a result directory are silently skipped.
        if not os.path.isdir(path):
            continue

        data = load_and_process_experiment_data(path, "reward")

        if "dense" in env:
            # Remove the scaled success component from the reward and
            # normalise by tiles_visited (computed in an earlier cell),
            # broadcasting it along axis 2 of the 4-D view.
            success_data = load_and_process_experiment_data(path, "success")
            n_checkpoints = success_data.shape[0]
            original_shape = (n_checkpoints, 5, 50, 3)
            target_shape = (n_checkpoints, 5, 150)
            success_data = success_data.reshape(original_shape) * 0.9
            reward_data = data.reshape(original_shape) - success_data
            data = (reward_data / tiles_visited[None, None, :, None]).reshape(target_shape)

        if "ss" in env:
            # Remap two specific reward values in place (order matters only
            # in that each replacement is applied to the current array).
            for old_value, new_value in ((0.25, 0.5), (1.25, 1.0)):
                data[data == old_value] = new_value

        # Average over the episodes dimension
        data = data.mean(axis=2)

        raw_data_dict[env][run_id] = data[::skip] * multiplier


Process and aggregate data

Shape of each loaded data array, e.g. (101, 5, 150):

101 Checkpoints
5 Runs
150 Episodes

In [4]:
# Aggregate data
# Reduce every array in raw_data_dict along axis 1 and store the mean, std,
# min and max in four dicts that mirror its env -> run_id layout.
mean_dict, std_dict, min_dict, max_dict = {}, {}, {}, {}
for env, runs in raw_data_dict.items():
    mean_dict[env] = {run_id: values.mean(axis=1) for run_id, values in runs.items()}
    std_dict[env] = {run_id: values.std(axis=1) for run_id, values in runs.items()}
    min_dict[env] = {run_id: values.min(axis=1) for run_id, values in runs.items()}
    max_dict[env] = {run_id: values.max(axis=1) for run_id, values in runs.items()}

Plot mean and std across runs

mm_act_grid

In [None]:
# Mean and std across runs for mm_act_grid; the x-axis length is taken from
# the number of checkpoints of the "gru" run. xticks are not passed here.
env_mean = mean_dict["mm_act_grid"]
env_std = std_dict["mm_act_grid"]
frames = arange_frames(env_mean["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Commands Executed",
    marker="",
)

Plot mean and std across runs

mm_grid

In [None]:
# Mean and std across runs for mm_grid; the x-axis length is taken from the
# number of checkpoints of the "gru" run. xticks are not passed here.
env_mean = mean_dict["mm_grid"]
env_std = std_dict["mm_grid"]
frames = arange_frames(env_mean["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Commands Executed",
    marker="",
)

Plot mean and std across runs

mm10

In [None]:
# Mean and std across runs for mm10; the x-axis length is taken from the
# number of checkpoints of the "gru" run. xticks are not passed here.
env_mean = mean_dict["mm10"]
env_std = std_dict["mm10"]
frames = arange_frames(env_mean["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Commands Executed",
    marker="",
)

Plot mean and std across runs

mp_grid_on

In [None]:
# Mean and std across runs for mp_grid_on; the x-axis length is taken from
# the number of checkpoints of the "gru" run. xticks are not passed here.
env_mean = mean_dict["mp_grid_on"]
env_std = std_dict["mp_grid_on"]
frames = arange_frames(env_mean["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Success Rate",
    marker="",
)

Plot mean and std across runs

mp_grid_off

In [None]:
# Mean and std across runs for mp_grid_off; the x-axis length is taken from
# the number of checkpoints of the "gru" run. xticks are not passed here.
env_mean = mean_dict["mp_grid_off"]
env_std = std_dict["mp_grid_off"]
frames = arange_frames(env_mean["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Success Rate",
    marker="",
)

Plot mean and std across runs

mp_off_dense

In [None]:
# Mean and std across runs for mp_off_dense; the x-axis length is taken from
# the number of checkpoints of the "gru" run. xticks are not passed here.
env_mean = mean_dict["mp_off_dense"]
env_std = std_dict["mp_off_dense"]
frames = arange_frames(env_mean["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Path Progress",
    marker="",
)

Plot mean and std across runs

ss

In [None]:
# Mean and std across runs for ss, restricted to the four *_25 runs; the
# x-axis length is taken from the "gru_rec_25" run. xticks are not passed here.
env_mean = mean_dict["ss"]
env_std = std_dict["ss"]
ss_algorithms = ["gru_rec_25", "trxl_rec_25", "gru_25", "trxl_25"]
frames = arange_frames(env_mean["gru_rec_25"].shape[0])
plot_sample_efficiency_curve(
    frames,
    env_mean,
    env_std,
    algorithms=ss_algorithms,
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Task Progress",
    marker="",
)

Plot individual mean

mm_act_grid

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for mm_act_grid; the
# x-axis length is taken from the number of checkpoints of the "gru" run.
frames = arange_frames(mean_dict["mm_act_grid"]["gru"].shape[0])
plot_sample_efficiency_curve(
    frames,
    raw_data_dict["mm_act_grid"],
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Commands Executed",
    marker="",
    # The mm10 cell further down also passes out="mm_individuals.pdf", so
    # sharing that name here meant this figure was overwritten on Run All.
    # A distinct name keeps both outputs.
    out="mm_act_grid_individuals.pdf",
)

Plot individual mean

mm_grid

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for mm_grid; the x-axis
# length is taken from the number of checkpoints of the "gru" run.
n_checkpoints = mean_dict["mm_grid"]["gru"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Commands Executed",
    marker="",
)
plot_sample_efficiency_curve(frames, raw_data_dict["mm_grid"], **plot_options)

Plot individual mean

mm10

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for mm10 in a wide
# layout with fixed ticks; `out` presumably names the file the figure is
# saved to — confirm in plotting.py.
n_checkpoints = mean_dict["mm10"]["gru"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 501, 50)),
    yticks=[0.0, 0.25, 0.5, 0.75, 1.0],
    xlabel="Steps (in millions)",
    ylabel="Task Progress",
    marker="",
    out="mm_individuals.pdf",
)
plot_sample_efficiency_curve(frames, raw_data_dict["mm10"], **plot_options)

Plot individual mean

mp_grid_on

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for mp_grid_on; the
# x-axis length is taken from the number of checkpoints of the "gru" run.
n_checkpoints = mean_dict["mp_grid_on"]["gru"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Success Rate",
    marker="",
)
plot_sample_efficiency_curve(frames, raw_data_dict["mp_grid_on"], **plot_options)

Plot individual mean

mp_grid_off

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for mp_grid_off; the
# x-axis length is taken from the number of checkpoints of the "gru" run.
n_checkpoints = mean_dict["mp_grid_off"]["gru"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(8, 7.5),
    xlabel="Steps (in millions)",
    ylabel="Success Rate",
    marker="",
)
plot_sample_efficiency_curve(frames, raw_data_dict["mp_grid_off"], **plot_options)

Plot individual mean

mp_off_dense

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for mp_off_dense in a
# wide layout with fixed ticks; the x-axis length comes from the "trxl" run.
# `out` presumably names the file the figure is saved to — confirm.
n_checkpoints = mean_dict["mp_off_dense"]["trxl"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 8),
    xticks=list(range(0, 420, 50)),
    yticks=[0.2, 0.4, 0.6, 0.8, 1.0],
    xlabel="Steps (in millions)",
    ylabel="Task Progress",
    marker="",
    out="mp_individuals.pdf",
)
plot_sample_efficiency_curve(frames, raw_data_dict["mp_off_dense"], **plot_options)

Plot individual mean

ss

In [None]:
# Plot the un-aggregated per-run data (raw_data_dict) for ss, restricted to
# the four *_25 runs, in a wide layout with fixed ticks; the x-axis length
# comes from the "gru_rec_25" run. `out` presumably names the saved file.
n_checkpoints = mean_dict["ss"]["gru_rec_25"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    algorithms=["gru_rec_25", "trxl_rec_25", "gru_25", "trxl_25"],
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 420, 50)),
    yticks=[0.0, 0.25, 0.5, 0.75, 1.0],
    xlabel="Steps (in millions)",
    ylabel="Task Progress",
    marker="",
    out="ss_individuals.pdf",
)
plot_sample_efficiency_curve(frames, raw_data_dict["ss"], **plot_options)

# Aggregation Plots

In [19]:
# Select data for aggregation
# NOTE: envs and run_ids are rebound here to the subset used for the
# aggregated plot; they no longer hold the full lists from the loading cell.
envs = ["mm10", "mp_off_dense", "ss"]
run_ids = ["gru", "trxl", "gru_rec", "trxl_rec"]

# Shallow copy per environment so raw_data_dict itself is left untouched.
selected_data = {}
for env in envs:
    selected_data[env] = dict(raw_data_dict[env])

# For "ss" the *_25 runs stand in for the base run ids: each base key is
# replaced by its *_25 counterpart and the *_25 key is then dropped.
# pop(key, None) tolerates absent keys without a bare `except`, which would
# also have hidden unrelated errors.
ss_selected = selected_data["ss"]
ss_raw = raw_data_dict["ss"]
for run_id in run_ids:
    ss_selected.pop(run_id, None)
    ss_selected[run_id] = ss_raw[f"{run_id}_25"]
    ss_selected.pop(f"{run_id}_25", None)


In [20]:
# Aggregate, compute mean and std
# For every run id, gather the first 51 rows of each selected environment
# (presumably so arrays with different checkpoint counts can be stacked —
# confirm), stack them side by side and reduce along axis 1.
accumulated_data = {
    run_id: [selected_data[env][run_id][0:51] for env in envs]
    for run_id in run_ids
}

agg_mean_dict, agg_std_dict = {}, {}
for run_id in run_ids:
    stacked_data = np.hstack(accumulated_data[run_id])
    agg_mean_dict[run_id] = stacked_data.mean(axis=1)
    agg_std_dict[run_id] = stacked_data.std(axis=1)

In [None]:
# Plot the mean and std aggregated over the selected environments for the
# four run ids; the x-axis length is taken from the "gru_rec" entry.
# `out` presumably names the file the figure is saved to — confirm.
n_checkpoints = agg_mean_dict["gru_rec"].shape[0]
frames = arange_frames(n_checkpoints)
plot_options = dict(
    algorithms=["gru", "trxl", "gru_rec", "trxl_rec"],
    colors=COLOR_MAPPING,
    label_mapping=LABEL_MAPPING,
    figsize=(16.5, 3.5),
    xticks=list(range(0, 420, 50)),
    yticks=[0.0, 0.25, 0.5, 0.75, 1.0],
    xlabel="Steps (in millions)",
    ylabel="Task Progress",
    marker="",
    out="aggregation_finite.pdf",
)
plot_sample_efficiency_curve(frames, agg_mean_dict, agg_std_dict, **plot_options)