In [None]:
import warnings
# Suppress UserWarning / FutureWarning messages attributed to the dacbench module.
warnings.filterwarnings("ignore", category=UserWarning, module="dacbench")
warnings.filterwarnings("ignore", category=FutureWarning, module="dacbench")
import pandas as pd

# ---- where the exported run data lives ----
project_name = "CANDID_DAC"
configs_path = f"../run_data/{project_name}_configs.csv"
metrics_path = f"../run_data/{project_name}_metrics.csv"

# ---- which experiment to plot ----
benchmark = "piecewise_linear"
config_id = "best"  # "best" selects every config_id containing "best"; any other value must match exactly
reward_shape = 'exponential'
exp_reward = 4.6
importance_bases = [0.3, 0.5, 0.7]
dim = 5
n_act = 3
metric_to_plot = "avg_reward_test_set"

# ---- load the raw tables ----
df_configs = pd.read_csv(configs_path)
df_metrics = pd.read_csv(metrics_path)

# Pre-filter the configs down to the requested config_id(s):
# substring match for "best", exact match for anything else.
config_id_mask = (
    df_configs["config_id"].str.contains("best")
    if config_id == "best"
    else df_configs["config_id"] == config_id
)
df_configs = df_configs[config_id_mask]

**Filter the config dataframe to only contain experiments that are of interest to us**

In [None]:
# Narrow df_configs to the requested config_id(s). Re-applying this filter to an
# already filtered frame leaves it unchanged.
if config_id != "best":
    # a concrete config_id was requested: keep exact matches only
    df_configs = df_configs[df_configs["config_id"] == config_id]
else:
    # "best" keeps every row whose config_id contains the substring "best"
    df_configs = df_configs[df_configs["config_id"].str.contains("best")]

# Select the rows describing the experiment chosen in the setup cell:
# one benchmark / reward shape / dimensionality / action count, all requested
# importance bases, and only runs with reverse_agents == False.
experiment_mask = (
    df_configs['benchmark'].eq(benchmark)
    & df_configs['reward_shape'].eq(reward_shape)
    & df_configs['exp_reward'].eq(exp_reward)
    & df_configs['importance_base'].isin(importance_bases)
    & df_configs['dim'].eq(dim)
    & df_configs['reverse_agents'].eq(False)
    & df_configs['n_act'].eq(n_act)
)
configs = df_configs[experiment_mask]

In [None]:
# Show full cell contents so the seed lists below are not truncated when printed.
pd.set_option('display.max_colwidth', None)

# Collapse every (run_name, importance_base) setup into lists of its column values.
lists_per_setup = configs.groupby(['run_name', 'importance_base']).agg(list)

# run ids belonging to each setup; consumed by the plotting cell
grouped_configs = lists_per_setup['run_id']

# sorted seeds per setup, printed to check that all seeds are available for every setup
seeds_in_group = lists_per_setup['seed'].apply(sorted)
print(seeds_in_group)

In [None]:
from matplotlib import pyplot as plt
import numpy as np
from plotting_helpers import get_best_possible_avg_reward, translate_run_name, METHOD_COLOURS

# Draw one panel per importance base: mean +/- std of `metric_to_plot` over the
# runs of each method, together with two horizontal reference lines, and save
# the figure to ./paper_plots/pl_importance_decays.png.
plt.rcParams.update({
    'font.size': 8,           # Global font size
    'axes.titlesize': 7,      # Title size of individual plots
    'axes.labelsize': 7,      # Label size for x and y labels
    'xtick.labelsize': 6.5,      # Size of x-tick labels
    'ytick.labelsize': 6.5,      # Size of y-tick labels
    'legend.fontsize': 7,      # Size of the legend text
    'figure.titlesize': 12,     # Title size of the entire figure
    'lines.linewidth': 0.75
})
height = 1.2
width = 5*height
# squeeze=False + ravel keeps `axes` a 1-D array even when only a single
# importance base is plotted (plt.subplots would otherwise return a bare Axes).
fig, axes = plt.subplots(1, len(importance_bases), figsize=(width, height), sharey=True, squeeze=False)
axes = axes.ravel()

# Rows that actually carry the metric; this does not depend on the loops below,
# so it is computed once instead of once per run.
valid_metrics = df_metrics[df_metrics[metric_to_plot].notna()]

for i, importance_base in enumerate(importance_bases):
    ax = axes[i]

    # reference rewards, drawn as dashed horizontal lines
    # NOTE(review): max_dim=1 vs. max_dim=dim presumably selects how many
    # dimensions are acted on optimally — confirm against plotting_helpers.
    optimal_reward_1D = get_best_possible_avg_reward(dim, benchmark=benchmark,
                                                     reward_shape=reward_shape, c=exp_reward,
                                                     importance_base=importance_base, max_dim=1)
    optimal_reward_acc = get_best_possible_avg_reward(dim, benchmark=benchmark,
                                                      reward_shape=reward_shape, c=exp_reward,
                                                      importance_base=importance_base, max_dim=dim)

    print(f'Optimal reward for {dim}D: {optimal_reward_acc} (acc), {optimal_reward_1D} (1D)')

    ax.plot([0, 10000], [optimal_reward_acc, optimal_reward_acc], label='optimal', color='black', linestyle='--')
    ax.plot([0, 10000], [optimal_reward_1D, optimal_reward_1D], label='optimal (1D)', color='grey', linestyle='--')

    # only keep the runs for this importance base
    importance_configs = grouped_configs.xs(importance_base, level='importance_base')

    # now plot rewards per approach, which is encoded by run_name
    for run_name, run_ids in importance_configs.items():
        # drop everything after the last underscore to get the key used for labels/colours
        run_name = run_name.rsplit('_', 1)[0]
        label = translate_run_name(run_name)
        color = METHOD_COLOURS[run_name]

        # mean and std of the metric across this setup's runs, per logged step;
        # the mask is built from the already filtered frame so the indices match
        rewards = (
            valid_metrics.loc[valid_metrics['run_id'].isin(run_ids)]
            .groupby('_step')[metric_to_plot]
            .agg(['mean', 'std'])
        )

        # x values are _step / 10 on an axis labelled 'Episodes'
        # NOTE(review): presumably 10 logged steps per episode — confirm.
        ax.fill_between(rewards.index / 10, rewards['mean'] - rewards['std'], rewards['mean'] + rewards['std'], alpha=0.5, color=color)
        ax.plot(rewards.index / 10, rewards['mean'], label=label, color=color)

    # Axis formatting is applied once per panel (not once per run), so a panel
    # without any runs is still formatted consistently.
    # NOTE(review): set_yticks includes a tick at 0, which may expand the lower
    # y-limit below the 2 requested in set_ylim — confirm the intended range.
    ax.set_ylim(2, 10.05)
    ax.set_xlim(0, 10000)
    ax.set_xticks(np.linspace(0, 10000, 5))
    ax.set_yticks(np.linspace(0, 10, 5))

    # raw f-string: '\l' is not a valid escape sequence in a normal string literal
    ax.set_title(rf'Importance decay $\lambda$={importance_base}')
    ax.set_xlabel('Episodes')

# the y-axis is shared, so only the leftmost panel gets a label
y_label = 'Avg. episodic reward'
axes[0].set_ylabel(y_label)
# set a shared legend, right to the rightmost plot
axes[-1].legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

# reduce the horizontal space between the panels
plt.subplots_adjust(wspace=0.15)
# figure title; currently not drawn because the suptitle call is disabled
title = f'{dim}D {"Piecewise Linear" if benchmark == "piecewise_linear" else "CANDID Sigmoid"}'
# fig.suptitle(title, fontweight='bold')
fig.savefig('./paper_plots/pl_importance_decays.png', dpi=600, bbox_inches='tight')