In [None]:
import pandas as pd

# --- Data source -----------------------------------------------------------
# Both CSV exports live next to each other and are keyed by the project name.
project_name = "CANDID_DAC"
config_path = f"../run_data/{project_name}_configs.csv"
metrics_path = f"../run_data/{project_name}_metrics.csv"

# --- Experiment selection --------------------------------------------------
# These values are matched against the equally named columns of the config
# table further down in the notebook to pick the runs that get plotted.
benchmark = "piecewise_linear"
reward_shape = "exponential"
exp_reward = 4.6
importance_base = 0.5
dim = 5
n_acts = [3, 5, 10]  # one subplot is drawn per entry

# --- Metric ----------------------------------------------------------------
# Column of the metrics table to plot (alternative: "avg_episodic_reward").
metric_to_plot = "avg_reward_train_set"

# --- Load ------------------------------------------------------------------
df_configs, df_train = pd.read_csv(config_path), pd.read_csv(metrics_path)

In [None]:
# Select the config rows that belong to the experiment chosen in the first
# cell. `== False` (instead of `~`) keeps the comparison element-wise for any
# column dtype and excludes rows where the flag is missing.
configs = df_configs[(df_configs['benchmark'] == benchmark) &
                     (df_configs['reward_shape'] == reward_shape) &
                     (df_configs['exp_reward'] == exp_reward) &
                     (df_configs['importance_base'] == importance_base) &
                     (df_configs['n_act'].isin(n_acts)) &
                     (df_configs['dim'] == dim) &
                     (df_configs['reverse_agents'] == False)]

# sanity check: which of the requested action-space sizes are actually present
print(configs['n_act'].unique())

# One entry per (run_name, n_act) holding the list of run ids in that group.
# Selecting the column before aggregating avoids building lists for every
# other column of the config table.
group_keys = ['run_name', 'n_act']
grouped_configs = configs.groupby(group_keys)['run_id'].agg(list)
print(len(grouped_configs))

# As a control measure, collect the seeds of every group: `count` is the number
# of seeds, `list` the seeds themselves. The seeds are sorted *within* each
# group so that groups can be compared at a glance (applying `sorted` to the
# whole frame would operate on the columns, not on the per-group lists).
seeds_in_group = configs.groupby(group_keys)['seed'].agg(['count', list])
seeds_in_group['list'] = seeds_in_group['list'].apply(sorted)
# seeds_in_group

# number of runs per group (displayed as the cell output)
grouped_configs.apply(len)

In [None]:
# Ad-hoc look-up: config rows whose `name` is 'ddqn', with dim 5 and n_act 10.
is_ddqn = df_configs['name'] == 'ddqn'
is_dim_5 = df_configs['dim'] == 5
has_10_acts = df_configs['n_act'] == 10
cp_configs = df_configs[is_ddqn & is_dim_5 & has_10_acts]

In [None]:
# Show the config rows selected into `cp_configs` as the cell output.
cp_configs

In [None]:
from matplotlib import pyplot as plt
import warnings
# Silence FutureWarning / UserWarning noise for the rest of the notebook. The
# filters are installed before the helper import below, as in the original cell.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
from plotting_helpers import get_best_possible_avg_reward, translate_run_name, METHOD_COLOURS

# Figure: one panel per action-space size in `n_acts`. Each panel shows two
# horizontal reference lines returned by `get_best_possible_avg_reward`
# (max_dim=dim and max_dim=1) plus, for every run group, the mean +/- std of
# `metric_to_plot` over the runs of that group.

plt.rcParams.update({
    'font.size': 8,           # Global font size
    'axes.titlesize': 7,      # Title size of individual plots
    'axes.labelsize': 6,      # Label size for x and y labels
    'xtick.labelsize': 6,      # Size of x-tick labels
    'ytick.labelsize': 6,      # Size of y-tick labels
    'legend.fontsize': 7,      # Size of the legend text
    'figure.titlesize': 12,     # Title size of the entire figure
    'lines.linewidth': 0.5
})
width = 6
dpi = 100  # set to 100 to get in size of paper not zoomed in
x_max = 10000     # right end of the x-axis and of the reference lines
step_divisor = 10  # `_step` is divided by this before plotting on the 'Episodes' axis
y_max = 10.07     # upper y-limit shared by all panels

# squeeze=False always yields a 2-D array of Axes, so indexing also works when
# `n_acts` has a single entry (the default would return a bare Axes object).
fig, axes = plt.subplots(1, len(n_acts), figsize=(width, width / 5), sharey=True, dpi=dpi, squeeze=False)
axes = axes[0]  # the single row of panels

for ax, n_act in zip(axes, n_acts):
    # plot the best possible reward
    optimal_reward_1D = get_best_possible_avg_reward(dim=dim, benchmark=benchmark, reward_shape=reward_shape, c=exp_reward,
                                                     importance_base=importance_base, max_dim=1, n_acts=n_act)
    optimal_reward_acc = get_best_possible_avg_reward(dim=dim, benchmark=benchmark, reward_shape=reward_shape, c=exp_reward,
                                                      importance_base=importance_base, max_dim=dim, n_acts=n_act)

    ax.plot([0, x_max], [optimal_reward_acc, optimal_reward_acc], label='optimal', color='black', linestyle='--')
    ax.plot([0, x_max], [optimal_reward_1D, optimal_reward_1D], label='optimal (1D)', color='grey', linestyle='--')

    # only keep the run groups for this number of actions
    runs_for_n_act = grouped_configs.xs(n_act, level='n_act')

    # now plot rewards per approach, which is encoded by run_name
    for run_name, run_ids in runs_for_n_act.items():
        # drop the part after the last '_' to get the key used by the helpers
        method_key = run_name.rsplit('_', 1)[0]
        label = translate_run_name(method_key)
        color = METHOD_COLOURS[method_key]

        # mean/std over the runs per logged step; steps where either statistic
        # is undefined are dropped
        # NOTE(review): std is NaN when only one run has a value at a step, so
        # such steps disappear as well — confirm this is intended.
        rewards = df_train.loc[df_train['run_id'].isin(run_ids)].groupby('_step')[metric_to_plot].agg(['mean', 'std'])
        rewards = rewards.dropna()

        # plot the mean and std
        episodes = rewards.index / step_divisor
        ax.fill_between(episodes, rewards['mean'] - rewards['std'], rewards['mean'] + rewards['std'], alpha=0.5, color=color)
        ax.plot(episodes, rewards['mean'], label=label, color=color)

    # Per-panel cosmetics: applied once per panel (not once per run), so a panel
    # without any run still gets its limits, title and label.
    ax.set_ylim(0, y_max)
    ax.set_xlim(0, x_max)
    ax.tick_params(axis='x', pad=-5)
    ax.set_title(f'n_act = {n_act}', y=0.94)
    ax.set_xlabel('Episodes')

# only the left-most panel carries the y-axis label (the y-axis is shared)
y_label = 'Avg. episodic reward'
axes[0].set_ylabel(y_label)
# one legend for the whole figure, placed to the right of the last panel; this
# relies on every panel containing the same entries
axes[-1].legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)

fig.subplots_adjust(wspace=0.15,)
fig.savefig('./paper_plots/n_acts_scaling.png', bbox_inches='tight', dpi=400)