In [None]:
import pandas as pd

# --- experimental setting that selects which runs are evaluated ---
benchmark = 'piecewise_linear'
reward_shape = 'exponential'
dim = 5
n_act = 3  # actions available to each agent; the experiments always used 3
c = 4.6  # matched against the `exp_reward` column of the run configs
importance_base = 0.5

# run names carry the dimensionality as a suffix
relevant_approaches = [f'saql_{dim}D', f'sdqn_{dim}D']
# column of the metrics table that is drawn on the y-axis
metric_to_plot = 'avg_reward_test_set'

project_name = "CANDID_DAC"


# --- read the logged metrics and the configuration of every run ---
metrics_csv = f'../run_data/{project_name}_metrics.csv'
configs_csv = f'../run_data/{project_name}_configs.csv'
performance_metrics = pd.read_csv(metrics_csv)
df_config = pd.read_csv(configs_csv)

In [None]:
# Restrict the run configs to the experimental setting fixed in the first cell. Filtering the
# config dataframe suffices because the resulting run_ids are used to select the matching rows
# of the performance metrics.
# Show which values exist before filtering, so an empty selection is easy to diagnose.
print(df_config['reverse_agents'].unique())
print(df_config['run_name'].unique())
df_config = df_config[
    (df_config['dim'] == dim) &
    (df_config['benchmark'] == benchmark) &
    (df_config['reward_shape'] == reward_shape) &
    (df_config['run_name'].isin(relevant_approaches)) &
    (df_config['n_act'] == n_act) &
    (df_config['exp_reward'] == c) &
    (df_config['importance_base'] == importance_base)
]

# Group all runs that share run_name and reverse_agents. The groupby object is built once and
# the needed column is selected *before* aggregating, so only that column is turned into lists.
runs_by_group = df_config.groupby(['run_name', 'reverse_agents'])
# per group: list of run_ids, indexed by (run_name, reverse_agents)
df_grouped = runs_by_group['run_id'].agg(list)
# as a control measure, collect the seeds of every group in sorted order
seeds_in_group = runs_by_group['seed'].agg(list).apply(sorted)
seeds_in_group

In [None]:
# compute the best possible avg episodic reward
from dacbench.benchmarks import SigmoidBenchmark
from plotting_helpers import compute_optimal_episode_reward, get_best_possible_avg_reward
import numpy as np

# Both reference values are computed with the same settings and differ only in `max_dim`.
optimum_settings = dict(
    dim=dim,
    n_acts=n_act,
    benchmark=benchmark,
    reward_shape=reward_shape,
    c=c,
    importance_base=importance_base,
)
optim_reward_per_episode_1D = get_best_possible_avg_reward(**optimum_settings, max_dim=1)
optim_reward_per_episode_acc = get_best_possible_avg_reward(**optimum_settings, max_dim=dim)


In [None]:
from matplotlib import pyplot as plt
from plotting_helpers import translate_run_name, METHOD_COLOURS
# Plot the mean of `metric_to_plot` (shaded band: +/- one standard deviation across the runs of
# a group) for every (approach, reverse_agents) group, together with two horizontal reference
# lines for the best possible rewards computed in the previous cell.

# Range of the x-axis; shared by the axis limit and the tick positions.
n_episodes = 10000
# `_step` is divided by this value before it is drawn on the 'Episodes' axis
# (presumably 10 logged steps per episode -- TODO confirm against the logging code).
step_divisor = 10
# Upper end of the y-axis; shared by the axis limit and the tick positions.
max_reward_shown = 10

plt.rcParams.update({
    'font.size': 8,           # Global font size
    'axes.titlesize': 7,      # Title size of individual plots
    'axes.labelsize': 7,      # Label size for x and y labels
    'xtick.labelsize': 6.5,      # Size of x-tick labels
    'ytick.labelsize': 6.5,      # Size of y-tick labels
    'legend.fontsize': 7,      # Size of the legend text
    'figure.titlesize': 12,     # Title size of the entire figure
    'lines.linewidth': 0.75
})
fig, ax = plt.subplots(figsize=(3, 1.6))

# horizontal reference lines; axhline spans the whole axis without allocating a data array
ax.axhline(optim_reward_per_episode_acc, color='black', linestyle='--', label='optimal')
ax.axhline(optim_reward_per_episode_1D, color='grey', linestyle='--', label='optimal (1D)')

# iterate over the groups and plot the performance of each group
for (run_name, reverse_agents), run_ids in df_grouped.items():
    # Drop the dimensionality suffix, i.e. everything after the last underscore.
    # rsplit leaves a name without any underscore untouched, whereas slicing with
    # rfind() == -1 would silently cut off its last character.
    method = run_name.rsplit('_', 1)[0]
    color = METHOD_COLOURS[method]
    label = translate_run_name(method)
    if reverse_agents:
        label += ' (reversed importances)'
    linestyle = '--' if reverse_agents else '-'

    # mean and standard deviation of the metric over all runs of this group, per logged step;
    # steps for which either statistic is NaN are not drawn
    group_metrics = performance_metrics[performance_metrics['run_id'].isin(run_ids)]
    stats = group_metrics.groupby('_step')[metric_to_plot].agg(['mean', 'std']).dropna()

    episodes = stats.index / step_divisor
    ax.plot(episodes, stats['mean'], label=label, color=color, linestyle=linestyle)
    ax.fill_between(episodes, stats['mean'] - stats['std'], stats['mean'] + stats['std'],
                    alpha=0.4, facecolor=color)

# Last logged step on the 'Episodes' scale. NOTE(review): not used for the axis limits below,
# which are fixed to n_episodes; kept in case cells outside this view read it.
max_epoch = performance_metrics['_step'].max() / step_divisor

ax.set_xlim(left=0, right=n_episodes)
ax.set_ylim(bottom=0, top=max_reward_shown)
ax.axis('on')

ax.set_xticks(np.linspace(0, n_episodes, 5))
ax.set_yticks(np.linspace(0, max_reward_shown, 5))

ax.set_xlabel('Episodes')
ax.set_ylabel('Avg. episodic reward')

# legend goes to the right of the axes so that it does not cover the curves
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# leave some head-room above the axes (no title is currently set on this figure)
fig.subplots_adjust(top=0.85)
fig.savefig('./paper_plots/pl_reversed_importances.png', dpi=600, bbox_inches='tight')