In [None]:
import pandas as pd

# Select the project and the run whose trained agent should be plotted.
project_name = 'CANDID_DAC'
# Exactly one run id is active at a time; the alternatives are kept commented
# out so that switching runs is a one-line change (previously all were assigned
# in sequence and only the last assignment took effect).
# run_id = 'fe9iwm4u' # SDQN example
# run_id = 'wtr6qyl1' # IQL example
run_id = 'hsytlcw1' # SAQL example
# run_id = 'pflbflhn' # SAQL 2D
# Number of action values per dimension; passed to the piecewise-linear
# benchmark when the environment is built in the next cell.
n_act = 3

ckpts_path = f'../../results/models/{project_name}/{run_id}'
config_path = f'../run_data/{project_name}_configs.csv'

# One row per run, indexed by run id.
config_df = pd.read_csv(config_path).set_index('run_id')

# Fail with a message that names the run and the file instead of a bare KeyError.
if run_id not in config_df.index:
    raise KeyError(f"run_id {run_id!r} not found in {config_path}")

# Configuration of the selected run; fields are read via attribute access below.
config = config_df.loc[run_id]
print(config.to_dict())

In [None]:
import warnings
warnings.filterwarnings("ignore")
from plotting_helpers import load_policy_from_checkpoint
from dacbench.benchmarks import SigmoidBenchmark, PiecewiseLinearBenchmark
import numpy as np

# Build the environment the agent is evaluated and plotted on.

# Normalise to a built-in int so that `range` and the benchmark constructors
# do not depend on whichever scalar type pandas hands back for this field.
dim = int(config.dim)

# Importance of dimension i is importance_base**i.
importances = np.array([config.importance_base**i for i in range(dim)])

if config.benchmark == 'candid_sigmoid':
    # Any value other than 'exp'/'exponential' falls back to a linear reward shape.
    reward_shape = 'exponential' if config.reward_shape in ['exp', 'exponential'] else 'linear'
    env = SigmoidBenchmark().get_importances_benchmark(dimension=dim, importances=importances, reward_shape=reward_shape)
elif config.benchmark == 'piecewise_linear':
    benchmark = PiecewiseLinearBenchmark()
    # The same number of action values (n_act) is used for every dimension.
    benchmark.set_action_values([n_act for _ in range(dim)], importances)
    env = benchmark.get_environment()
else:
    # Previously an unknown benchmark left `env` undefined and the policy
    # loading below failed with an unrelated NameError.
    raise ValueError(
        f"Unsupported benchmark {config.benchmark!r}; "
        "expected 'candid_sigmoid' or 'piecewise_linear'"
    )

# Load the trained policy for the selected run from its checkpoint directory.
policy = load_policy_from_checkpoint(config.to_dict(), env=env, ckpt_directory=ckpts_path, episode=10000, final=True)

In [None]:
# run the policy on several instances of the importance benchmark and collect actions, rewards and ground truth for plotting
import numpy as np
from candid_dac.policies import AtomicPolicy
import torch

# Per-instance results, filled in the order of `instances_ids`.
truth_on_instances = []
actions_on_instances = []
obtained_rewards = []
# Six instance ids evenly spaced over 0..299 (truncated to int).
instances_ids = np.linspace(0, 299, 6, endpoint=True, dtype=int)

if config.benchmark == 'piecewise_linear':
    # No rollout is performed for this benchmark; the result lists stay empty.
    pass
else:
    dim = config.dim
    # Buffer for the actions of one episode, one row per dimension; it is fully
    # overwritten on every episode and copied before being stored.
    actions = np.zeros((dim, env.n_steps))
    # Time grid on which the target curve is sampled; identical for all instances.
    points_in_time = np.linspace(0, env.n_steps - 1, 100, endpoint=True)

    for inst_id in instances_ids:
        obs, _ = env.reset(instance_id=inst_id)
        obtained_reward = 0

        for t in range(env.n_steps):
            if isinstance(policy, AtomicPolicy):
                # Atomic policies return a single flat index; convert it into
                # one index per dimension of the action space.
                action = policy(torch.tensor(obs))
                action = np.unravel_index(action, env.action_space.nvec)
            else:
                action = policy.get_action(obs)
            obs, reward, terminated, truncated, _ = env.step(action)
            obtained_reward += reward
            actions[:, t] = action

        actions_on_instances.append(actions.copy())
        obtained_rewards.append(obtained_reward)

        # Only the first slope/shift entry of the current instance is evaluated here.
        truth_on_instances.append(env._sig(points_in_time, env.slopes[0], env.shifts[0]))

print(len(truth_on_instances))
print(len(actions_on_instances))


**Plot agent predictions on several benchmark instances**

In [None]:
# now plot the results
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import matplotlib.cm as cm
from plotting_helpers import translate_run_name
import os

print(env.inst_id)

# One subplot per instance, three per row. The row count is rounded up so that
# no instance is dropped when the number of instances is not a multiple of 3.
n_cols = 3
rows = (len(instances_ids) + n_cols - 1) // n_cols
plt.rcParams.update({
    'font.size': 8,           # Global font size
    'axes.titlesize': 7,      # Title size of individual plots
    'axes.labelsize': 7,      # Label size for x and y labels
    'xtick.labelsize': 6.5,      # Size of x-tick labels
    'ytick.labelsize': 6.5,      # Size of y-tick labels
    'legend.fontsize': 7,      # Size of the legend text
    'figure.titlesize': 12,     # Title size of the entire figure
    'lines.linewidth': 0.75,
    'lines.markersize': 3,
})

width = 6 # latex textwidth
# squeeze=False keeps `axs` two-dimensional even for a single row, which the
# row/column indexing further down relies on.
fig, axs = plt.subplots(rows, n_cols, figsize=(width, width/2), sharex=True, sharey=True, dpi=100, squeeze=False)

# Display name of the algorithm; also used by the following cell.
algorithm = translate_run_name(run_name=config.algorithm)

# Re-run one episode per instance on the test set and let the environment draw
# its predictions into the corresponding subplot.
all_episode_actions = []
env.use_test_set()
for inst_id, ax in zip(instances_ids, axs.flatten()):
    obs, _ = env.reset(instance_id=inst_id)
    for _ in range(env.n_steps):
        # NOTE(review): unlike the rollout cell above, AtomicPolicy is not
        # special-cased here and the action is converted with .numpy() - confirm
        # that every supported policy provides a tensor-returning get_action().
        action = policy.get_action(obs)
        obs, reward, terminated, truncated, _ = env.step(action.numpy())
    ax.set_title(f'Instance {inst_id}', pad=1)
    env.plot_predictions(ax)
    # NOTE(review): stores a reference; if the environment reuses this object
    # across resets, a copy is needed here - confirm.
    all_episode_actions.append(env.episode_actions)
    ax.set_xticks(np.linspace(0, env.n_steps-1, 4))
    ax.set_yticks(np.linspace(0, 1, 6))
    ax.tick_params(axis='x', pad=-5)

# Hide subplots of the last row that have no instance assigned.
for ax in axs.flatten()[len(instances_ids):]:
    ax.set_visible(False)

# At least one colour is required because the legend uses colors[-1]
# (dim == 1 would otherwise produce an empty colour array).
colors = plt.cm.viridis_r(np.linspace(0, 1, max(dim-1, 1)))
# create a customized legend with a colorbar below it
custom_lines = [Line2D([0], [0], color="tab:orange", linestyle="-", label="target"),
                Line2D([0], [0], linestyle="None", marker="o", color="gray", label="partially aggr. prediction"),
                Line2D([0], [0], color=colors[-1], linestyle=":", label="final prediction"),]
# place the legend to the right of the last plot in the first row
legend = axs[0, -1].legend(loc='center left', bbox_to_anchor=(1, -0.2), handles=custom_lines)
# Get the bounding box of the legend
legend_box = legend.get_window_extent()
# Convert the bounding box from display units to figure units
legend_box_fig = legend_box.transformed(fig.transFigure.inverted())
# Create new axes for the colorbar below the legend
cbar_ax = fig.add_axes([legend_box_fig.x0, legend_box_fig.y0 - 0.1, legend_box_fig.width, 0.05])
# Create color bar with manually specified ticks
cbar = fig.colorbar(cm.ScalarMappable(cmap=cm.viridis_r, norm=plt.Normalize(vmin=dim-1, vmax=0)),
                    cax=cbar_ax, ticks=np.arange(dim), orientation='horizontal')
cbar.ax.tick_params(length=0)  # Remove ticks
cbar.set_label('aggregated dimensions')


for ax in axs[:, 0]:
    ax.set_ylabel('prediction value')
for ax in axs[-1, :]:
    ax.set_xlabel('$t$')
# adapt horizontal spacing between subplots
# NOTE(review): this runs after the colorbar axes were positioned from the
# legend's bounding box, so the two may shift relative to each other - confirm
# the alignment in the saved figure.
plt.subplots_adjust(wspace=0.1, hspace=0.15)
# Make sure the output directory exists; savefig does not create it.
output_dir = 'paper_plots'
os.makedirs(output_dir, exist_ok=True)
fig.savefig(f'{output_dir}/{algorithm.lower()}_on_{dim}D_{config.benchmark}.png', bbox_inches='tight', dpi=600)
# Optional figure title (disabled for the paper version):
# fig.suptitle(f"{algorithm} on {dim}D {'CANDID Sigmoid' if config.benchmark == 'candid_sigmoid' else 'Piecewise Linear'}", fontweight='bold')
# plt.show()

**Plot the substeps of action selection**

In [None]:
import os

# Larger fonts and lines than in the multi-panel figure, since this is a
# single-panel plot.
plt.rcParams.update({
    'font.size': 10,           # Global font size
    'axes.titlesize': 9,      # Title size of individual plots
    'axes.labelsize': 9,      # Label size for x and y labels
    'xtick.labelsize': 7,      # Size of x-tick labels
    'ytick.labelsize': 7,      # Size of y-tick labels
    'legend.fontsize': 8,      # Size of the legend text
    'figure.titlesize': 12,     # Title size of the entire figure
    'lines.linewidth': 1.5,
    'lines.markersize': 4,
})

fig, ax = plt.subplots(1, 1, figsize=(5, 4))
# Let the environment draw the action selection at step t=5 of whatever
# episode it ran last (see the printed instance id below).
env.render_action_selection(t=5, ax=ax)
print(env.inst_id)
ax.set_ylabel('prediction value')
# Make sure the output directory exists; savefig does not create it.
output_dir = 'paper_plots'
os.makedirs(output_dir, exist_ok=True)
fig.savefig(f'{output_dir}/{algorithm.lower()}_action_selection_on_{dim}D_{config.benchmark}.png', bbox_inches='tight', dpi=600)
# TODO: move the legend to the lower right corner (not implemented yet)

In [None]:
# export the data to csv (currently disabled; uncomment the lines below to write one CSV of episode actions per instance)
# import numpy as np
# # store the episode actions to csvs
# for inst_id, episode_actions in zip(instances_ids, all_episode_actions):
#     np.savetxt(f'./{project_name}_episode_actions_inst_{inst_id}.csv', episode_actions, delimiter=',')