In [None]:
# Benchmark configuration shared by the cells below.

# problem size
dim = 2       # number of action dimensions (passed to the benchmarks as the dimension)
n_acts = 3    # number of discrete values per action dimension: 0 .. n_acts - 1

# base of the per-dimension values importance_base**i handed to the piecewise-linear
# benchmark; also the weight of the second action in the combined prediction
importance_base = 0.5

# reward settings
reward_shape = 'exponential'  # NOTE(review): not referenced by the visible cells - confirm it is still needed
exp_reward = 4.6              # decay rate in exp(-exp_reward * |target - prediction|)

In [None]:
from dacbench.benchmarks import SigmoidBenchmark
from dacbench.benchmarks import PiecewiseLinearBenchmark
import numpy as np
# Sigmoid benchmark, requested with `dim` dimensions.
sigmoid_env = SigmoidBenchmark().get_benchmark(dimension=dim)

# Piecewise-linear benchmark: every one of the `dim` action dimensions gets `n_acts`
# values, and dimension i is paired with importance_base**i
# (presumably its importance weight - confirm against set_action_values).
piecewise_bench = PiecewiseLinearBenchmark()
values_per_action = [n_acts for _ in range(dim)]
importance_per_action = np.array([importance_base**exponent for exponent in range(dim)])
piecewise_bench.set_action_values(values_per_action, importance_per_action)
piecewise_env = piecewise_bench.get_environment()

# Both environments have to be reset once so that the benchmarks are set up properly
# before their targets are queried in the following cells.
sigmoid_env.reset()
piecewise_env.reset(instance_id=0)

In [None]:
import numpy as np
# Target value of the piecewise-linear environment at every time step.
# The array starts out as NaN so that a step that was never written stays visible.
truth_piecewise = np.empty(piecewise_env.n_steps, dtype=float)
truth_piecewise.fill(np.nan)

for step in range(truth_piecewise.size):
    truth_piecewise[step] = piecewise_env._get_target(step)

In [None]:
import numpy as np
# Candidate values for each of the two actions: 0, 1, ..., n_acts - 1.
# The values are whole numbers, but np.linspace stores them as floats.
acts_1, acts_2 = (np.linspace(0, n_acts - 1, num=n_acts) for _ in range(2))

In [None]:
# Predictions of the importance-weighted setting for every (act_1, act_2) pair.
# grid_acts_1 varies along the columns and grid_acts_2 along the rows.
highest_act = n_acts - 1
act_axis = np.linspace(0, highest_act, n_acts)
grid_acts_1, grid_acts_2 = np.meshgrid(act_axis, act_axis)

# act_1 is scaled to [0, 1]; act_2 is centred around zero, scaled to [-0.5, 0.5]
# and then weighted with importance_base before both parts are added up
scaled_act_1 = grid_acts_1 / highest_act
centred_act_2 = (grid_acts_2 - highest_act / 2) / highest_act
preds_importance = scaled_act_1 + centred_act_2 * importance_base

for grid in (grid_acts_1, grid_acts_2, preds_importance):
    print(grid)


In [None]:
# Print the sigmoid targets at two probe time steps (0 and 9) as a quick sanity check.
for probe_step in (0, 9):
    probe_slopes = np.array(sigmoid_env.slopes)
    probe_shifts = np.array(sigmoid_env.shifts)
    print(sigmoid_env._sig(probe_step, probe_slopes, probe_shifts))

In [None]:
# iterate over the points in time for the environments and compute the rewards for all possible combinations of act_1 and act_2 and plot them
# as heatmaps

from matplotlib import pyplot as plt
# Figure layout: one row of heatmaps per plotted reward surface and one column per
# time step. The colorbar is not part of this grid; it gets its own axes once the
# heatmaps have been drawn.

# font sizes and line width applied globally to matplotlib
plot_style = {
    'font.size': 8,            # global font size
    'axes.titlesize': 7,       # title of the individual plots
    'axes.labelsize': 7,       # x and y labels
    'xtick.labelsize': 6.5,    # x-tick labels
    'ytick.labelsize': 6.5,    # y-tick labels
    'legend.fontsize': 7,      # legend text
    'figure.titlesize': 8,     # title of the entire figure
    'lines.linewidth': 0.75,
}
plt.rcParams.update(plot_style)
width = 5  # in inches, same as width of the latex template

# optional additional rows, both switched off here
plt_lin_reward = False  # row with the linear-reward surface of the importance-weighted prediction
plt_candid_sig = False  # row with the exponential-reward surface of the importance-weighted prediction

# two rows are always drawn; every enabled option contributes one more
rows = 2 + int(plt_lin_reward) + int(plt_candid_sig)

subsample_steps = False  # True: only plot the time steps 0, 4 and 9
showtitle = False        # True: put a title above the whole figure

time_steps = [0, 4, 9] if subsample_steps else list(range(sigmoid_env.n_steps))

# the figure height is derived from the width, the number of columns and the number of rows
n_cols = len(time_steps)
fig_size = (width, 1.1 * width / n_cols * rows)

fig, axs = plt.subplots(rows, n_cols, figsize=fig_size, sharex='all', sharey='all', dpi=100)

# Fill the heatmap grid: one column per time step, one row per reward surface.
cmap = 'viridis'


def _show_surface(ax, surface):
    """Draw `surface` on `ax` with the shared colormap and a colour scale fixed to [0, 1]."""
    return ax.imshow(surface, cmap=cmap, interpolation='nearest', vmin=0, vmax=1)


for col, t in enumerate(time_steps):
    # targets of both environments at time step t
    targets_sigmoid = sigmoid_env._sig(t, np.array(sigmoid_env.slopes), np.array(sigmoid_env.shifts))
    target_piecewise_lin = piecewise_env._get_target(t)

    # the importance-weighted prediction is only compared against the first sigmoid target
    error_candid_sig = np.abs(targets_sigmoid[0] - preds_importance)
    rewards_candid_sig_exp = np.exp(-exp_reward * error_candid_sig)
    rewards_candid_sig_lin = 1 - error_candid_sig
    rewards_piecewise_lin = np.exp(-exp_reward * np.abs(target_piecewise_lin - preds_importance))

    # in the sigmoid environment every action predicts its own target value;
    # the overall reward is the product of the two per-action rewards
    rewards_sigmoid_1 = 1 - np.abs(targets_sigmoid[0] - grid_acts_1 / (n_acts - 1))
    rewards_sigmoid_2 = 1 - np.abs(targets_sigmoid[1] - grid_acts_2 / (n_acts - 1))
    rewards_sigmoid = rewards_sigmoid_1 * rewards_sigmoid_2

    # first row: sigmoid environment, optional rows in between, last row: piecewise linear.
    # The image handle of the first row is kept so that the colorbar can be attached later.
    image = _show_surface(axs[0, col], rewards_sigmoid)
    if plt_candid_sig:
        _show_surface(axs[1, col], rewards_candid_sig_exp)
    if plt_lin_reward:
        _show_surface(axs[-2, col], rewards_candid_sig_lin)
    _show_surface(axs[-1, col], rewards_piecewise_lin)

# Every panel: no grid, three ticks spanning the action range on both axes,
# and tick labels pulled towards the axes with a negative padding.
ticks = np.linspace(0, n_acts - 1, 3)
for ax in axs.ravel():
    ax.grid(False)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.tick_params(axis='both', pad=-4)

# The top row carries the time step of its column as a math-style title.
for ax, t in zip(axs[0], time_steps):
    ax.set_title(f'$t={t}$')

# NOTE: plt.tight_layout() is left disabled; the spacing is set with subplots_adjust
# when the colorbar axes is added.

# Row captions are drawn as free text left of the first column (in axes coordinates),
# because the y-label itself is used for 'Action 2'.
offset_column_labels = -1.7
row_captions = [(0, 'Sigmoid')]
if plt_candid_sig:
    row_captions.append((1, 'CANDID\nSigmoid'))
if plt_lin_reward:
    row_captions.append((-2, 'CANDID\nSigmoid\n(lin rew)'))
row_captions.append((-1, 'Piecewise\nLinear'))

for row, caption in row_captions:
    first_ax = axs[row, 0]
    first_ax.text(offset_column_labels, 0.5, caption, transform=first_ax.transAxes, va='center', ha='center')

# bold axis labels: y-label on every panel of the first column, x-label only once,
# below the middle column of the last row
bold_font = {'fontweight': 'bold'}
for ax in axs[:, 0]:
    ax.set_ylabel('Action 2', fontdict=bold_font)
middle_col = len(time_steps) // 2
axs[-1, middle_col].set_xlabel('Action 1', fontdict=bold_font)

if showtitle:
    fig.suptitle('Reward surfaces of 2D benchmarks over time steps', fontsize=20, fontweight='bold')

import os  # only needed to create the output directory below

# Make room for the colorbar and add a dedicated axes for it right of the last column.
# The spacing between the rows and the colorbar rectangle ([left, bottom, width, height]
# in figure coordinates) depend on whether the time steps are subsampled.
if subsample_steps:
    plt.subplots_adjust(top=0.8, hspace=0.05, wspace=0.05)
    cax = fig.add_axes([0.90, 0.15, 0.05, 0.6])
else:
    plt.subplots_adjust(top=0.8, hspace=0.15, wspace=0.05)
    cax = fig.add_axes([0.93, 0.1, 0.025, 0.7])

# All heatmaps share the colour range [0, 1], so the image of the first row
# can serve as the source of the colorbar for the whole figure.
cbar = fig.colorbar(image, cax=cax)
cbar.set_label('Reward', fontdict={'fontweight': 'bold'}, loc='center')
cbar.ax.tick_params(width=0.5)
cbar.set_ticks([0, 0.5, 1])

# savefig does not create missing directories and would raise FileNotFoundError
# (e.g. on a fresh checkout), so make sure the output folder exists first.
os.makedirs('paper_plots', exist_ok=True)
fig.savefig('paper_plots/reward_surfaces.png', dpi=600, bbox_inches='tight')