In [None]:
# Configure plotting
%matplotlib inline

import matplotlib
from matplotlib import pyplot as plt
matplotlib.rcParams['figure.figsize'] = (10, 4)
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}'
matplotlib.rcParams['font.size'] = 16

import os
import sys
import pickle
from pathlib import Path
from datetime import datetime
from itertools import product

import GPy
import numpy as np
RANDOM_SEED=123
np.random.seed(RANDOM_SEED)

import seaborn as sns

from utils.model import GPyModel

from utils.utils import discretized_normal_distribution, sample_index_from_p
from utils.utils import w_x_t, kappa_x_w, MMD, worst_context_distribution_DRO

In [None]:
# Domain definition

# Inclusive [lower, upper] bounds of each variable
ins_lim = [0, 30]
carb_lim = [20, 80]
fbg_lim = [100, 150]

# Grid sizes
N_ins = ins_lim[1] - ins_lim[0] + 1  # Action (insulin) set: one grid point per integer value
N_carb = 20  # Context (carbohydrate) set
N_fbg = 1  # Fasting blood glucose set: a single fixed value

# Grid vectors
ins = np.linspace(*ins_lim, N_ins)
carb = np.linspace(*carb_lim, N_carb)
fbg = np.array([100])  # Fasting blood glucose is fixed to 100

# Full grid: InsCarbFbg_pairs[i, j, k] == (ins[i], carb[j], fbg[k])
InsCarbFbg_pairs = np.stack(np.meshgrid(ins, carb, fbg, indexing="ij"), axis=-1)

In [None]:
obj_lengthscale = 10  # Lengthscale shared by the MMD kernel and the GP kernel

# Gram matrix of the carbohydrate grid under a Matern-5/2 kernel; M is what MMD is computed with
Carb_kern = GPy.kern.Matern52(input_dim=1, lengthscale=obj_lengthscale)
M = Carb_kern.K(carb[:, np.newaxis])

# Optional visual check of M:
# plt.imshow(M, interpolation='bicubic')
# plt.colorbar()

In [None]:
def create_w_true_and_ref(t=None):
    """Draw one pair of true (w_true) and reference (w_ref) context distributions.

    The true distribution is centred on a uniformly random point of the carb grid.
    The reference distribution is centred on that point shifted by an integer offset
    drawn uniformly from {-6, ..., 6}, clipped to the grid.

    :param t: optional round number. When given, the offset is divided by log2(t) and
        rounded, so the distributional shift converges to 0 with rate 1/log2(t).
        Must be greater than 1.
    :return: tuple (w_true, w_ref), each as returned by discretized_normal_distribution
    :raises ValueError: if t is given but is not greater than 1
    """
    # log2(t) is 0 at t == 1 (division by zero) and negative below it (drift sign flips).
    # Checked before any random draw so a rejected call does not advance the random stream.
    if t is not None and t <= 1:
        raise ValueError(f"t must be greater than 1 when given, got {t!r}")

    w_true_ind = np.random.randint(N_carb)

    w_ref_ind_offset = (np.random.randint(13) - 6)  # Drift RV is U(-6, 6)
    if t is not None:
        w_ref_ind_offset = int(np.round(w_ref_ind_offset / np.log2(t)))
    # Keep the shifted centre inside the carb grid
    w_ref_ind = np.clip(w_true_ind + w_ref_ind_offset, a_min=0, a_max=N_carb-1)

    # NOTE(review): the third argument is presumably a variance (std 1.5 and 3) - confirm
    # against discretized_normal_distribution.
    w_ref = discretized_normal_distribution(carb, carb[w_ref_ind], (1.5)**2)
    w_true = discretized_normal_distribution(carb, carb[w_true_ind], (3)**2)

    return w_true, w_ref

In [None]:
# Parsed contents of the precomputed-results file, keyed by file path, so the pickle is
# read from disk once instead of on every lookup.
_SIMGLUCOSE_RESULTS_CACHE = {}


def _load_simglucose_results(result_dict_path):
    """Return the precomputed results table stored at result_dict_path ({} if the file is missing)."""
    if result_dict_path not in _SIMGLUCOSE_RESULTS_CACHE:
        if not os.path.exists(result_dict_path):
            # A missing file is not cached, so it is picked up if it appears later.
            return {}
        # NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
        with open(result_dict_path, 'rb') as f:
            _SIMGLUCOSE_RESULTS_CACHE[result_dict_path] = pickle.load(f)
    return _SIMGLUCOSE_RESULTS_CACHE[result_dict_path]


def run_simglucose_simulation(meal, fasting_bg, insulin):
    """
    Look up the precomputed simulator output for one configuration.

    :param meal: float, amount of glucose intake
    :param fasting_bg: float, fasting blood glucose of the patient
    :param insulin: float, amount of insulin intake
    :return: the value stored under the key (fasting_bg, meal, insulin)
    :raises RuntimeError: if the configuration is not in the precomputed table
        (including when the results file does not exist)

    Simulation results precomputed due to license restrictions on simulator usage.
    """

    result_dict_path = 'functions/simglucose_outputs.pkl'
    old_vals = _load_simglucose_results(result_dict_path)

    old_bgl = old_vals.get((fasting_bg, meal, insulin))
    if old_bgl is not None:
        return old_bgl

    raise RuntimeError("Given simulation configuration is not precomputed.")

    # Disabled simulator call, kept for reference on how the table was filled:
    # new_bgl = simpatient("adolescent#001", fasting_bg, meal, insulin, 0, 150)['bgl']
    # old_vals[(fasting_bg, meal, insulin)] = new_bgl
    # with open(result_dict_path, 'wb') as f:
    #     pickle.dump(old_vals, f)
    #
    # return new_bgl


def get_observation(meal, fasting_bg, insulin, noise_std=0, target_bgl=112.5):
    """Given a simulation configuration, returns the noisy observation.

    :param meal: float, amount of glucose intake
    :param fasting_bg: float, fasting blood glucose of the patient
    :param insulin: float, amount of insulin intake
    :param noise_std: standard deviation of the Gaussian noise added to the looked-up value
    :param target_bgl: blood glucose level the pseudo-objective measures the distance to
    :return: -|noisy blood glucose - target_bgl|, so values closer to the target score higher
    :raises RuntimeError: propagated from run_simglucose_simulation for configurations
        that are not precomputed
    """
    bgl = run_simglucose_simulation(meal, fasting_bg, insulin)

    # np.shape also accepts plain Python numbers, which have no .shape attribute.
    # The noise is drawn even when noise_std is 0, so the random stream consumed by a
    # call does not depend on noise_std.
    bgl = bgl + np.random.randn(*np.shape(bgl)) * noise_std

    # Our pseudo-objective
    return -abs(bgl - target_bgl)

# Smoke test of the precomputed lookup with meal=0, fasting_bg=80, insulin=0.
# NOTE(review): this configuration lies outside the grid defined above (carb in [20, 80],
# fbg fixed to 100); if it is not in the precomputed table this line raises RuntimeError.
print(run_simglucose_simulation(0, 80, 0))

In [None]:
# Tabulate the objective f on the grid without noise: InsCarb_mat[i, j] = f(carb[i], ins[j]).
InsCarb_mat = np.empty((N_carb, N_ins))
for carb_i, ins_j in np.ndindex(N_carb, N_ins):
    InsCarb_mat[carb_i, ins_j] = get_observation(carb[carb_i], fbg[0], ins[ins_j])

In [None]:
def BO_loop_DRO(model, distributions, contexts, epsilon_coef, beta, T):
    """Run T rounds of distributionally robust BO and return the chosen indices.

    Each round builds a UCB from the model prediction, computes for every insulin action
    the worst expected UCB over the ambiguity set around the reference distribution,
    plays the action with the largest such value, then observes a noisy reward at the
    precomputed context and updates the model.

    :param model: surrogate model providing predict, add_sample, update, input_dim, noise_std
    :param distributions: indexable by round, yielding the (w_true, w_ref) pair of that round
    :param contexts: integer array; contexts[t, 0] is the carb index, contexts[t, 1] the fbg index
    :param epsilon_coef: ambiguity-set radius as a multiple of MMD(w_true, w_ref)
    :param beta: multiplier of the predictive standard deviation in the UCB
    :param T: number of rounds
    :return: list with one [action index, carb index, fbg index] entry per round
    """
    history = []
    for t in range(T):
        w_true, w_ref = distributions[t]

        # Radius of the DRBO ambiguity set: the true distance scaled by a coefficient, e.g. 1/3
        epsilon = MMD(M, w_true, w_ref) * epsilon_coef
        print("Epsilon:", epsilon)

        # Context of this round (fasting blood glucose is fixed)
        carb_idx, fbg_idx = contexts[t, 0], contexts[t, 1]

        # UCB of the model over the whole (insulin, carb) grid
        grid = InsCarbFbg_pairs[:, :, fbg_idx, :].reshape(-1, model.input_dim)
        mu, var = model.predict(grid)
        ucb = (mu + beta*np.sqrt(var)).reshape(N_ins, N_carb)

        # Worst expected reward within the ambiguity set, per action
        worst_case_value = np.zeros(N_ins)
        for ins_idx, ucb_row in enumerate(ucb):
            worst_w = worst_context_distribution_DRO(M, w_ref, ucb_row, epsilon)
            worst_case_value[ins_idx] = np.dot(worst_w.T, ucb_row)

        # Play the action whose worst case is best
        action_idx = np.argmax(worst_case_value)
        history.append([action_idx, carb_idx, fbg_idx])

        # Observe the reward at the played point and refit the model
        x_new = InsCarbFbg_pairs[action_idx, carb_idx, fbg_idx].reshape(1, -1)
        insulin_ts, carb_ts, fbg_ts = x_new[0]
        y_new = np.array(
            get_observation(carb_ts, fbg_ts, insulin_ts, noise_std=model.noise_std)
        ).reshape(1, -1)
        model.add_sample(x_new, y_new)
        model.update()

        print(f"Chosen point - insulin: {insulin_ts}, carb.: {carb_ts}, fbg: {fbg_ts}")
        print("Blood glucose(noisy) discrepancy:", y_new[0][0])

    return history

In [None]:
def BO_loop_WRO(model, distributions, contexts, beta, T):
    """Run T rounds of worst-case robust BO and return the chosen indices.

    Each round plays the insulin action whose UCB, minimised over all carb contexts, is
    largest; then observes a noisy reward at the precomputed context and updates the model.

    :param model: surrogate model providing predict, add_sample, update, input_dim, noise_std
    :param distributions: not read by this function
    :param contexts: integer array; contexts[t, 0] is the carb index, contexts[t, 1] the fbg index
    :param beta: multiplier of the predictive standard deviation in the UCB
    :param T: number of rounds
    :return: list with one [action index, carb index, fbg index] entry per round
    """
    history = []
    for t in range(T):
        # Context of this round (fasting blood glucose is fixed)
        carb_idx, fbg_idx = contexts[t, 0], contexts[t, 1]

        # UCB of the model over the whole (insulin, carb) grid
        grid = InsCarbFbg_pairs[:, :, fbg_idx, :].reshape(-1, model.input_dim)
        mu, var = model.predict(grid)
        ucb = (mu + beta*np.sqrt(var)).reshape(N_ins, N_carb)

        # Max over actions of the min over contexts
        action_idx = np.argmax(ucb.min(axis=1))
        history.append([action_idx, carb_idx, fbg_idx])

        # Observe the reward at the played point and refit the model
        x_new = InsCarbFbg_pairs[action_idx, carb_idx, fbg_idx].reshape(1, -1)
        insulin_ts, carb_ts, fbg_ts = x_new[0]
        y_new = np.array(
            get_observation(carb_ts, fbg_ts, insulin_ts, noise_std=model.noise_std)
        ).reshape(1, -1)
        model.add_sample(x_new, y_new)
        model.update()

        print(f"Chosen point - insulin: {insulin_ts}, carb.: {carb_ts}, fbg: {fbg_ts}")
        print("Blood glucose(noisy) discrepancy:", y_new[0][0])

    return history

In [None]:
def BO_loop_SO(model, distributions, contexts, beta, T):
    """Run T rounds of stochastic-optimisation BO and return the chosen indices.

    Each round plays the insulin action with the largest expected UCB under the reference
    distribution; then observes a noisy reward at the precomputed context and updates the model.

    :param model: surrogate model providing predict, add_sample, update, input_dim, noise_std
    :param distributions: indexable by round, yielding the (w_true, w_ref) pair of that round
    :param contexts: integer array; contexts[t, 0] is the carb index, contexts[t, 1] the fbg index
    :param beta: multiplier of the predictive standard deviation in the UCB
    :param T: number of rounds
    :return: list with one [action index, carb index, fbg index] entry per round
    """
    history = []
    for t in range(T):
        # Only the reference distribution enters the decision
        _, w_ref = distributions[t]

        # Context of this round (fasting blood glucose is fixed)
        carb_idx, fbg_idx = contexts[t, 0], contexts[t, 1]

        # UCB of the model over the whole (insulin, carb) grid
        grid = InsCarbFbg_pairs[:, :, fbg_idx, :].reshape(-1, model.input_dim)
        mu, var = model.predict(grid)
        ucb = (mu + beta*np.sqrt(var)).reshape(N_ins, N_carb)

        # Expected UCB of every action under w_ref; play the best one
        action_idx = np.argmax(ucb @ w_ref)
        history.append([action_idx, carb_idx, fbg_idx])

        # Observe the reward at the played point and refit the model
        x_new = InsCarbFbg_pairs[action_idx, carb_idx, fbg_idx].reshape(1, -1)
        insulin_ts, carb_ts, fbg_ts = x_new[0]
        y_new = np.array(
            get_observation(carb_ts, fbg_ts, insulin_ts, noise_std=model.noise_std)
        ).reshape(1, -1)
        model.add_sample(x_new, y_new)
        model.update()

        print(f"Chosen point - insulin: {insulin_ts}, carb.: {carb_ts}, fbg: {fbg_ts}")
        print("Blood glucose(noisy) discrepancy:", y_new[0][0])

    return history

In [None]:
def BO_loop_RS(model, distributions, contexts, beta, T, taus):
    """Run T rounds of robust satisficing BO and return the chosen indices and true kappas.

    Each round computes, for every insulin action, kappa_hat from the UCB of the model and
    kappa from the true objective table InsCarb_mat. The action with the smallest kappa_hat
    is played; the smallest true kappa is recorded for the regret calculation.

    :param model: surrogate model providing predict, add_sample, update, input_dim, noise_std
    :param distributions: indexable by round, yielding the (w_true, w_ref) pair of that round
    :param contexts: integer array; contexts[t, 0] is the carb index, contexts[t, 1] the fbg index
    :param beta: multiplier of the predictive standard deviation in the UCB
    :param T: number of rounds
    :param taus: indexable by round, yielding the threshold tau of that round
    :return: tuple (history, kappas); history has one [action index, carb index, fbg index]
        entry per round, kappas[t] is the minimum true kappa over actions at round t
    """
    true_min_kappas = np.zeros(T)
    history = []
    for t in range(T):
        # Only the reference distribution is needed, as a column vector
        _, w_ref = distributions[t]
        w_ref = w_ref.reshape(N_carb, 1)

        tau = taus[t]

        # Context of this round (fasting blood glucose is fixed)
        carb_idx, fbg_idx = contexts[t, 0], contexts[t, 1]

        # UCB of the model over the whole (insulin, carb) grid
        grid = InsCarbFbg_pairs[:, :, fbg_idx, :].reshape(-1, model.input_dim)
        mu, var = model.predict(grid)
        ucb = (mu + beta*np.sqrt(var)).reshape(N_ins, N_carb)

        # kappa_hat (from the UCB) and kappa (from the true objective) for each action
        kappa_true = np.zeros(N_ins)
        kappa_hat = np.zeros(N_ins)
        for ins_idx in range(N_ins):
            ucb_row = ucb[ins_idx]
            w_hat = w_x_t(M, w_ref, ucb_row, tau)
            kappa_hat[ins_idx] = kappa_x_w(M, w_ref, w_hat, ucb_row, tau, clip=False)

            f_col = InsCarb_mat[:, ins_idx].reshape(-1, 1)
            w_star = w_x_t(M, w_ref, f_col, tau)
            kappa_true[ins_idx] = kappa_x_w(M, w_ref, w_star, f_col, tau, clip=False)

        # Play the action with the smallest kappa_hat
        action_idx = np.argmin(kappa_hat)
        history.append([action_idx, carb_idx, fbg_idx])

        # Observe the reward at the played point and refit the model
        x_new = InsCarbFbg_pairs[action_idx, carb_idx, fbg_idx].reshape(1, -1)
        insulin_ts, carb_ts, fbg_ts = x_new[0]
        y_new = np.array(
            get_observation(carb_ts, fbg_ts, insulin_ts, noise_std=model.noise_std)
        ).reshape(1, -1)
        model.add_sample(x_new, y_new)
        model.update()

        print(f"Chosen point - insulin: {insulin_ts}, carb.: {carb_ts}, fbg: {fbg_ts}", " - Kappa:", np.min(kappa_hat))
        print("Blood glucose(noisy) discrepancy:", y_new[0][0])

        # Minimum true kappa, kept for the regret calculation
        true_min_kappas[t] = np.min(kappa_true)

    return history, true_min_kappas

In [None]:
# Experiment configuration: number of simulations, horizon and per-algorithm settings

# Restart the random stream so that re-running the experiment reproduces it
np.random.seed(RANDOM_SEED)

sim_count = 50
timestep_count = 200 + 1  # the extra round is the last tick of the subsampled plots
initial_sample_cnt = 10

# DRO ball radius coefficients
DRO_epsilons = [1/3, 1, 3]
RS_aspirations = [0.9]  # Currently not used

# Threshold tau, identical at every round
fixed_tau = -10
# A fixed tau goes with exactly one RS configuration
assert fixed_tau is None or len(RS_aspirations) == 1

# Modelling parameters
input_dim, output_dim = 3, 1
noise_std = 1
noise_var = np.square(noise_std)
beta = 2

ker = GPy.kern.Matern52(input_dim=input_dim, ARD=True, lengthscale=obj_lengthscale)

# When True, the reference distribution drifts towards the true one over the rounds
CONVERGE_REF_TO_TRUE = False

In [None]:
# Model simulations

# Only a fixed tau is implemented. Without this guard taus_sims_asp would stay uninitialised
# memory and be handed to BO_loop_RS as if it were a tau schedule.
if fixed_tau is None:
    raise NotImplementedError("A time-dependent tau is not implemented; set fixed_tau to a number.")


def _new_fitted_model(X_init, Y_init):
    """Return a fresh GPyModel fitted to the initial samples (X_init, Y_init)."""
    # NOTE(review): every model receives the same kernel object `ker`; if GPyModel tunes
    # kernel hyperparameters in place, state leaks between algorithms - confirm.
    model = GPyModel(input_dim, output_dim, noise_var, ker=ker)
    model.add_sample(X_init, Y_init)
    model.update()
    return model


# Initial samples for each simulation
initial_samples_sims = np.empty((sim_count, initial_sample_cnt, input_dim+output_dim))
# Time dependent true and reference distributions for each simulation
distributions_w_true_and_ref_sims = np.empty((sim_count, timestep_count, 2, N_carb))
# Chosen (action, context) pairs for each simulation of each DRO epsilon
indices_DRO_sims_eps = np.empty((len(DRO_epsilons), sim_count, timestep_count, 3))
# Chosen (action, context) pairs for each simulation of WRO
indices_WRO_sims = np.empty((1, sim_count, timestep_count, 3))
# Chosen (action, context) pairs for each simulation of SO
indices_SO_sims = np.empty((1, sim_count, timestep_count, 3))
# Chosen (action, context) pairs for each simulation of RS
indices_RS_sims_asp = np.empty((len(RS_aspirations), sim_count, timestep_count, 3))
# Kappa prime values for each simulation and timestep, we calculate these in RS loop
kappas_sims_asp = np.empty((len(RS_aspirations), sim_count, timestep_count))
# Tau values for each simulation and timestep, since we use a fixed tau this is constant
taus_sims_asp = np.full((len(RS_aspirations), sim_count, timestep_count), fixed_tau, dtype=float)


for sim in range(sim_count):
    print(f"\n\n------------------------ SIMULATION NUMBER: {sim:02d} ------------------------\n\n")

    # Randomly select initial samples (fasting blood glucose index is always 0)
    initial_ins_inds = np.random.randint(low=0, high=N_ins, size=initial_sample_cnt)
    initial_carb_inds = np.random.randint(low=0, high=N_carb, size=initial_sample_cnt)
    initial_fbgs_inds = np.full(initial_sample_cnt, 0)

    initial_InsCarbFbg = InsCarbFbg_pairs[initial_ins_inds, initial_carb_inds, initial_fbgs_inds]
    initial_Y = np.empty((initial_sample_cnt, 1))
    for i in range(initial_sample_cnt):
        insulin_tmp, carb_tmp, fbg_tmp = initial_InsCarbFbg[i]
        # Initial observations use the default noise_std=0
        initial_Y[i, 0] = get_observation(carb_tmp, fbg_tmp, insulin_tmp)

    initial_samples_sims[sim] = np.hstack((initial_InsCarbFbg, initial_Y))


    # Create context distributions and sample context at each timestep for all algorithms.
    # All algorithms run with same distributions and contexts for each simulation.
    sim_contexts = np.empty((timestep_count, 2), dtype=int)
    for t in range(timestep_count):
        if CONVERGE_REF_TO_TRUE:
            # t+2 keeps log2(t) strictly positive from the first round on
            w_true, w_ref = create_w_true_and_ref(t=t+2)
        else:
            w_true, w_ref = create_w_true_and_ref()
        distributions_w_true_and_ref_sims[sim, t, 0] = w_true.flatten()
        distributions_w_true_and_ref_sims[sim, t, 1] = w_ref.flatten()
        ts_carb_context = sample_index_from_p(w_true)

        sim_contexts[t] = np.array([ts_carb_context, 0])

    sim_distributions = distributions_w_true_and_ref_sims[sim]

    # The algorithms run in a fixed order (DRO, WRO, RS, SO): they share the global random
    # stream through the observation noise, so reordering them changes the results.

    # DRO simulations with different epsilons
    for eps_i, eps in enumerate(DRO_epsilons):
        print(f"\nDRO with eps={eps:.3f}\n")
        model_DRO = _new_fitted_model(initial_InsCarbFbg, initial_Y)

        chosen_indices_DRO = BO_loop_DRO(
            model_DRO, distributions=sim_distributions, contexts=sim_contexts,
            epsilon_coef=eps, beta=beta, T=timestep_count
        )
        indices_DRO_sims_eps[eps_i][sim] = np.array(chosen_indices_DRO).astype(int)

    # WRO simulation
    print("\nWRO\n")
    model_WRO = _new_fitted_model(initial_InsCarbFbg, initial_Y)

    chosen_indices_WRO = BO_loop_WRO(
        model_WRO, distributions=sim_distributions, contexts=sim_contexts, beta=beta, T=timestep_count
    )
    indices_WRO_sims[0, sim] = np.array(chosen_indices_WRO).astype(int)

    # RS simulation
    for asp_i, asp in enumerate(RS_aspirations):
        print(f"\nRS with aspiration={asp:.3f}\n")
        model_RS = _new_fitted_model(initial_InsCarbFbg, initial_Y)

        chosen_indices_RS, kappas = BO_loop_RS(
            model_RS, distributions=sim_distributions, contexts=sim_contexts,
            beta=beta, T=timestep_count, taus=taus_sims_asp[asp_i, sim]
        )
        indices_RS_sims_asp[asp_i][sim] = np.array(chosen_indices_RS).astype(int)
        kappas_sims_asp[asp_i][sim] = kappas

    # SO simulation
    print("\nSO\n")
    model_SO = _new_fitted_model(initial_InsCarbFbg, initial_Y)

    chosen_indices_SO = BO_loop_SO(
        model_SO, distributions=sim_distributions, contexts=sim_contexts, beta=beta, T=timestep_count
    )
    indices_SO_sims[0, sim] = np.array(chosen_indices_SO).astype(int)

print("Finished!")

In [None]:
# Persist the simulation results

simulation_name = datetime.now().strftime("%m_%d_%Y-%H_%M_%S")
simulation_folder = os.path.join(
    "results",
    f"simg_conv{int(CONVERGE_REF_TO_TRUE)}_ts_sim{sim_count}_ts{timestep_count}_tau-{fixed_tau}-std-{noise_std:.2f}-{simulation_name}"
)
# exist_ok=False: fail instead of writing into a folder that already exists
os.makedirs(simulation_folder, exist_ok=False)

# One .npy file per result array, named after the variable
for _array_name, _array in (
    ("initial_samples_sims", initial_samples_sims),
    ("distributions_w_true_and_ref_sims", distributions_w_true_and_ref_sims),
    ("indices_DRO_sims_eps", indices_DRO_sims_eps),
    ("indices_WRO_sims", indices_WRO_sims),
    ("indices_SO_sims", indices_SO_sims),
    ("indices_RS_sims_asp", indices_RS_sims_asp),
    ("kappas_sims_asp", kappas_sims_asp),
    ("taus_sims_asp", taus_sims_asp),
):
    np.save(os.path.join(simulation_folder, _array_name), _array)

In [None]:
# Reload the results of an earlier experiment

# Switch the condition to True (and set the folder) to read older experiments
if False:
    simulation_folder = ""
    sim_count = 50  # Sim count of older experiment

    def _load_result(file_name):
        """Load one saved array from the selected simulation folder."""
        return np.load(os.path.join(simulation_folder, file_name))

    initial_samples_sims = _load_result("initial_samples_sims.npy")
    distributions_w_true_and_ref_sims = _load_result("distributions_w_true_and_ref_sims.npy")
    # Index arrays were saved as floats; cast them back to int for indexing
    indices_DRO_sims_eps = _load_result("indices_DRO_sims_eps.npy").astype(int)
    indices_WRO_sims = _load_result("indices_WRO_sims.npy").astype(int)
    indices_SO_sims = _load_result("indices_SO_sims.npy").astype(int)
    indices_RS_sims_asp = _load_result("indices_RS_sims_asp.npy").astype(int)
    kappas_sims_asp = _load_result("kappas_sims_asp.npy")
    taus_sims_asp = _load_result("taus_sims_asp.npy")

In [None]:
# Colour palettes used by the plots
colors = sns.color_palette("dark")
# Three 10-step sequential palettes
greens, blues, reds = (sns.color_palette(palette_name, 10) for palette_name in ("BuGn", "PuBu", "YlOrRd"))

In [None]:
# Expected reward of every action under the true distribution w.r.t. f,
# indexed as [simulation, timestep, action]
expected_reward_true_sims_ts = distributions_w_true_and_ref_sims[:, :, 0, :] @ InsCarb_mat

# MMD between the true and reference distribution, per simulation and timestep
mmd_sims_ts = np.empty((sim_count, timestep_count))
for sim_i, ts_i in np.ndindex(sim_count, timestep_count):
    mmd_sims_ts[sim_i, ts_i] = MMD(M, *distributions_w_true_and_ref_sims[sim_i, ts_i])


# Calculate rewards: pick, for every (simulation, timestep), the expected reward of the
# action each algorithm chose there.
_sim_axis = np.arange(sim_count)[:, None]
_ts_axis = np.arange(timestep_count)

# DRO rewards
true_rewards_of_actions_DRO_eps = np.empty((len(DRO_epsilons), sim_count, timestep_count))
for e_i in range(len(DRO_epsilons)):
    _chosen_actions = indices_DRO_sims_eps[e_i, :, :, 0].astype(int)
    true_rewards_of_actions_DRO_eps[e_i] = expected_reward_true_sims_ts[_sim_axis, _ts_axis, _chosen_actions]

# WRO rewards
_chosen_actions = indices_WRO_sims[0, :, :, 0].astype(int)
true_rewards_of_actions_WRO = expected_reward_true_sims_ts[_sim_axis, _ts_axis, _chosen_actions]

# SO rewards
_chosen_actions = indices_SO_sims[0, :, :, 0].astype(int)
true_rewards_of_actions_SO = expected_reward_true_sims_ts[_sim_axis, _ts_axis, _chosen_actions]

# RS rewards
true_rewards_of_actions_RS_asp = np.empty((len(RS_aspirations), sim_count, timestep_count))
for a_i in range(len(RS_aspirations)):
    _chosen_actions = indices_RS_sims_asp[a_i, :, :, 0].astype(int)
    true_rewards_of_actions_RS_asp[a_i] = expected_reward_true_sims_ts[_sim_axis, _ts_axis, _chosen_actions]


matplotlib.rcParams['figure.figsize'] = (10, 4)
fig, ax = plt.subplots(nrows=len(RS_aspirations), ncols=2)


# REGRET PLOTS


# (label, color, linestyle) of every plotted algorithm.
# The DRO entries are looked up by position in DRO_epsilons = [1/3, 1, 3], so they must be
# listed in that order (epsilon/3, epsilon, 3*epsilon); WRBO, SO and RoBOS follow.
lcls = [
    (r"DRBO: $\epsilon/3$", colors[5], ":",),
    (r"DRBO: $\epsilon$", blues[-2], ":",),
    (r"DRBO: $3\epsilon$", colors[6], ":",),
    ("WRBO", colors[8], "-.",),
    ("SO", greens[-2], "--",),
    ("RoBOS", reds[-1], "-",),
]


def plot_regret_std_bar(ax, regret, ind):
    """Draw the mean cumulative regret of one algorithm with error bars.

    :param ax: axes object to draw on (its errorbar method is called)
    :param regret: per-round regret, indexed as [simulation, round]
    :param ind: index into the global lcls list selecting label, color and linestyle
    """
    label, color, linestyle = lcls[ind]

    # Only every 25th round is drawn
    shown = slice(None, None, 25)

    rounds = np.arange(timestep_count)[shown]
    mean_cumulative = np.cumsum(regret.mean(axis=0))[shown]
    # Error bar: half the standard deviation of the cumulative regret across simulations
    half_std = (np.cumsum(regret, axis=1).std(axis=0) / 2)[shown]

    ax.errorbar(
        rounds, mean_cumulative, half_std, label=label, c=color,
        fmt=linestyle, ecolor=(*color, 0.6), elinewidth=0.3, capsize=4
    )

# Regret curves are drawn as means with std error bars
plot_func = plot_regret_std_bar


# Robust satisficing regret plot (left column)
for a_i in range(len(RS_aspirations)):
    tmp_ax = ax[0] if len(RS_aspirations) == 1 else ax[a_i, 0]
    tmp_ax.set_xlabel("Round")
    tmp_ax.set_ylabel(r"$R_t^{\textit{rs}}$")

    taus_sims = taus_sims_asp[a_i]
    kappas_sims = kappas_sims_asp[a_i]

    # Rewards in the order of the lcls entries: DRO variants, WRO, SO, RS
    rewards_by_algorithm = [
        *true_rewards_of_actions_DRO_eps,
        true_rewards_of_actions_WRO,
        true_rewards_of_actions_SO,
        true_rewards_of_actions_RS_asp[a_i],
    ]
    for alg_i, alg_reward in enumerate(rewards_by_algorithm):
        rs_regret = np.maximum(0, taus_sims - kappas_sims*mmd_sims_ts - alg_reward)
        plot_func(tmp_ax, rs_regret, alg_i)

    leg = tmp_ax.legend(prop={'size': 12}, frameon=True)


# Lenient regret plot (right column)
for a_i in range(len(RS_aspirations)):
    tmp_ax = ax[1] if len(RS_aspirations) == 1 else ax[a_i, 1]
    tmp_ax.set_xlabel("Round")
    tmp_ax.set_ylabel(r"$R_t^{\textit{l}}$")

    taus_sims = taus_sims_asp[a_i]

    # Rewards in the order of the lcls entries: DRO variants, WRO, SO, RS
    rewards_by_algorithm = [
        *true_rewards_of_actions_DRO_eps,
        true_rewards_of_actions_WRO,
        true_rewards_of_actions_SO,
        true_rewards_of_actions_RS_asp[a_i],
    ]
    for alg_i, alg_reward in enumerate(rewards_by_algorithm):
        lenient_regret = np.maximum(0, taus_sims - alg_reward)
        plot_func(tmp_ax, lenient_regret, alg_i)


plt.tight_layout()
# The PDF is stored inside the simulation folder and named after it
_figure_folder = Path(simulation_folder)
plt.savefig(os.path.join(_figure_folder, _figure_folder.name + '.pdf'))

In [None]:
# Result folders of the two experiments compared below; the empty strings are placeholders.
simulation_folders = [
    "", # Non-convergent experiment path
    "", # Convergent experiment path
]


def plot_reward_std_bar(ax, reward, ind):
    """Draw the mean cumulative reward of one algorithm with error bars.

    :param ax: axes object to draw on (its errorbar method is called)
    :param reward: per-round reward, indexed as [simulation, round]
    :param ind: index into the global lcls list selecting label, color and linestyle
    """
    label, color, linestyle = lcls[ind]

    # Only every 25th round is drawn
    every = 25
    cumulative_per_sim = np.cumsum(reward, axis=1)

    x_rounds = np.arange(timestep_count)[::every]
    y_mean = np.cumsum(reward.mean(axis=0))[::every]
    # Error bar: half the standard deviation of the cumulative reward across simulations
    y_half_std = (cumulative_per_sim.std(axis=0) / 2)[::every]

    ax.errorbar(
        x_rounds, y_mean, y_half_std, label=label, c=color,
        fmt=linestyle, ecolor=(*color, 0.6), elinewidth=0.3, capsize=4
    )


matplotlib.rcParams['figure.figsize'] = (10, 4)
# One row, one panel per experiment folder; the panels are addressed as ax[plt_i]
fig, ax = plt.subplots(nrows=1, ncols=2)

# (label, color, linestyle) of every plotted algorithm.
# The DRO entries are looked up by position in DRO_epsilons = [1/3, 1, 3], so they must be
# listed in that order (epsilon/3, epsilon, 3*epsilon); WRBO, SO and RoBOS follow.
lcls = [
    (r"DRBO: $\epsilon/3$", colors[5], ":",),
    (r"DRBO: $\epsilon$", blues[-2], ":",),
    (r"DRBO: $3\epsilon$", colors[6], ":",),
    ("WRBO", colors[8], "-.",),
    ("SO", greens[-2], "--",),
    ("RoBOS", reds[-1], "-",),
]

# Plot std regions as error bars
plot_func = plot_reward_std_bar

# RS configuration shown in the plot: the last one
rs_i = len(RS_aspirations) - 1

for plt_i in range(2):
    simulation_folder = simulation_folders[plt_i]

    # NOTE(review): the arrays are indexed with the global sim_count / timestep_count, so
    # both experiments must have been run with the current values - confirm.
    initial_samples_sims = np.load(os.path.join(simulation_folder, "initial_samples_sims.npy"))
    distributions_w_true_and_ref_sims = np.load(os.path.join(simulation_folder, "distributions_w_true_and_ref_sims.npy"))
    indices_DRO_sims_eps = np.load(os.path.join(simulation_folder, "indices_DRO_sims_eps.npy")).astype(int)
    indices_WRO_sims = np.load(os.path.join(simulation_folder, "indices_WRO_sims.npy")).astype(int)
    indices_SO_sims = np.load(os.path.join(simulation_folder, "indices_SO_sims.npy")).astype(int)
    indices_RS_sims_asp = np.load(os.path.join(simulation_folder, "indices_RS_sims_asp.npy")).astype(int)
    kappas_sims_asp = np.load(os.path.join(simulation_folder, "kappas_sims_asp.npy"))
    taus_sims_asp = np.load(os.path.join(simulation_folder, "taus_sims_asp.npy"))

    # Expected rewards of simulations and timesteps w.r.t. f
    expected_reward_true_sims_ts = distributions_w_true_and_ref_sims[:, :, 0, :] @ InsCarb_mat

    # MMD distances of simulations and timesteps (not used by the reward plot itself)
    mmd_sims_ts = np.empty((sim_count, timestep_count))
    for sim_i in range(sim_count):
        for ts_i in range(timestep_count):
            mmd_sims_ts[sim_i, ts_i] = MMD(M, *distributions_w_true_and_ref_sims[sim_i, ts_i])


    # Calculate rewards

    # DRO rewards
    true_rewards_of_actions_DRO_eps = np.empty((len(DRO_epsilons), sim_count, timestep_count))
    for e_i in range(len(DRO_epsilons)):
        true_rewards_of_actions_DRO_eps[e_i] = expected_reward_true_sims_ts[
            np.arange(sim_count)[:, None],
            np.arange(timestep_count),
            indices_DRO_sims_eps[e_i, :, :, 0].astype(int)
        ]

    # WRO rewards
    true_rewards_of_actions_WRO = expected_reward_true_sims_ts[
        np.arange(sim_count)[:, None],
        np.arange(timestep_count),
        indices_WRO_sims[0, :, :, 0].astype(int)
    ]

    # SO rewards
    true_rewards_of_actions_SO = expected_reward_true_sims_ts[
        np.arange(sim_count)[:, None],
        np.arange(timestep_count),
        indices_SO_sims[0, :, :, 0].astype(int)
    ]

    # RS rewards
    true_rewards_of_actions_RS_asp = np.empty((len(RS_aspirations), sim_count, timestep_count))
    for a_i in range(len(RS_aspirations)):
        true_rewards_of_actions_RS_asp[a_i] = expected_reward_true_sims_ts[
            np.arange(sim_count)[:, None],
            np.arange(timestep_count),
            indices_RS_sims_asp[a_i, :, :, 0].astype(int)
        ]


    # Reward plot
    tmp_ax = ax[plt_i]
    tmp_ax.set_xlabel("Round")
    tmp_ax.set_ylabel(r"Cumulative Reward")

    taus_sims = taus_sims_asp[rs_i]
    kappas_sims = kappas_sims_asp[rs_i]

    # DROs occupy the first len(DRO_epsilons) entries of lcls
    for e_i in range(len(DRO_epsilons)):
        reward = true_rewards_of_actions_DRO_eps[e_i]
        plot_func(tmp_ax, reward, e_i)

    # WRO
    reward = true_rewards_of_actions_WRO
    plot_func(tmp_ax, reward, len(DRO_epsilons))

    # SO: its own lcls entry, one past WRBO
    reward = true_rewards_of_actions_SO
    plot_func(tmp_ax, reward, len(DRO_epsilons) + 1)

    # RS: last lcls entry
    reward = true_rewards_of_actions_RS_asp[rs_i]
    plot_func(tmp_ax, reward, -1)

    leg = tmp_ax.legend(prop={'size': 12}, frameon=True)


plt.tight_layout()
# The same figure is stored in both experiment folders
for simulation_folder in simulation_folders:
    plt.savefig(os.path.join(Path(simulation_folder), Path(simulation_folder).name + '_both_exp_rewards.pdf'))