# Overview

In this notebook we load the policy evaluation results for each approach, in the (desperate) hope that they show something informative and insightful. From early looks at the intermediate results, at least AIS learned something informative when the demographics were included while learning the representation and the learning rate was set to 1e-3... I'm holding my breath and crossing my fingers.

In [None]:
%matplotlib inline
import itertools
import numpy as np

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Seaborn context/style for every figure in the notebook.
sns.set_context('paper')
sns.set_style('white')

# plt.rcParams and matplotlib.rcParams are the same object, so all three
# settings go through a single update: font type 42 for PDF/PS output and a
# base font size of 20.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 20,
})

In [None]:
def ewma_vectorized(data, alpha, offset=None, dtype=None, order='C', out=None):
    """
    Calculates the exponential moving average over a vector.

    The result satisfies ``y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]`` with
    ``y[-1] = offset``. It is computed in closed form from powers of
    ``(1 - alpha)``, so it will fail for large inputs: those powers underflow
    to zero and the divisions below then produce inf/nan.

    :param data: Input data (array-like). Inputs with more than one dimension
        are flattened using ``order``.
    :param alpha: scalar float in range (0,1)
        The alpha parameter for the moving average.
    :param offset: optional
        The offset for the moving average, scalar. Defaults to data[0].
    :param dtype: optional
        Data type used for calculations. Defaults to float64 unless
        data.dtype is float32, then it will use float32.
    :param order: {'C', 'F', 'A'}, optional
        Order to use when flattening the data. Defaults to 'C'.
    :param out: ndarray, or None, optional
        A location into which the result is stored. If provided, it must have
        the same shape as the (flattened) input and the calculation dtype.
        If not provided or `None`, a freshly-allocated array is returned.
    :return: 1-D ndarray holding the moving average (``out`` when given).
    """
    # np.asarray only copies when it has to. np.array(..., copy=False) raises
    # under NumPy >= 2.0 whenever a copy is unavoidable (e.g. list input).
    data = np.asarray(data)

    if dtype is None:
        if data.dtype == np.float32:
            dtype = np.float32
        else:
            dtype = np.float64
    else:
        dtype = np.dtype(dtype)

    if data.ndim > 1:
        # flatten input; `order` must be passed by keyword, otherwise reshape
        # treats it as a second dimension and raises TypeError
        data = data.reshape(-1, order=order)

    if out is None:
        out = np.empty_like(data, dtype=dtype)
    else:
        assert out.shape == data.shape
        assert out.dtype == dtype

    if data.size < 1:
        # empty input, return empty array
        return out

    if offset is None:
        offset = data[0]

    alpha = np.asarray(alpha).astype(dtype, copy=False)

    # scaling_factors -> 0 as len(data) gets large
    # this leads to divide-by-zeros below
    scaling_factors = np.power(1. - alpha, np.arange(data.size + 1, dtype=dtype),
                               dtype=dtype)
    # create cumulative sum array
    np.multiply(data, (alpha * scaling_factors[-2]) / scaling_factors[:-1],
                dtype=dtype, out=out)
    np.cumsum(out, dtype=dtype, out=out)

    # cumsums / scaling
    out /= scaling_factors[-2::-1]

    if offset != 0:
        offset = np.asarray(offset).astype(dtype, copy=False)
        # add offsets
        out += offset * scaling_factors[1:]

    return out

For simplicity, I'm going to go approach by approach to make sure that the results are loaded and aggregated properly.

In [None]:
# Settings shared by every architecture section below.

# Smoothing factor handed to ewma_vectorized for each loaded curve.
alpha_param = 0.10

# Random seeds of the runs; each one is spliced into the result directory name.
rand_nums = [25, 32, 1234, 2020, 53]

# Learning rates, kept as strings because they are appended verbatim to
# pol_eval_file to form the .npy file name.
learning_rates = [
    '1e-05',
    '0.0001',
    '0.001',
]
pol_eval_file = 'dBCQ_policy_eval_l'

# Result roots: the TWK root is used for AE/AIS/DDM/RNN, the HZ root for DST/ODE-RNN.
storage_dir_TWK = '/scratch/ssd001/home/tkillian/ml4h2020_srl/results/'
storage_dir_HZ = '/scratch/hdd001/home/haoran/ml4h2020_srl/results/'

### Autoencoder (AE)

In [None]:
# Architecture tag used in the result paths and plot titles of this section.
arch = 'ae'

# Directory-name prefixes for the four AE training settings; the loading cell
# appends the seed and the '_sepsis/...' suffix.
# NOTE(review): the s<N> / l<rate> parts presumably encode the selected state
# size and representation learning rate -- confirm against the training runs.
noDem_noCorr_dir = arch + '_noCntxt_s256_l1e-4_rand'
yDem_noCorr_dir = arch + '_s64_l1e-4_rand'
noDem_yCorr_dir = arch + '_corrConst_noCntxt_s32_l1e-4_rand'
yDem_yCorr_dir = arch + '_corrConst_s64_l1e-4_rand'

In [None]:
# Zero-initialised buffers, one per AE training setting, indexed as
# [learning rate, random seed, evaluation point]; the loading cell fills them.
ae_results_shape = (len(learning_rates), len(rand_nums), 1000)
ae_noDem_noCorr_pol_results = np.zeros(ae_results_shape)
ae_yDem_noCorr_pol_results = np.zeros(ae_results_shape)
ae_noDem_yCorr_pol_results = np.zeros(ae_results_shape)
ae_yDem_yCorr_pol_results = np.zeros(ae_results_shape)

In [None]:
# Load the AE policy-evaluation curve of every (learning rate, seed) run,
# smooth it with ewma_vectorized (alpha = alpha_param) and store it in the
# matching result buffer. enumerate() yields the buffer indices directly
# instead of re-searching the lists with .index() on every iteration.
for lr_idx, lr in enumerate(learning_rates):
    eval_name = pol_eval_file+f'{lr}.npy'
    for rn_idx, rn in enumerate(rand_nums):
        run_tail = f'{rn}_sepsis/{arch}_data/'+eval_name
        nDnC_file_name = storage_dir_TWK+noDem_noCorr_dir+run_tail
        yDnC_file_name = storage_dir_TWK+yDem_noCorr_dir+run_tail
        nDyC_file_name = storage_dir_TWK+noDem_yCorr_dir+run_tail
        yDyC_file_name = storage_dir_TWK+yDem_yCorr_dir+run_tail

        # Each smoothed curve must fit the 1000-long last axis of its buffer.
        ae_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        ae_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        ae_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        ae_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# One colour per entry of learning_rates: the sweep plots index this palette
# by learning-rate position, so its size follows the list instead of a literal 3.
colors = sns.color_palette('CMRmap', n_colors=len(learning_rates))

In [None]:
# Learning-rate sweep for ae_yDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ae_yDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ae_noDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ae_noDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ae_yDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ae_yDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ae_noDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ae_noDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

For the Autoencoder (and, I suspect, for all other approaches), a learning rate of 1e-3 seems to be the right choice, and all policies trained on data that was not constrained by the correlation coefficient seem to be learning something :)

In [None]:
# Four-colour palette for the comparison figures: the comparison cells use
# entries 0-3, one per training setting.
colors_comp = sns.color_palette('tab10',n_colors=4)

In [None]:
# Compare the policies learned between different training settings with a learning rate of 1e-3
# (index -1 on the learning-rate axis, i.e. the last entry of learning_rates).
plt.figure(figsize=(12,8))

# Mean / std over seeds for each setting. These module-level names are read by
# the cell that stores the best AE curve, so they are kept as-is.
nDnC_mean, nDnC_std = ae_noDem_noCorr_pol_results[-1].mean(axis=0), ae_noDem_noCorr_pol_results[-1].std(axis=0)
nDyC_mean, nDyC_std = ae_noDem_yCorr_pol_results[-1].mean(axis=0), ae_noDem_yCorr_pol_results[-1].std(axis=0)
yDnC_mean, yDnC_std = ae_yDem_noCorr_pol_results[-1].mean(axis=0), ae_yDem_noCorr_pol_results[-1].std(axis=0)
yDyC_mean, yDyC_std = ae_yDem_yCorr_pol_results[-1].mean(axis=0), ae_yDem_yCorr_pol_results[-1].std(axis=0)

# NOTE(review): the x500 scaling presumably reflects one evaluation every 500
# training iterations -- confirm against the training script.
comp_iters = np.arange(1000)*500
comp_series = [
    (nDnC_mean, nDnC_std, 'Observations Only'),
    (nDyC_mean, nDyC_std, 'Correlation Constrained'),
    (yDnC_mean, yDnC_std, 'Demographics Included'),
    (yDyC_mean, yDyC_std, 'Correlation Const. + Demographics Incl.'),
]
for comp_idx, (comp_mean, comp_std, comp_label) in enumerate(comp_series):
    comp_color = colors_comp[comp_idx]
    plt.plot(comp_iters, comp_mean, lw=3, color=comp_color, label=comp_label)
    plt.fill_between(comp_iters, comp_mean-comp_std, comp_mean+comp_std, color=comp_color, alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()



In [None]:
# Snapshot the AE curve kept as this architecture's "best" setting: the
# 'Observations Only' statistics (nDnC_*) from the AE comparison cell. Those
# names are rebound by each later architecture's comparison cell.
best_ae_pol_mean = nDnC_mean.copy()
best_ae_pol_std = nDnC_std.copy()

### Approximate Information State (AIS)

In [None]:
# Architecture tag used in the result paths and plot titles of this section.
arch = 'ais'

# Directory-name prefixes for the four AIS training settings; the loading cell
# appends the seed and the '_sepsis/...' suffix.
# NOTE(review): the s<N> / l<rate> parts presumably encode the selected state
# size and representation learning rate -- confirm against the training runs.
noDem_noCorr_dir = arch + '_noCntxt_s64_l5e-4_rand'
yDem_noCorr_dir = arch + '_s64_l5e-4_rand'
noDem_yCorr_dir = arch + '_corrConst_noCntxt_s64_l5e-4_rand'
yDem_yCorr_dir = arch + '_corrConst_s64_l5e-4_rand'

In [None]:
# Zero-initialised buffers, one per AIS training setting, indexed as
# [learning rate, random seed, evaluation point]; the loading cell fills them.
ais_results_shape = (len(learning_rates), len(rand_nums), 1000)
ais_noDem_noCorr_pol_results = np.zeros(ais_results_shape)
ais_yDem_noCorr_pol_results = np.zeros(ais_results_shape)
ais_noDem_yCorr_pol_results = np.zeros(ais_results_shape)
ais_yDem_yCorr_pol_results = np.zeros(ais_results_shape)

In [None]:
# Load the AIS policy-evaluation curve of every (learning rate, seed) run,
# smooth it with ewma_vectorized (alpha = alpha_param) and store it in the
# matching result buffer. enumerate() yields the buffer indices directly
# instead of re-searching the lists with .index() on every iteration.
for lr_idx, lr in enumerate(learning_rates):
    eval_name = pol_eval_file+f'{lr}.npy'
    for rn_idx, rn in enumerate(rand_nums):
        run_tail = f'{rn}_sepsis/{arch}_data/'+eval_name
        nDnC_file_name = storage_dir_TWK+noDem_noCorr_dir+run_tail
        yDnC_file_name = storage_dir_TWK+yDem_noCorr_dir+run_tail
        nDyC_file_name = storage_dir_TWK+noDem_yCorr_dir+run_tail
        yDyC_file_name = storage_dir_TWK+yDem_yCorr_dir+run_tail

        # Each smoothed curve must fit the 1000-long last axis of its buffer.
        ais_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        ais_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        ais_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        ais_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# Learning-rate sweep for ais_yDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ais_yDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ais_noDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ais_noDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ais_yDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ais_yDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ais_noDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ais_noDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Compare the policies learned between different training settings with a learning rate of 1e-3
# (index -1 on the learning-rate axis, i.e. the last entry of learning_rates).
plt.figure(figsize=(12,8))

# Mean / std over seeds for each setting. These module-level names are read by
# the cell that stores the best AIS curve, so they are kept as-is.
nDnC_mean, nDnC_std = ais_noDem_noCorr_pol_results[-1].mean(axis=0), ais_noDem_noCorr_pol_results[-1].std(axis=0)
nDyC_mean, nDyC_std = ais_noDem_yCorr_pol_results[-1].mean(axis=0), ais_noDem_yCorr_pol_results[-1].std(axis=0)
yDnC_mean, yDnC_std = ais_yDem_noCorr_pol_results[-1].mean(axis=0), ais_yDem_noCorr_pol_results[-1].std(axis=0)
yDyC_mean, yDyC_std = ais_yDem_yCorr_pol_results[-1].mean(axis=0), ais_yDem_yCorr_pol_results[-1].std(axis=0)

# NOTE(review): the x500 scaling presumably reflects one evaluation every 500
# training iterations -- confirm against the training script.
comp_iters = np.arange(1000)*500
comp_series = [
    (nDnC_mean, nDnC_std, 'Observations Only'),
    (nDyC_mean, nDyC_std, 'Correlation Constrained'),
    (yDnC_mean, yDnC_std, 'Demographics Included'),
    (yDyC_mean, yDyC_std, 'Correlation Const. + Demographics Incl.'),
]
for comp_idx, (comp_mean, comp_std, comp_label) in enumerate(comp_series):
    comp_color = colors_comp[comp_idx]
    plt.plot(comp_iters, comp_mean, lw=3, color=comp_color, label=comp_label)
    plt.fill_between(comp_iters, comp_mean-comp_std, comp_mean+comp_std, color=comp_color, alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()




In [None]:
# Snapshot the AIS curve kept as this architecture's "best" setting: the
# 'Demographics Included' statistics (yDnC_*) from the AIS comparison cell.
# Those names are rebound by each later architecture's comparison cell.
best_ais_pol_mean = yDnC_mean.copy()
best_ais_pol_std = yDnC_std.copy()

### Decoupled Dynamics Module (DDM)

In [None]:
# Architecture tag used in the result paths and plot titles of this section.
arch = 'ddm'

# Directory-name prefixes for the four DDM training settings; the loading cell
# appends the seed and the '_sepsis/...' suffix.
# NOTE(review): the s<N> / l<rate> parts presumably encode the selected state
# size and representation learning rate -- confirm against the training runs.
noDem_noCorr_dir = arch + '_noCntxt_s128_l1e-4_rand'
yDem_noCorr_dir = arch + '_s128_l1e-4_rand'
noDem_yCorr_dir = arch + '_corrConstp25_noCntxt_s256_l1e-4_rand'
yDem_yCorr_dir = arch + '_corrConstp25_s256_l1e-4_rand'

In [None]:
# Zero-initialised buffers, one per DDM training setting, indexed as
# [learning rate, random seed, evaluation point]; the loading cell fills them.
ddm_results_shape = (len(learning_rates), len(rand_nums), 1000)
ddm_noDem_noCorr_pol_results = np.zeros(ddm_results_shape)
ddm_yDem_noCorr_pol_results = np.zeros(ddm_results_shape)
ddm_noDem_yCorr_pol_results = np.zeros(ddm_results_shape)
ddm_yDem_yCorr_pol_results = np.zeros(ddm_results_shape)

In [None]:
# Load the DDM policy-evaluation curve of every (learning rate, seed) run,
# smooth it with ewma_vectorized (alpha = alpha_param) and store it in the
# matching result buffer. enumerate() yields the buffer indices directly
# instead of re-searching the lists with .index() on every iteration.
for lr_idx, lr in enumerate(learning_rates):
    eval_name = pol_eval_file+f'{lr}.npy'
    for rn_idx, rn in enumerate(rand_nums):
        run_tail = f'{rn}_sepsis/{arch}_data/'+eval_name
        nDnC_file_name = storage_dir_TWK+noDem_noCorr_dir+run_tail
        yDnC_file_name = storage_dir_TWK+yDem_noCorr_dir+run_tail
        nDyC_file_name = storage_dir_TWK+noDem_yCorr_dir+run_tail
        yDyC_file_name = storage_dir_TWK+yDem_yCorr_dir+run_tail

        # Each smoothed curve must fit the 1000-long last axis of its buffer.
        ddm_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        ddm_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        ddm_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        ddm_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# Learning-rate sweep for ddm_yDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ddm_yDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ddm_noDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ddm_noDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ddm_yDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ddm_yDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for ddm_noDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = ddm_noDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Compare the policies learned between different training settings with a learning rate of 1e-3
# (index -1 on the learning-rate axis, i.e. the last entry of learning_rates).
plt.figure(figsize=(12,8))

# Mean / std over seeds for each setting. These module-level names are read by
# the cell that stores the best DDM curve, so they are kept as-is.
nDnC_mean, nDnC_std = ddm_noDem_noCorr_pol_results[-1].mean(axis=0), ddm_noDem_noCorr_pol_results[-1].std(axis=0)
nDyC_mean, nDyC_std = ddm_noDem_yCorr_pol_results[-1].mean(axis=0), ddm_noDem_yCorr_pol_results[-1].std(axis=0)
yDnC_mean, yDnC_std = ddm_yDem_noCorr_pol_results[-1].mean(axis=0), ddm_yDem_noCorr_pol_results[-1].std(axis=0)
yDyC_mean, yDyC_std = ddm_yDem_yCorr_pol_results[-1].mean(axis=0), ddm_yDem_yCorr_pol_results[-1].std(axis=0)

# NOTE(review): the x500 scaling presumably reflects one evaluation every 500
# training iterations -- confirm against the training script.
comp_iters = np.arange(1000)*500
comp_series = [
    (nDnC_mean, nDnC_std, 'Observations Only'),
    (nDyC_mean, nDyC_std, 'Correlation Constrained'),
    (yDnC_mean, yDnC_std, 'Demographics Included'),
    (yDyC_mean, yDyC_std, 'Correlation Const. + Demographics Incl.'),
]
for comp_idx, (comp_mean, comp_std, comp_label) in enumerate(comp_series):
    comp_color = colors_comp[comp_idx]
    plt.plot(comp_iters, comp_mean, lw=3, color=comp_color, label=comp_label)
    plt.fill_between(comp_iters, comp_mean-comp_std, comp_mean+comp_std, color=comp_color, alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()




In [None]:
# Snapshot the DDM curve kept as this architecture's "best" setting: the
# 'Demographics Included' statistics (yDnC_*) from the DDM comparison cell.
# Those names are rebound by each later architecture's comparison cell.
best_ddm_pol_mean = yDnC_mean.copy()
best_ddm_pol_std = yDnC_std.copy()

### Deep Signature Transform (DST)

In [None]:
# Architecture tag interpolated into the DST result paths and plot titles below.
arch = 'dst'


In [None]:
# Zero-initialised buffers, one per DST training setting, indexed as
# [learning rate, random seed, evaluation point]; the loading cell fills them.
dst_results_shape = (len(learning_rates), len(rand_nums), 1000)
dst_noDem_noCorr_pol_results = np.zeros(dst_results_shape)
dst_yDem_noCorr_pol_results = np.zeros(dst_results_shape)
dst_noDem_yCorr_pol_results = np.zeros(dst_results_shape)
dst_yDem_yCorr_pol_results = np.zeros(dst_results_shape)

In [None]:
# Load the DST policy-evaluation curve of every (learning rate, seed) run from
# the HZ storage root, smooth it with ewma_vectorized (alpha = alpha_param) and
# store it in the matching result buffer. enumerate() yields the buffer indices
# directly instead of re-searching the lists with .index() on every iteration.
for lr_idx, lr in enumerate(learning_rates):
    eval_name = pol_eval_file+f'{lr}.npy'
    for rn_idx, rn in enumerate(rand_nums):
        # corr0/corr1 and contextFalse/contextTrue select the four training settings.
        nDnC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr0_contextFalse_sepsis_training/{arch}_data/'+eval_name
        yDnC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr0_contextTrue_sepsis_training/{arch}_data/'+eval_name
        nDyC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr1_contextFalse_sepsis_training/{arch}_data/'+eval_name
        yDyC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr1_contextTrue_sepsis_training/{arch}_data/'+eval_name

        # Each smoothed curve must fit the 1000-long last axis of its buffer.
        dst_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        dst_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        dst_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        dst_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# Learning-rate sweep for dst_yDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = dst_yDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for dst_noDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = dst_noDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for dst_yDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = dst_yDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for dst_noDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = dst_noDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Compare the policies learned between different training settings with a learning rate of 1e-3
# (index -1 on the learning-rate axis, i.e. the last entry of learning_rates).
plt.figure(figsize=(12,8))

# Mean / std over seeds for each setting. These module-level names are read by
# the cell that stores the best DST curve, so they are kept as-is.
nDnC_mean, nDnC_std = dst_noDem_noCorr_pol_results[-1].mean(axis=0), dst_noDem_noCorr_pol_results[-1].std(axis=0)
nDyC_mean, nDyC_std = dst_noDem_yCorr_pol_results[-1].mean(axis=0), dst_noDem_yCorr_pol_results[-1].std(axis=0)
yDnC_mean, yDnC_std = dst_yDem_noCorr_pol_results[-1].mean(axis=0), dst_yDem_noCorr_pol_results[-1].std(axis=0)
yDyC_mean, yDyC_std = dst_yDem_yCorr_pol_results[-1].mean(axis=0), dst_yDem_yCorr_pol_results[-1].std(axis=0)

# NOTE(review): the x500 scaling presumably reflects one evaluation every 500
# training iterations -- confirm against the training script.
comp_iters = np.arange(1000)*500
comp_series = [
    (nDnC_mean, nDnC_std, 'Observations Only'),
    (nDyC_mean, nDyC_std, 'Correlation Constrained'),
    (yDnC_mean, yDnC_std, 'Demographics Included'),
    (yDyC_mean, yDyC_std, 'Correlation Const. + Demographics Incl.'),
]
for comp_idx, (comp_mean, comp_std, comp_label) in enumerate(comp_series):
    comp_color = colors_comp[comp_idx]
    plt.plot(comp_iters, comp_mean, lw=3, color=comp_color, label=comp_label)
    plt.fill_between(comp_iters, comp_mean-comp_std, comp_mean+comp_std, color=comp_color, alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()




In [None]:
# Snapshot the DST curve kept as this architecture's "best" setting: the
# 'Observations Only' statistics (nDnC_*) from the DST comparison cell. Those
# names are rebound by each later architecture's comparison cell.
best_dst_pol_mean = nDnC_mean.copy()
best_dst_pol_std = nDnC_std.copy()

### Recurrent Neural Network (RNN)

In [None]:
# Architecture tag used in the result paths and plot titles of this section.
arch = 'rnn'

# Directory-name prefixes for the four RNN training settings; the loading cell
# appends the seed and the '_sepsis/...' suffix.
# NOTE(review): the s<N> / l<rate> parts presumably encode the selected state
# size and representation learning rate -- confirm against the training runs.
noDem_noCorr_dir = arch + '_noCntxt_s64_l1e-4_rand'
yDem_noCorr_dir = arch + '_s64_l1e-4_rand'
noDem_yCorr_dir = arch + '_corrConst_noCntxt_s64_l1e-4_rand'
yDem_yCorr_dir = arch + '_corrConst_s128_l1e-4_rand'

In [None]:
# Zero-initialised buffers, one per RNN training setting, indexed as
# [learning rate, random seed, evaluation point]; the loading cell fills them.
rnn_results_shape = (len(learning_rates), len(rand_nums), 1000)
rnn_noDem_noCorr_pol_results = np.zeros(rnn_results_shape)
rnn_yDem_noCorr_pol_results = np.zeros(rnn_results_shape)
rnn_noDem_yCorr_pol_results = np.zeros(rnn_results_shape)
rnn_yDem_yCorr_pol_results = np.zeros(rnn_results_shape)

In [None]:
# Load the RNN policy-evaluation curve of every (learning rate, seed) run,
# smooth it with ewma_vectorized (alpha = alpha_param) and store it in the
# matching result buffer. enumerate() yields the buffer indices directly
# instead of re-searching the lists with .index() on every iteration.
for lr_idx, lr in enumerate(learning_rates):
    eval_name = pol_eval_file+f'{lr}.npy'
    for rn_idx, rn in enumerate(rand_nums):
        run_tail = f'{rn}_sepsis/{arch}_data/'+eval_name
        nDnC_file_name = storage_dir_TWK+noDem_noCorr_dir+run_tail
        yDnC_file_name = storage_dir_TWK+yDem_noCorr_dir+run_tail
        nDyC_file_name = storage_dir_TWK+noDem_yCorr_dir+run_tail
        yDyC_file_name = storage_dir_TWK+yDem_yCorr_dir+run_tail

        # Each smoothed curve must fit the 1000-long last axis of its buffer.
        rnn_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        rnn_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        rnn_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        rnn_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# Learning-rate sweep for rnn_yDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = rnn_yDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for rnn_noDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = rnn_noDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for rnn_yDem_yCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = rnn_yDem_yCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for rnn_noDem_noCorr_pol_results: one curve per learning
# rate, showing the mean over seeds with a +/- one-std band.
plt.figure()
eval_axis = np.arange(1000)
for lr_idx, lr in enumerate(learning_rates):
    seed_curves = rnn_noDem_noCorr_pol_results[lr_idx]
    temp_mean = seed_curves.mean(axis=0)
    temp_std = seed_curves.std(axis=0)
    line_color = colors[lr_idx]

    plt.plot(eval_axis, temp_mean, lw=2, color=line_color, label=f'lr={lr}')
    plt.fill_between(eval_axis, temp_mean - temp_std, temp_mean + temp_std,
                     alpha=0.3, color=line_color)
plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Compare the policies learned between different training settings with a learning rate of 1e-3
# (index -1 on the learning-rate axis, i.e. the last entry of learning_rates).
plt.figure(figsize=(12,8))

# Mean / std over seeds for each setting. These module-level names are read by
# the cell that stores the best RNN curve, so they are kept as-is.
nDnC_mean, nDnC_std = rnn_noDem_noCorr_pol_results[-1].mean(axis=0), rnn_noDem_noCorr_pol_results[-1].std(axis=0)
nDyC_mean, nDyC_std = rnn_noDem_yCorr_pol_results[-1].mean(axis=0), rnn_noDem_yCorr_pol_results[-1].std(axis=0)
yDnC_mean, yDnC_std = rnn_yDem_noCorr_pol_results[-1].mean(axis=0), rnn_yDem_noCorr_pol_results[-1].std(axis=0)
yDyC_mean, yDyC_std = rnn_yDem_yCorr_pol_results[-1].mean(axis=0), rnn_yDem_yCorr_pol_results[-1].std(axis=0)

# NOTE(review): the x500 scaling presumably reflects one evaluation every 500
# training iterations -- confirm against the training script.
comp_iters = np.arange(1000)*500
comp_series = [
    (nDnC_mean, nDnC_std, 'Observations Only'),
    (nDyC_mean, nDyC_std, 'Correlation Constrained'),
    (yDnC_mean, yDnC_std, 'Demographics Included'),
    (yDyC_mean, yDyC_std, 'Correlation Const. + Demographics Incl.'),
]
for comp_idx, (comp_mean, comp_std, comp_label) in enumerate(comp_series):
    comp_color = colors_comp[comp_idx]
    plt.plot(comp_iters, comp_mean, lw=3, color=comp_color, label=comp_label)
    plt.fill_between(comp_iters, comp_mean-comp_std, comp_mean+comp_std, color=comp_color, alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()




In [None]:
# Snapshot the RNN curve kept as this architecture's "best" setting: the
# 'Demographics Included' statistics (yDnC_*) from the RNN comparison cell.
# Those names are rebound by each later architecture's comparison cell.
best_rnn_pol_mean = yDnC_mean.copy()
best_rnn_pol_std = yDnC_std.copy()

### ODE-RNN (ODE)

In [None]:
# Architecture tag interpolated into the ODE-RNN result paths and plot titles below.
arch = 'odernn'


In [None]:
# Zero-initialised buffers, one per ODE-RNN training setting, indexed as
# [learning rate, random seed, evaluation point]; the loading cell fills them.
ode_results_shape = (len(learning_rates), len(rand_nums), 1000)
ode_noDem_noCorr_pol_results = np.zeros(ode_results_shape)
ode_yDem_noCorr_pol_results = np.zeros(ode_results_shape)
ode_noDem_yCorr_pol_results = np.zeros(ode_results_shape)
ode_yDem_yCorr_pol_results = np.zeros(ode_results_shape)

In [None]:
# Load the ODE-RNN policy-evaluation curve of every (learning rate, seed) run
# from the HZ storage root, smooth it with ewma_vectorized (alpha = alpha_param)
# and store it in the matching result buffer. enumerate() yields the buffer
# indices directly instead of re-searching the lists with .index() on every
# iteration.
for lr_idx, lr in enumerate(learning_rates):
    eval_name = pol_eval_file+f'{lr}.npy'
    for rn_idx, rn in enumerate(rand_nums):
        # corr0/corr1 and contextFalse/contextTrue select the four training
        # settings; note that the sz<N> part differs between settings here.
        nDnC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr0_contextFalse_sepsis_training/{arch}_data/'+eval_name
        yDnC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz32_rand{rn}_corr0_contextTrue_sepsis_training/{arch}_data/'+eval_name
        nDyC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr1_contextFalse_sepsis_training/{arch}_data/'+eval_name
        yDyC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz128_rand{rn}_corr1_contextTrue_sepsis_training/{arch}_data/'+eval_name

        # Each smoothed curve must fit the 1000-long last axis of its buffer.
        ode_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        ode_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        ode_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        ode_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# Learning-rate sweep for the setting with demographics and without the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = ode_yDem_noCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for the setting without demographics and with the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = ode_noDem_yCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for the setting with demographics and with the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = ode_yDem_yCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for the setting without demographics and without the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = ode_noDem_noCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Overlay the four training settings using the last entry of learning_rates
# (1e-3 according to the original note -- assumes learning_rates ends with 1e-3).
plt.figure(figsize=(12,8))

# Mean and standard deviation across the rand_nums axis for each setting.
# These names are kept because the following cell reads nDnC_mean / nDnC_std.
nDnC_mean, nDnC_std = ode_noDem_noCorr_pol_results[-1].mean(axis=0), ode_noDem_noCorr_pol_results[-1].std(axis=0)
nDyC_mean, nDyC_std = ode_noDem_yCorr_pol_results[-1].mean(axis=0), ode_noDem_yCorr_pol_results[-1].std(axis=0)
yDnC_mean, yDnC_std = ode_yDem_noCorr_pol_results[-1].mean(axis=0), ode_yDem_noCorr_pol_results[-1].std(axis=0)
yDyC_mean, yDyC_std = ode_yDem_yCorr_pol_results[-1].mean(axis=0), ode_yDem_yCorr_pol_results[-1].std(axis=0)

# The x axis scales each of the 1000 evaluation points by 500.
iteration_axis = np.arange(1000)*500

# (legend label, mean, std) in drawing order; the position selects the colour.
setting_curves = (
    ('Observations Only', nDnC_mean, nDnC_std),
    ('Correlation Constrained', nDyC_mean, nDyC_std),
    ('Demographics Included', yDnC_mean, yDnC_std),
    ('Correlation Const. + Demographics Incl.', yDyC_mean, yDyC_std),
)
for curve_idx, (curve_label, curve_mean, curve_std) in enumerate(setting_curves):
    plt.plot(iteration_axis, curve_mean, lw=3, color=colors_comp[curve_idx], label=curve_label)
    plt.fill_between(iteration_axis, curve_mean-curve_std, curve_mean+curve_std, color=colors_comp[curve_idx], alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.xticks(fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()




In [None]:
# Keep copies of the ODE-RNN 'Observations Only' curve (nDnC) and its spread;
# they are drawn again in the cross-architecture comparison at the end.
best_ode_pol_mean = np.array(nDnC_mean, copy=True)
best_ode_pol_std = np.array(nDnC_std, copy=True)

### Neural CDE (CDE)

In [None]:
# Architecture key for this section; the cells below interpolate it into the
# result file paths and the plot titles.
arch = "cde"


In [None]:
# One zero-initialised buffer per training setting, indexed as
# [learning rate, rand_nums entry, evaluation point]; the last axis is fixed at 1000.
cde_results_shape = (len(learning_rates), len(rand_nums), 1000)

cde_noDem_noCorr_pol_results = np.zeros(cde_results_shape)
cde_yDem_noCorr_pol_results = np.zeros(cde_results_shape)
cde_noDem_yCorr_pol_results = np.zeros(cde_results_shape)
cde_yDem_yCorr_pol_results = np.zeros(cde_results_shape)

In [None]:
# Fill the CDE buffers: for every learning rate and every entry of rand_nums,
# load the saved policy-evaluation array of each training setting and store
# its exponentially smoothed version (ewma_vectorized, alpha_param).
#
# enumerate() provides the buffer indices directly. The previous
# list.index() lookups were repeated on every inner iteration and return the
# first match only, so a repeated value would have been written to the wrong row.
for lr_idx, lr in enumerate(learning_rates):
    for rn_idx, rn in enumerate(rand_nums):
        # The hidden size embedded in the directory name (sz64 / sz32 / sz32 / sz128)
        # differs between the four settings.
        nDnC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz64_rand{rn}_corr0_contextFalse_sepsis_training/{arch}_data/'+pol_eval_file+f'{lr}.npy'
        yDnC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz32_rand{rn}_corr0_contextTrue_sepsis_training/{arch}_data/'+pol_eval_file+f'{lr}.npy'
        nDyC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz32_rand{rn}_corr1_contextFalse_sepsis_training/{arch}_data/'+pol_eval_file+f'{lr}.npy'
        yDyC_file_name = storage_dir_HZ+f'{arch}/{arch}_sz128_rand{rn}_corr1_contextTrue_sepsis_training/{arch}_data/'+pol_eval_file+f'{lr}.npy'

        # Each loaded curve must broadcast into the 1000-long last axis of its buffer.
        cde_noDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDnC_file_name), alpha_param)
        cde_yDem_noCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDnC_file_name), alpha_param)
        cde_noDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(nDyC_file_name), alpha_param)
        cde_yDem_yCorr_pol_results[lr_idx, rn_idx,:] = ewma_vectorized(np.load(yDyC_file_name), alpha_param)

In [None]:
# Learning-rate sweep for the setting with demographics and without the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = cde_yDem_noCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/ Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for the setting without demographics and with the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = cde_noDem_yCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/o Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for the setting with demographics and with the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = cde_yDem_yCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/ Dem. w/ Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Learning-rate sweep for the setting without demographics and without the
# correlation constraint: mean curve with a +/- one standard deviation band.
eval_points = np.arange(1000)

plt.figure()
for lr_idx, lr in enumerate(learning_rates):
    # Slice for this learning rate: one row per entry of rand_nums.
    lr_runs = cde_noDem_noCorr_pol_results[lr_idx]
    temp_mean = lr_runs.mean(axis=0)
    temp_std = lr_runs.std(axis=0)

    band_lo, band_hi = temp_mean - temp_std, temp_mean + temp_std
    plt.plot(eval_points, temp_mean, lw=2, color=colors[lr_idx], label=f'lr={lr}')
    plt.fill_between(eval_points, band_lo, band_hi, alpha=0.3, color=colors[lr_idx])

plt.title(f'{arch.upper()} w/o Dem. w/o Corr')
plt.legend(loc=2)
plt.show()

In [None]:
# Compare the policies learned between different training settings with a learning rate of 1e-3
# (taken as the last entry of learning_rates -- assumes learning_rates ends with 1e-3).
plt.figure(figsize=(12,8))

# Mean and standard deviation across the rand_nums axis for each setting.
# The std must come from the same learning-rate slice as the mean. The nDnC std
# was previously taken from index 0 while its mean used index -1, so the shaded
# band (and best_cde_pol_std, which the next cell copies from nDnC_std)
# described a different learning rate than the curve.
nDnC_mean, nDnC_std = np.mean(cde_noDem_noCorr_pol_results[-1], axis=0), np.std(cde_noDem_noCorr_pol_results[-1], axis=0)
nDyC_mean, nDyC_std = np.mean(cde_noDem_yCorr_pol_results[-1], axis=0), np.std(cde_noDem_yCorr_pol_results[-1], axis=0)
# NOTE(review): this setting reads index 0 for both mean and std, unlike the
# other three (index -1). It is internally consistent, so it is left as is --
# confirm that the first learning rate is really the intended one here.
yDnC_mean, yDnC_std = np.mean(cde_yDem_noCorr_pol_results[0], axis=0), np.std(cde_yDem_noCorr_pol_results[0], axis=0)
yDyC_mean, yDyC_std = np.mean(cde_yDem_yCorr_pol_results[-1], axis=0), np.std(cde_yDem_yCorr_pol_results[-1], axis=0)

# The x axis scales each of the 1000 evaluation points by 500.
plt.plot(np.arange(1000)*500, nDnC_mean, lw=3, color=colors_comp[0], label='Observations Only')
plt.fill_between(np.arange(1000)*500, nDnC_mean-nDnC_std, nDnC_mean+nDnC_std, color=colors_comp[0], alpha=0.3)

plt.plot(np.arange(1000)*500, nDyC_mean, lw=3, color=colors_comp[1], label='Correlation Constrained')
plt.fill_between(np.arange(1000)*500, nDyC_mean-nDyC_std, nDyC_mean+nDyC_std, color=colors_comp[1], alpha=0.3)

plt.plot(np.arange(1000)*500, yDnC_mean, lw=3, color=colors_comp[2], label='Demographics Included')
plt.fill_between(np.arange(1000)*500, yDnC_mean-yDnC_std, yDnC_mean+yDnC_std, color=colors_comp[2], alpha=0.3)

plt.plot(np.arange(1000)*500, yDyC_mean, lw=3, color=colors_comp[3], label='Correlation Const. + Demographics Incl.')
plt.fill_between(np.arange(1000)*500, yDyC_mean-yDyC_std, yDyC_mean+yDyC_std, color=colors_comp[3], alpha=0.3)

plt.title(f'{arch.upper()} Policy Comparison between Training Settings', fontsize=18)
plt.xlabel("Iterations", fontsize=16)
plt.xticks(fontsize=16)
plt.ylabel("WIS Return", fontsize=16)
plt.yticks(fontsize=16)
plt.ylim([0.2, 1.0])
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.legend(loc=2,fontsize=14)
plt.grid()
plt.show()




In [None]:
# Keep copies of the CDE 'Observations Only' curve (nDnC) and its spread;
# they are drawn again in the cross-architecture comparison at the end.
best_cde_pol_mean = np.array(nDnC_mean, copy=True)
best_cde_pol_std = np.array(nDnC_std, copy=True)

## Putting it all together: the best policy from each architecture

In [None]:
# Architecture keys and the palette for the cross-architecture figure.
# NOTE: this rebinds `colors`, which the per-learning-rate plots above also index.
archs = [
    'ae',
    'ais',
    'cde',
    'ddm',
    'dst',
    'ode',
    'rnn',
]

# Eight hex colours are listed; only the first seven are requested below.
flatui = [
    "#9b59b6",
    "#3498db",
    "#feb308",
    "#e74c3c",
    "#34495e",
    "#2ecc71",
    "#84543f",
    "#76852a",
]
colors = sns.color_palette(flatui, 7)
# Alternative palette, kept for reference:
# colors = sns.color_palette('tab10',len(archs))

In [None]:
# Overlay the stored best policy curve of every SRL approach, each with a
# +/- one standard deviation band.
plt.figure(figsize=(16,12))

# The x axis scales each of the 1000 evaluation points by 500.
iteration_axis = np.arange(1000)*500

# (legend label, mean, std) in drawing order; the position selects the colour.
# DST is drawn before DDM, so colours follow this order rather than that of `archs`.
best_curves = [
    ('AE', best_ae_pol_mean, best_ae_pol_std),
    ('AIS', best_ais_pol_mean, best_ais_pol_std),
    ('CDE', best_cde_pol_mean, best_cde_pol_std),
    ('DST', best_dst_pol_mean, best_dst_pol_std),
    ('DDM', best_ddm_pol_mean, best_ddm_pol_std),
    ('ODE', best_ode_pol_mean, best_ode_pol_std),
    ('RNN', best_rnn_pol_mean, best_rnn_pol_std),
]
for color_idx, (arch_label, pol_mean, pol_std) in enumerate(best_curves):
    arch_color = colors[color_idx]
    plt.plot(iteration_axis, pol_mean, lw=3, color=arch_color, label=arch_label)
    plt.fill_between(iteration_axis, pol_mean-pol_std, pol_mean+pol_std, color=arch_color, alpha=0.3)


# plt.title('Policy Comparison between Embedding Approaches', fontsize=18)
plt.xlabel("Iterations", fontsize=24)
plt.ylabel("WIS Return", fontsize=24)
plt.yticks(fontsize=20)
plt.legend(loc=2,fontsize=18)
plt.ylim([0.2, 1.0])
plt.grid()
plt.xticks(fontsize=20)
plt.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
plt.show()