In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from cplex.exceptions import CplexSolverError
import matplotlib
matplotlib.rcParams['figure.dpi'] = 300
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import matplotlib.pyplot as plt
import torch.nn as nn
import numpy as np
import pickle
import os
import seaborn as sns
sns.set()

# Reacher

* Load experiment setups.

In [None]:
# Experiment identifiers and evaluation settings shared by the cells below.
env_name = 'reacher'
episode_length = 300
# Threshold applied to the ground-truth grid (>= confidence) and to the
# learned maps (<= 1 - confidence) when deciding which states count as safe.
confidence = 0.8
# Samples per joint angle; the evaluation grid has grid_points**2 states.
grid_points = 21

# Root directory for this experiment's data and figures, e.g. '300-reacher'.
name = '{}-{}'.format(int(episode_length), env_name)

In [None]:
# Horizon and cadences, all in training steps.
# steps: last step considered when reading curves and checkpoints.
steps = int(2e6)
# log_interval: spacing of the slots used for the TensorBoard scalar curves.
log_interval = int(1e4)
# save_interval: spacing of the saved '<step>-reachability-map.npz' files.
save_interval= int(1e5)

In [None]:
# Ground-truth safety label for every point of the joint-angle grid.
PI = np.pi
ans = []

# Both angles sweep [-PI, PI] in grid_points evenly spaced samples. The outer
# loop is over the second angle, so it is the slow index of the flat list.
for param_2 in range(grid_points):
    pos_2 = 2 * PI * param_2 / (grid_points - 1) - PI
    for param_1 in range(grid_points):
        pos_1 = 2 * PI * param_1 / (grid_points - 1) - PI
        
        # 1.0 when |0.1*sin(pos_1) + 0.11*sin(pos_1 + pos_2)| <= 0.1, else 0.0.
        # NOTE(review): presumably a bound on an arm-tip coordinate with link
        # lengths 0.1 and 0.11 -- confirm against the environment definition.
        safety = (np.abs(0.1 * np.sin(pos_1) + 0.11 * np.sin(pos_1 + pos_2)) <= 0.1) * 1.
        ans.append(safety)

ans = np.array(ans)
# Number of grid points labelled safe; the plots below divide the 'found'
# counts by this value.
max_safe_set = np.sum(ans >= confidence)

In [None]:
# Location and step of the initial (pre-training) reachability map.
baseline_dir = os.path.join(name, 'ddpg-initial')
baseline_step = int(0)

# The map is stored under the default key 'arr_0' of the .npz archive.
# NOTE(review): the arithmetic against the flat `ans` below assumes this array
# has the same flat layout as `ans` -- confirm against the code that saves it.
bl_map = np.load(os.path.join(baseline_dir, '{}-reachability-map.npz'.format(int(baseline_step))))['arr_0']

In [None]:
# Statistics of the initial map. A map entry <= 1 - confidence is treated as
# "predicted safe"; a ground-truth entry >= confidence is treated as safe.
# init_found:   predicted safe and truly safe (true positives).
# init_notsafe: predicted safe but truly unsafe (false positives).
# init_error:   mean squared difference between the map and the ground truth.
# These three values are not referenced by the other visible cells.
init_found = np.sum((bl_map <= 1. - confidence) * (ans >= confidence))
init_notsafe = np.sum((bl_map <= 1. - confidence) * (ans < confidence))
init_error = np.mean((bl_map - ans) ** 2)

* List of seeds, figure-related arguments.

In [None]:
# Seeds of the runs to aggregate for each method (ten per method).
bl_seeds = list(range(8001, 8011))
lyap_seeds = list(range(8001, 8011))
# NOTE(review): seed 8210 is skipped and 8211 used instead -- presumably that
# run is missing or was discarded; confirm.
exp_seeds = list(range(8201, 8210)) + [8211]

In [None]:
# Keyword arguments collected for figure export.
# NOTE(review): this dict is not referenced by any visible cell; the savefig
# calls below pass their own arguments.
fig_kwargs = {'format': 'eps',
              'dpi': 300,
              'rasterized': True,
              'bbox_inches': 'tight',
              'pad_inches': 0,
              'frameon': False,
             }
# Figsize default: (6., 4.); do not change this

## Plot approximated reachability

In [None]:
# Number of saved map checkpoints and the matching x axis (step 0 included,
# hence ckpts + 1 points). Both names are rebound by later cells, so the
# cells must be run in order.
ckpts = int(steps // save_interval)
xaxis = np.array(range(0, ckpts+1)) * save_interval

In [None]:
# Make sure the ground truth is flat (it is reshaped to 2-D again in the
# visualisation section further down).
ans = ans.reshape((ans.size,))

In [None]:
def get_stats(seeds, dir_name):
    """Compare every saved reachability map of every seed with the ground truth.

    Reads the notebook-level settings ``name``, ``baseline_dir``,
    ``baseline_step``, ``ckpts``, ``save_interval``, ``confidence`` and the
    ground-truth array ``ans``. A map entry counts as "predicted safe" when it
    is ``<= 1 - confidence``; a ground-truth entry counts as safe when it is
    ``>= confidence``.

    Args:
        seeds: sequence of run seeds; maps are read from
            ``<name>/<dir_name>-<seed>/<step>-reachability-map.npz``.
        dir_name: run-directory prefix, e.g. ``'spec-def-ddpg'``.

    Returns:
        Tuple ``(error, found, notsafe, cover)`` of arrays shaped
        ``(len(seeds), ckpts)``:
            error   -- mean squared difference between the map and ``ans``;
            found   -- count of entries predicted safe that are truly safe;
            notsafe -- count of entries predicted safe that are truly unsafe;
            cover   -- fraction of the previous checkpoint's predicted-safe
                       entries that are still predicted safe (NaN when the
                       previous predicted-safe set is empty). The first
                       checkpoint is compared with the baseline map.
    """

    def load_map(directory, step):
        # The context manager closes the .npz archive as soon as the array
        # has been read instead of waiting for garbage collection.
        path = os.path.join(directory, '{}-reachability-map.npz'.format(int(step)))
        with np.load(path) as archive:
            return archive['arr_0']

    threshold = 1. - confidence
    truly_safe = ans >= confidence
    truly_unsafe = ans < confidence

    # The baseline map is identical for every seed: read it from disk once.
    baseline_map = load_map(baseline_dir, baseline_step)

    error = []
    found = []
    notsafe = []
    cover = []

    for seed in seeds:
        run_dir = os.path.join(name, '{}-{}'.format(dir_name, seed))
        # Private copy, because it is overwritten in place below.
        map_prev = baseline_map.copy()
        for i in range(1, ckpts + 1):
            map_now = load_map(run_dir, save_interval * i)
            safe_now = map_now <= threshold
            safe_prev = map_prev <= threshold

            found.append(np.sum(safe_now * truly_safe))
            notsafe.append(np.sum(safe_now * truly_unsafe))
            error.append(np.mean((map_now - ans) ** 2))

            # Same value as the former 0/0 division, without the RuntimeWarning.
            prev_count = np.sum(safe_prev)
            if prev_count > 0:
                cover.append(np.sum(safe_now * safe_prev) / prev_count)
            else:
                cover.append(np.nan)

            map_prev[:] = map_now[:]

    shape = (len(seeds), ckpts)
    error = np.array(error).reshape(shape)
    found = np.array(found).reshape(shape)
    notsafe = np.array(notsafe).reshape(shape)
    cover = np.array(cover).reshape(shape)

    return error, found, notsafe, cover

In [None]:
# Map statistics of the 'spec-def-ddpg' runs (plotted as 'No Lyapunov').
b1_error, b1_found, b1_notsafe, b1_cover = get_stats(bl_seeds, 'spec-def-ddpg')# double Q, double replay

In [None]:
# Map statistics of the 'spec-lyap-ddpg' runs (plotted as 'LSS').
l1_error, l1_found, l1_notsafe, l1_cover = get_stats(lyap_seeds, 'spec-lyap-ddpg')# double Q

In [None]:
# Map statistics of the 'spec-exp-ddpg' runs (plotted as 'ESS').
e1_error, e1_found, e1_notsafe, e1_cover = get_stats(exp_seeds, 'spec-exp-ddpg')# double Q, double replay, explorer only

#### Show the result briefly.

In [None]:
# Quick side-by-side overview (not saved): left panel = true positives
# normalised by max_safe_set, right panel = false positives normalised by the
# number of grid points. Shaded bands are +/- one standard deviation over seeds.
fig, axes = plt.subplots(1, 2, sharex=True, figsize=(10,3))

#### Compare with the average.
# A leading 0 is prepended for step 0 so the curves have ckpts + 1 points like xaxis.
b1_mu = np.concatenate(([0], np.mean(b1_found, axis=0)), axis=0) / max_safe_set
b1_std = np.concatenate(([0], np.std(b1_found, axis=0)), axis=0) / max_safe_set
l1_mu = np.concatenate(([0], np.mean(l1_found, axis=0)), axis=0) / max_safe_set
l1_std = np.concatenate(([0], np.std(l1_found, axis=0)), axis=0) / max_safe_set
e1_mu = np.concatenate(([0], np.mean(e1_found, axis=0)), axis=0) / max_safe_set
e1_std = np.concatenate(([0], np.std(e1_found, axis=0)), axis=0) / max_safe_set

axes[0].fill_between(xaxis, b1_mu - b1_std, b1_mu + b1_std, alpha=0.25, color='teal')
axes[0].fill_between(xaxis, l1_mu - l1_std, l1_mu + l1_std, alpha=0.25, color='coral')
axes[0].fill_between(xaxis, e1_mu - e1_std, e1_mu + e1_std, alpha=0.25, color='mediumblue')
axes[0].plot(xaxis, b1_mu, label='No Lyapunov', color='teal')
axes[0].plot(xaxis, l1_mu, label='LSS', color='coral')
axes[0].plot(xaxis, e1_mu, label='ESS', color='mediumblue')

# b1_best = np.concatenate(([0], b1_found[np.argmax(b1_found[:, -1]), :]), axis=0) / max_safe_set
# l1_best = np.concatenate(([0], l1_found[np.argmax(l1_found[:, -1]), :]), axis=0) / max_safe_set
# e1_best = np.concatenate(([0], e1_found[np.argmax(e1_found[:, -1]), :]), axis=0) / max_safe_set
# axes[0].plot(xaxis, b1_best, label='No Lyapunov', color='teal')
# axes[0].plot(xaxis, l1_best, label='LSS', color='coral')
# axes[0].plot(xaxis, e1_best, label='ESS', color='mediumblue')

# axes[0].legend(ncol=3, loc='best')
axes[0].set_xlabel('Steps')
axes[0].set_title('Correct specification')
axes[0].ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
axes[0].set_xlim(int(0. * save_interval), int(ckpts * save_interval))
axes[0].set_ylim(-0.05, 1.05)

#### Compare with the best.
# b1_best = np.concatenate(([0], b1_notsafe[np.argmax(b1_found[:, -1]), :]), axis=0) / np.prod(ans.shape)
# l1_best = np.concatenate(([0], l1_notsafe[np.argmax(l1_found[:, -1]), :]), axis=0) / np.prod(ans.shape)
# e1_best = np.concatenate(([0], e1_notsafe[np.argmax(e1_found[:, -1]), :]), axis=0) / np.prod(ans.shape)
# axes[1].plot(xaxis, b1_best, label='No Lyapunov', color='teal')
# axes[1].plot(xaxis, l1_best, label='LSS', color='coral')
# axes[1].plot(xaxis, e1_best, label='ESS', color='mediumblue')

# The *_mu / *_std names are reused for the false-positive statistics.
b1_mu = np.concatenate(([0], np.mean(b1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
b1_std = np.concatenate(([0], np.std(b1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
l1_mu = np.concatenate(([0], np.mean(l1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
l1_std = np.concatenate(([0], np.std(l1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
e1_mu = np.concatenate(([0], np.mean(e1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
e1_std = np.concatenate(([0], np.std(e1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)

axes[1].fill_between(xaxis, b1_mu - b1_std, b1_mu + b1_std, alpha=0.25, color='teal')
axes[1].fill_between(xaxis, l1_mu - l1_std, l1_mu + l1_std, alpha=0.25, color='coral')
axes[1].fill_between(xaxis, e1_mu - e1_std, e1_mu + e1_std, alpha=0.25, color='mediumblue')
axes[1].plot(xaxis, b1_mu, label='No Lyapunov', color='teal')
axes[1].plot(xaxis, l1_mu, label='LSS', color='coral')
axes[1].plot(xaxis, e1_mu, label='ESS', color='mediumblue')

axes[1].legend(ncol=1, loc='best')
axes[1].set_xlabel('Steps')
axes[1].set_title('False positive specification')
axes[1].ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
axes[1].set_xlim(int(0. * save_interval), int(ckpts * save_interval))
axes[1].set_ylim(-0.025, 0.225)

#### Print figures.

In [None]:
# Export figure: true-positive safe states found, as a fraction of
# max_safe_set, mean +/- one standard deviation over seeds.
fig, ax = plt.subplots(1, 1, sharex=True)

#### Compare with the best.
# b1_best = np.concatenate(([0], b1_found[np.argmax(b1_found[:, -1]), :]), axis=0) / max_safe_set
# l1_best = np.concatenate(([0], l1_found[np.argmax(l1_found[:, -1]), :]), axis=0) / max_safe_set
# e1_best = np.concatenate(([0], e1_found[np.argmax(e1_found[:, -1]), :]), axis=0) / max_safe_set

# ax.plot(xaxis, b1_best, label='No Lyapunov', color='teal')
# ax.plot(xaxis, l1_best, label='LSS', color='coral')
# ax.plot(xaxis, e1_best, label='ESS', color='mediumblue')

#### Compare with the average.
# A leading 0 is prepended for step 0 so the curves line up with xaxis.
b1_mu = np.concatenate(([0], np.mean(b1_found, axis=0)), axis=0) / max_safe_set
b1_std = np.concatenate(([0], np.std(b1_found, axis=0)), axis=0) / max_safe_set
# b2_mu = np.concatenate(([0], np.mean(b2_found, axis=0)), axis=0) / max_safe_set
# b2_std = np.concatenate(([0], np.std(b2_found, axis=0)), axis=0) / max_safe_set
l1_mu = np.concatenate(([0], np.mean(l1_found, axis=0)), axis=0) / max_safe_set
l1_std = np.concatenate(([0], np.std(l1_found, axis=0)), axis=0) / max_safe_set
# l2_mu = np.concatenate(([0], np.mean(l2_found, axis=0)), axis=0) / max_safe_set
# l2_std = np.concatenate(([0], np.std(l2_found, axis=0)), axis=0) / max_safe_set
e1_mu = np.concatenate(([0], np.mean(e1_found, axis=0)), axis=0) / max_safe_set
e1_std = np.concatenate(([0], np.std(e1_found, axis=0)), axis=0) / max_safe_set
# e2_mu = np.concatenate(([0], np.mean(e2_found, axis=0)), axis=0) / max_safe_set
# e2_std = np.concatenate(([0], np.std(e2_found, axis=0)), axis=0) / max_safe_set

ax.fill_between(xaxis, b1_mu - b1_std, b1_mu + b1_std, alpha=0.25, color='teal')
# ax.fill_between(xaxis, b2_mu - b2_std, b2_mu + b2_std, alpha=0.25, color='mediumseagreen')
ax.fill_between(xaxis, l1_mu - l1_std, l1_mu + l1_std, alpha=0.25, color='coral')
# ax.fill_between(xaxis, l2_mu - l2_std, l2_mu + l2_std, alpha=0.25, color='indianred')
ax.fill_between(xaxis, e1_mu - e1_std, e1_mu + e1_std, alpha=0.25, color='mediumblue')
# ax.fill_between(xaxis, e2_mu - e2_std, e2_mu + e2_std, alpha=0.25, color='slateblue')
ax.plot(xaxis, b1_mu, label='No Lyapunov', color='teal')
# ax.plot(xaxis, b2_mu, label='No Lyapunov ()', color='mediumseagreen')
ax.plot(xaxis, l1_mu, label='LSS', color='coral')
# ax.plot(xaxis, l2_mu, label='LSS ()', color='indianred')
ax.plot(xaxis, e1_mu, label='ESS', color='mediumblue')
# ax.plot(xaxis, e2_mu, label='ESS ()', color='slateblue')

ax.legend(ncol=3, loc='best')
# ax.legend(bbox_to_anchor=(1.025, 1.00), ncol=1, loc='best')
# plt.xlabel('Steps (1 step=128 samples)')
# plt.ylabel('Ratio of safe states found')
ax.set_xlabel('Steps')
ax.ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
ax.set_xlim(int(0. * save_interval), int(ckpts * save_interval))
ax.set_ylim(-0.05, 1.05)
# ax.set_rasterized(True)
ax.set_rasterization_zorder(0)
fig.set_dpi(300)
# Transparent figure background for the exported file.
fig.patch.set_alpha(0)
fig.tight_layout()
plt.savefig(os.path.join(name, '{}-spec-[safe_set]over[max_safe_set].pdf'.format(env_name)), format='pdf')

In [None]:
# Export figure: false positives (predicted safe but truly unsafe) as a
# fraction of all grid points, mean +/- one standard deviation over seeds.
fig, ax = plt.subplots(1, 1, sharex=True)

#### Compare with the best.
# b1_best = np.concatenate(([0], b1_notsafe[np.argmax(b1_found[:, -1]), :]), axis=0) / np.prod(ans.shape)
# l1_best = np.concatenate(([0], l1_notsafe[np.argmax(l1_found[:, -1]), :]), axis=0) / np.prod(ans.shape)
# e1_best = np.concatenate(([0], e1_notsafe[np.argmax(e1_found[:, -1]), :]), axis=0) / np.prod(ans.shape)

# ax.plot(xaxis, b1_best, label='No Lyapunov', color='teal')
# ax.plot(xaxis, l1_best, label='LSS', color='coral')
# ax.plot(xaxis, e1_best, label='ESS', color='mediumblue')

#### Compare with the average.
# A leading 0 is prepended for step 0 so the curves line up with xaxis.
b1_mu = np.concatenate(([0], np.mean(b1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
b1_std = np.concatenate(([0], np.std(b1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
# b2_mu = np.concatenate(([0], np.mean(b2_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
# b2_std = np.concatenate(([0], np.std(b2_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
l1_mu = np.concatenate(([0], np.mean(l1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
l1_std = np.concatenate(([0], np.std(l1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
# l2_mu = np.concatenate(([0], np.mean(l2_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
# l2_std = np.concatenate(([0], np.std(l2_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
e1_mu = np.concatenate(([0], np.mean(e1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
e1_std = np.concatenate(([0], np.std(e1_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
# e2_mu = np.concatenate(([0], np.mean(e2_notsafe, axis=0)), axis=0) / np.prod(ans.shape)
# e2_std = np.concatenate(([0], np.std(e2_notsafe, axis=0)), axis=0) / np.prod(ans.shape)

ax.fill_between(xaxis, b1_mu - b1_std, b1_mu + b1_std, alpha=0.25, color='teal')
# ax.fill_between(xaxis, b2_mu - b2_std, b2_mu + b2_std, alpha=0.25, color='mediumseagreen')
ax.fill_between(xaxis, l1_mu - l1_std, l1_mu + l1_std, alpha=0.25, color='coral')
# ax.fill_between(xaxis, l2_mu - l2_std, l2_mu + l2_std, alpha=0.25, color='indianred')
ax.fill_between(xaxis, e1_mu - e1_std, e1_mu + e1_std, alpha=0.25, color='mediumblue')
# ax.fill_between(xaxis, e2_mu - e2_std, e2_mu + e2_std, alpha=0.25, color='slateblue')
ax.plot(xaxis, b1_mu, label='No Lyapunov', color='teal')
# ax.plot(xaxis, b2_mu, label='No Lyapunov ()', color='mediumseagreen')
ax.plot(xaxis, l1_mu, label='LSS', color='coral')
# ax.plot(xaxis, l2_mu, label='LSS ()', color='indianred')
ax.plot(xaxis, e1_mu, label='ESS', color='mediumblue')
# ax.plot(xaxis, e2_mu, label='ESS ()', color='slateblue')

ax.legend(ncol=3, loc='best')
# ax.legend(bbox_to_anchor=(1.025, 1.00), ncol=1, loc='best')
# plt.xlabel('Steps (1 step=128 samples)')
# plt.ylabel('Ratio of safe states found')
plt.xlabel('Steps')
ax.ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
plt.xlim(int(0. * save_interval), int(ckpts * save_interval))
plt.ylim(-0.025, 0.225)
# ax.set_rasterized(True)
ax.set_rasterization_zorder(0)
fig.set_dpi(300)
# Transparent figure background for the exported file.
fig.patch.set_alpha(0)
fig.tight_layout()
plt.savefig(os.path.join(name, '{}-spec-[false_positive_safe_set]over[state_space].pdf'.format(env_name)), format='pdf')

In [None]:
# Export figure for the cover ratio (share of the previous checkpoint's
# predicted-safe set that is still predicted safe).
# NOTE(review): every plotting statement in this cell is commented out, so
# ax.legend() has no labelled artists and the saved PDF contains empty axes.
# Confirm whether this figure is still wanted.
fig, ax = plt.subplots(1, 1, sharex=True)

#### Compare with the best.
# b1_best = np.concatenate(([0], b1_cover[np.argmax(b1_found[:, -1]), :]), axis=0)
# l1_best = np.concatenate(([0], l1_cover[np.argmax(l1_found[:, -1]), :]), axis=0)
# e1_best = np.concatenate(([0], e1_cover[np.argmax(e1_found[:, -1]), :]), axis=0)

# ax.plot(xaxis, b1_best, label='No Lyapunov', color='teal')
# ax.plot(xaxis, l1_best, label='LSS', color='coral')
# ax.plot(xaxis, e1_best, label='ESS', color='mediumblue')

#### Compare with the average.
# b1_cover = np.ma.array(b1_cover, mask=np.isnan(b1_cover))
# l1_cover = np.ma.array(l1_cover, mask=np.isnan(l1_cover))
# e1_cover = np.ma.array(e1_cover, mask=np.isnan(e1_cover))

# b1_mu = np.concatenate(([0], np.mean(b1_cover, axis=0)), axis=0)
# b1_std = np.concatenate(([0], np.std(b1_cover, axis=0)), axis=0)
# b2_mu = np.concatenate(([1], np.mean(b2_cover, axis=0)), axis=0)
# b2_std = np.concatenate(([0], np.std(b2_cover, axis=0)), axis=0)
# l1_mu = np.concatenate(([0], np.mean(l1_cover, axis=0)), axis=0)
# l1_std = np.concatenate(([0], np.std(l1_cover, axis=0)), axis=0)
# l2_mu = np.concatenate(([1], np.mean(l2_cover, axis=0)), axis=0)
# l2_std = np.concatenate(([0], np.std(l2_cover, axis=0)), axis=0)
# e1_mu = np.concatenate(([0], np.mean(e1_cover, axis=0)), axis=0)
# e1_std = np.concatenate(([0], np.std(e1_cover, axis=0)), axis=0)
# e2_mu = np.concatenate(([1], np.mean(e2_cover, axis=0)), axis=0)
# e2_std = np.concatenate(([0], np.std(e2_cover, axis=0)), axis=0)

# ax.fill_between(xaxis, b1_mu - b1_std, b1_mu + b1_std, alpha=0.25, color='teal')
# ax.fill_between(xaxis, b2_mu - b2_std, b2_mu + b2_std, alpha=0.25, color='mediumseagreen')
# ax.fill_between(xaxis, l1_mu - l1_std, l1_mu + l1_std, alpha=0.25, color='coral')
# ax.fill_between(xaxis, l2_mu - l2_std, l2_mu + l2_std, alpha=0.25, color='indianred')
# ax.fill_between(xaxis, e1_mu - e1_std, e1_mu + e1_std, alpha=0.25, color='mediumblue')
# ax.fill_between(xaxis, e2_mu - e2_std, e2_mu + e2_std, alpha=0.25, color='slateblue')
# ax.plot(xaxis, b1_mu, label='No Lyapunov', color='teal')
# ax.plot(xaxis, b2_mu, label='No Lyapunov ()', color='mediumseagreen')
# ax.plot(xaxis, l1_mu, label='LSS', color='coral')
# ax.plot(xaxis, l2_mu, label='LSS ()', color='indianred')
# ax.plot(xaxis, e1_mu, label='ESS', color='mediumblue')
# ax.plot(xaxis, e2_mu, label='ESS ()', color='slateblue')

ax.legend(ncol=3, loc='best')
# ax.legend(bbox_to_anchor=(1.025, 1.00), ncol=1, loc='best')
# plt.xlabel('Steps (1 step=128 samples)')
# plt.ylabel('Ratio of safe states found')
plt.xlabel('Steps')
ax.ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
plt.xlim(int(0. * save_interval), int(ckpts * save_interval))
plt.ylim(0.25, 1.05)
# ax.set_rasterized(True)
ax.set_rasterization_zorder(0)
fig.set_dpi(300)
fig.patch.set_alpha(0)
fig.tight_layout()
plt.savefig(os.path.join(name, '{}-spec-[cover_ratio].pdf'.format(env_name)), format='pdf')

## Plot average episode safety during learning

In [None]:
# Loading too much data is slow...
tf_size_guidance = {
    'compressedHistograms': 10,
    'images': 0,
    'scalars': 200,
    'histograms': 1
}

ckpts = int(steps // log_interval)
xaxis = np.array(range(0, ckpts+1)) * log_interval

In [None]:
# Value drawn at step 0 of the average-episode-safety curves.
init_safety = 0.#0.42

In [None]:
def get_curves(seeds, dir_name):
    """Collect the 'train/average_safety' scalar of every seed on a fixed step grid.

    Reads the notebook-level settings ``name``, ``ckpts``, ``steps``,
    ``log_interval`` and ``tf_size_guidance``. Every regular file inside
    ``<name>/<dir_name>-<seed>/tb`` is opened as a TensorBoard event file.

    Args:
        seeds: sequence of run seeds.
        dir_name: run-directory prefix, e.g. ``'spec-def-ddpg'``.

    Returns:
        Array of shape ``(len(seeds), ckpts)``. Column ``k`` holds the value
        logged at a step ``s`` with ``s // log_interval == k + 1``; slots that
        were never logged stay 0.
    """
    epi_safety = np.zeros((len(seeds), ckpts,))
    for idx, seed in enumerate(seeds):
        dn = os.path.join(name, '{}-{}'.format(dir_name, seed), 'tb')
        event_files = [f for f in os.listdir(dn) if os.path.isfile(os.path.join(dn, f))]
        for tb in event_files:
            event_acc = EventAccumulator(os.path.join(dn, tb), tf_size_guidance)
            event_acc.Reload()

            # Show all tags in the log file
            #print(event_acc.Tags())

            # Each entry is a ScalarEvent(wall_time, step, value).
            for event in event_acc.Scalars('train/average_safety'):
                if event.step > steps:
                    continue
                slot = event.step // log_interval - 1
                if slot < 0:
                    # Steps before the first log interval have no slot; the
                    # former index -1 silently overwrote the LAST column.
                    continue
                epi_safety[idx][slot] = event.value

    return epi_safety

In [None]:
# Average-episode-safety curves of the 'spec-def-ddpg' runs ('No Lyapunov').
b1_safety = get_curves(bl_seeds, 'spec-def-ddpg')

In [None]:
# Average-episode-safety curves of the 'spec-lyap-ddpg' runs ('LSS').
l1_safety = get_curves(lyap_seeds, 'spec-lyap-ddpg')

In [None]:
# Average-episode-safety curves of the 'spec-exp-ddpg' runs ('ESS').
e1_safety = get_curves(exp_seeds, 'spec-exp-ddpg')

In [None]:
# Export figure: average episode safety during learning, mean +/- one
# standard deviation over seeds, with the confidence level as a dashed line.
fig, ax = plt.subplots(1, 1, sharex=True)

# init_safety is prepended as the step-0 value so the curves line up with xaxis.
b1_mu = np.concatenate(([init_safety], np.mean(b1_safety, axis=0)), axis=0)
b1_std = np.concatenate(([0], np.std(b1_safety, axis=0)), axis=0)
# b2_mu = np.concatenate(([init_safety], np.mean(b2_safety, axis=0)), axis=0)
# b2_std = np.concatenate(([0], np.std(b2_safety, axis=0)), axis=0)
l1_mu = np.concatenate(([init_safety], np.mean(l1_safety, axis=0)), axis=0)
l1_std = np.concatenate(([0], np.std(l1_safety, axis=0)), axis=0)
# l2_mu = np.concatenate(([init_safety], np.mean(l2_safety, axis=0)), axis=0)
# l2_std = np.concatenate(([0], np.std(l2_safety, axis=0)), axis=0)
e1_mu = np.concatenate(([init_safety], np.mean(e1_safety, axis=0)), axis=0)
e1_std = np.concatenate(([0], np.std(e1_safety, axis=0)), axis=0)
# e2_mu = np.concatenate(([init_safety], np.mean(e2_safety, axis=0)), axis=0)
# e2_std = np.concatenate(([0], np.std(e2_safety, axis=0)), axis=0)

ax.fill_between(xaxis, b1_mu - b1_std, b1_mu + b1_std, alpha=0.25, color='teal')
# ax.fill_between(xaxis, b2_mu - b2_std, b2_mu + b2_std, alpha=0.25, color='mediumseagreen')
ax.fill_between(xaxis, l1_mu - l1_std, l1_mu + l1_std, alpha=0.25, color='coral')
# ax.fill_between(xaxis, l2_mu - l2_std, l2_mu + l2_std, alpha=0.25, color='indianred')
ax.fill_between(xaxis, e1_mu - e1_std, e1_mu + e1_std, alpha=0.25, color='mediumblue')
# ax.fill_between(xaxis, e2_mu - e2_std, e2_mu + e2_std, alpha=0.25, color='slateblue')
ax.plot(xaxis, b1_mu, label='No Lyapunov', color='teal')
# ax.plot(xaxis, b2_mu, label='No Lyapunov ()', color='mediumseagreen')
ax.plot(xaxis, l1_mu, label='LSS', color='coral')
# ax.plot(xaxis, l2_mu, label='LSS ()', color='indianred')
ax.plot(xaxis, e1_mu, label='ESS', color='mediumblue')
# ax.plot(xaxis, e2_mu, label='ESS ()', color='slateblue')

# Horizontal reference line at the required confidence level.
ax.plot(xaxis, confidence * np.ones(ckpts+1), 'r--')

ax.legend(ncol=3, loc='lower right')
# ax.legend(bbox_to_anchor=(1.025, 1.00), ncol=1, loc='best')
# plt.xlabel('Steps (1 step=128 samples)')
# plt.ylabel('Average episode safety')
plt.xlabel('Steps')
ax.ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
# The x range starts at the first logged slot, so the step-0 point is cut off.
plt.xlim(log_interval, ckpts*log_interval)
plt.ylim(.45, 1.05)
# ax.set_rasterized(True)
ax.set_rasterization_zorder(0)
fig.set_dpi(300)
fig.patch.set_alpha(0)
fig.tight_layout()
plt.savefig(os.path.join(name, '{}-spec-[average_episode_safety].pdf'.format(env_name)), format='pdf')

### Get Lambda if necessary.

In [None]:
def get_lambda(seeds, dir_name):
    """Collect the 'train/exploratory_lambda' scalar of every seed on a fixed step grid.

    Reads the notebook-level settings ``name``, ``ckpts``, ``steps``,
    ``log_interval`` and ``tf_size_guidance``. Every regular file inside
    ``<name>/<dir_name>-<seed>/tb`` is opened as a TensorBoard event file.

    Args:
        seeds: sequence of run seeds.
        dir_name: run-directory prefix, e.g. ``'spec-exp-ddpg'``.

    Returns:
        Array of shape ``(len(seeds), ckpts)``. Column ``k`` holds the value
        logged at a step ``s`` with ``s // log_interval == k + 1``; slots that
        were never logged stay 0.
    """
    lambda_values = np.zeros((len(seeds), ckpts,))
    for idx, seed in enumerate(seeds):
        dn = os.path.join(name, '{}-{}'.format(dir_name, seed), 'tb')
        event_files = [f for f in os.listdir(dn) if os.path.isfile(os.path.join(dn, f))]
        for tb in event_files:
            event_acc = EventAccumulator(os.path.join(dn, tb), tf_size_guidance)
            event_acc.Reload()

            # Show all tags in the log file
            #print(event_acc.Tags())

            # Each entry is a ScalarEvent(wall_time, step, value).
            for event in event_acc.Scalars('train/exploratory_lambda'):
                if event.step > steps:
                    continue
                slot = event.step // log_interval - 1
                if slot < 0:
                    # Steps before the first log interval have no slot; the
                    # former index -1 silently overwrote the LAST column.
                    continue
                lambda_values[idx][slot] = event.value

    return lambda_values

In [None]:
# Lambda curve of a single run (seed 7001).
# NOTE(review): this seed is not part of exp_seeds defined above -- confirm
# that the '<name>/spec-exp-ddpg-7001' run directory exists.
_lambda = get_lambda(list(range(7001, 7002)), 'spec-exp-ddpg')# double Q, no target Q networks

In [None]:
# Export figure: logged exploratory lambda, mean +/- one standard deviation
# over the seeds loaded into _lambda.
fig, ax = plt.subplots(1, 1, sharex=True)

# The step-0 point is drawn as lambda = 1.0 with zero spread so the curve
# has the same length as xaxis.
_mu = np.concatenate(([1.], np.mean(_lambda, axis=0)), axis=0)
_std = np.concatenate(([0], np.std(_lambda, axis=0)), axis=0)

ax.fill_between(xaxis, _mu - _std, _mu + _std, alpha=0.25, color='blue')
ax.plot(xaxis, _mu, label='ESS (Double Q)', color='blue')

ax.legend(bbox_to_anchor=(1.025, 1.00), ncol=1, loc='best')
# plt.xlabel('Steps (1 step=128 samples)')
# plt.ylabel('Average episode safety')
plt.xlabel('Steps')
ax.ticklabel_format(style='sci', scilimits=(-3,4), axis='both')
plt.xlim(log_interval, ckpts*log_interval)
# plt.ylim(-.05, 1.05)
ax.set_rasterized(True)
ax.set_rasterization_zorder(0)
fig.set_dpi(300)
fig.patch.set_alpha(0)
fig.tight_layout()
# Fix: this call used to be fused onto the tight_layout() line above, which
# made the whole cell a SyntaxError.
plt.savefig(os.path.join(name, '{}-spec-[episode_lambda].eps'.format(env_name)), format='eps')

## Visualize.

In [None]:
# Ground truth as a (grid_points, grid_points) image for the plots below.
# This rebinds `ans`, so get_stats must not be re-run after this cell without
# flattening `ans` again.
ans = np.array(ans).reshape((grid_points, grid_points))

In [None]:
def get_reachability(name, logdir, seeds, ckpts, reshape=True, reference=None):
    """Load the saved reachability maps of several runs and reduce them to one run.

    Maps are read from
    ``<name>/<logdir>-<seed>/<save_interval * i>-reachability-map.npz`` for
    ``i = 1 .. ckpts`` (``save_interval`` and ``grid_points`` are notebook-level
    settings).

    Args:
        name: root directory of the experiment.
        logdir: run-directory prefix, e.g. ``'spec-def-ddpg'``.
        seeds: iterable of run seeds.
        ckpts: number of checkpoints to load per seed.
        reshape: when true, try to reshape the result to
            ``(ckpts, grid_points, grid_points)``; a failed reshape is only
            reported and the array is returned unchanged.
        reference: optional array indexed as ``[seed, checkpoint]``. When
            given, the maps of the seed with the largest last-column value are
            returned; otherwise the maps are averaged over seeds.

    Returns:
        Array whose first axis is the checkpoint index.
    """
    def maps_of(seed):
        # All checkpoints of one run, stacked along a new leading axis.
        run_dir = os.path.join(name, '{}-{}'.format(logdir, seed))
        return np.array([
            np.load(os.path.join(run_dir,
                                 '{}-reachability-map.npz'.format(int(save_interval * step))))['arr_0']
            for step in range(1, ckpts + 1)
        ])

    stacked = np.array([maps_of(seed) for seed in seeds])

    if reference is not None:
        # Keep only the run that scores best at the final checkpoint.
        result = stacked[np.argmax(reference[:, -1]), ...]
    else:
        result = stacked.mean(0)

    if not reshape:
        return result
    try:
        return result.reshape((ckpts, grid_points, grid_points))
    except ValueError:
        print("Reshape unavailable.")
        return result

In [None]:
# Switch ckpts / xaxis back to the map-checkpoint grid (they were rebound to
# the scalar-log grid above). This xaxis starts at the first checkpoint and
# is not referenced by the visible cells below.
ckpts = int(steps // save_interval)
xaxis = save_interval * np.array(range(1, ckpts+1))#(np.array(range(1, ckpts+1))-0.5)

In [None]:
# For every method, keep the maps of the single seed with the highest
# true-positive count at the last checkpoint (selected through `reference`).
bl_list = get_reachability(name, 'spec-def-ddpg', bl_seeds, ckpts, reshape=True, reference=b1_found)
lyap_list = get_reachability(name, 'spec-lyap-ddpg', lyap_seeds, ckpts, reshape=True, reference=l1_found)
exp_list = get_reachability(name, 'spec-exp-ddpg', exp_seeds, ckpts, reshape=True, reference=e1_found)

In [None]:
# 1-based index of the checkpoint to visualise; ckpts selects the last one.
idx = ckpts

In [None]:
# Predicted-safe map of the 'spec-def-ddpg' run at checkpoint idx.
fig, ax = plt.subplots(1,1)
# Show False-positive and True-positive altogether.
# Pixel values: 1.0 = predicted safe and truly safe, 0.5 = predicted safe but
# truly unsafe, 0.0 = not predicted safe.
img = plt.imshow((1.-bl_list[idx-1] >= confidence) * (ans.reshape((grid_points, grid_points)) >= confidence)
                 + (1.-bl_list[idx-1] >= confidence) * (ans.reshape((grid_points, grid_points)) < confidence) * 0.5,
                 cmap='inferno', extent=[-180., +180., +180., -180.,], aspect=1.)
#img = plt.imshow(1.-bl_list[idx-1], cmap='plasma', extent=[-180., +180., +180., -180.,], aspect=1.)
ax.set_xlabel('Angle 1 (degree)')# center
ax.set_xticks(np.arange(-180., 180.+1, 60.))
ax.set_ylabel('Angle 2 (degree)')# arm tip
ax.set_yticks(np.arange(-180., 180.+1, 60.))
# The y axis is hidden although its label and ticks were just configured.
# NOTE(review): presumably because this panel sits next to the ground-truth
# panel, which keeps its y axis -- confirm.
ax.get_yaxis().set_visible(False)

plt.clim(0., 1.)
# fig.colorbar(img)
plt.grid(False)
fig.set_dpi(300)
# Transparent figure and axes backgrounds for the exported PDF.
fig.patch.set_facecolor('none')
fig.patch.set_alpha(0)
fig.tight_layout()
ax.patch.set_facecolor('none')
ax.patch.set_alpha(0)
fig.savefig(os.path.join(name, '{}-spec-visualize-def-{}.pdf'.format(env_name, save_interval * idx)),
            format='pdf', facecolor=fig.get_facecolor(), edgecolor='none', bbox_inches='tight')

In [None]:
# Predicted-safe map of the 'spec-lyap-ddpg' run at checkpoint idx.
fig, ax = plt.subplots(1,1)
# Pixel values: 1.0 = predicted safe and truly safe, 0.5 = predicted safe but
# truly unsafe, 0.0 = not predicted safe.
img = plt.imshow((1.-lyap_list[idx-1] >= confidence) * (ans.reshape((grid_points, grid_points)) >= confidence)
                 + (1.-lyap_list[idx-1] >= confidence) * (ans.reshape((grid_points, grid_points)) < confidence) * 0.5,
                 cmap='inferno', extent=[-180., +180., +180., -180.,], aspect=1.)
#img = plt.imshow(1.-lyap_list[idx-1], cmap='plasma', extent=[-180., +180., +180., -180.,], aspect=1.)
ax.set_xlabel('Angle 1 (degree)')# center
ax.set_xticks(np.arange(-180., 180.+1, 60.))
ax.set_ylabel('Angle 2 (degree)')# arm tip
ax.set_yticks(np.arange(-180., 180.+1, 60.))
# The y axis is hidden although its label and ticks were just configured.
ax.get_yaxis().set_visible(False)

plt.clim(0., 1.)
# fig.colorbar(img)
plt.grid(False)
fig.set_dpi(300)
# Transparent figure and axes backgrounds for the exported PDF.
fig.patch.set_facecolor('none')
fig.patch.set_alpha(0)
fig.tight_layout()
ax.patch.set_facecolor('none')
ax.patch.set_alpha(0)
fig.savefig(os.path.join(name, '{}-spec-visualize-lyap-{}.pdf'.format(env_name, save_interval * idx)),
            format='pdf', facecolor=fig.get_facecolor(), edgecolor='none', bbox_inches='tight')

In [None]:
# Predicted-safe map of the 'spec-exp-ddpg' run at checkpoint idx.
fig, ax = plt.subplots(1,1)
# Pixel values: 1.0 = predicted safe and truly safe, 0.5 = predicted safe but
# truly unsafe, 0.0 = not predicted safe. Unlike the two cells above, `ans`
# is used without an explicit reshape, so it must already be 2-D here.
img = plt.imshow((1.-exp_list[idx-1] >= confidence) * (ans >= confidence)
                 + (1.-exp_list[idx-1] >= confidence) * (ans < confidence) * 0.5,
                 cmap='inferno', extent=[-180., +180., +180., -180.,], aspect=1.)
#img = plt.imshow(1.-exp_list[ckpts-1], cmap='plasma', extent=[-180., +180., +180., -180.,], aspect=1.)
ax.set_xlabel('Angle 1 (degree)')# center
ax.set_xticks(np.arange(-180., 180.+1, 60.))
ax.set_ylabel('Angle 2 (degree)')# arm tip
ax.set_yticks(np.arange(-180., 180.+1, 60.))
# The y axis is hidden although its label and ticks were just configured.
ax.get_yaxis().set_visible(False)

plt.clim(0., 1.)
# fig.colorbar(img)
plt.grid(False)
fig.set_dpi(300)
# Transparent figure and axes backgrounds for the exported PDF.
fig.patch.set_facecolor('none')
fig.patch.set_alpha(0)
fig.tight_layout()
ax.patch.set_facecolor('none')
ax.patch.set_alpha(0)
fig.savefig(os.path.join(name, '{}-spec-visualize-exp-{}.pdf'.format(env_name, save_interval * idx)),
            format='pdf', facecolor=fig.get_facecolor(), edgecolor='none', bbox_inches='tight')

In [None]:
# Ground-truth safe set over the joint-angle grid (True = safe). This panel
# keeps its y axis visible, unlike the three method panels above.
fig, ax = plt.subplots(1,1)
img = plt.imshow((ans >= confidence), cmap='inferno', extent=[-180., +180., +180., -180.,], aspect=1.)
#img = plt.imshow(ans, cmap='plasma', extent=[-180., +180., +180., -180.,], aspect=1.)
ax.set_xlabel('Angle 1 (degree)')# center
ax.set_xticks(np.arange(-180., 180.+1, 60.))
ax.set_ylabel('Angle 2 (degree)')# arm tip
ax.set_yticks(np.arange(-180., 180.+1, 60.))

plt.clim(0., 1.)
# fig.colorbar(img)
plt.grid(False)
fig.set_dpi(300)
# Transparent figure and axes backgrounds for the exported PDF.
fig.patch.set_facecolor('none')
fig.patch.set_alpha(0)
fig.tight_layout()
ax.patch.set_facecolor('none')
ax.patch.set_alpha(0)
plt.savefig(os.path.join(name, '{}-spec-visualize-answer.pdf'.format(env_name)),
            format='pdf', facecolor=fig.get_facecolor(), edgecolor='none', bbox_inches='tight')