In [None]:
import cma
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
import time
import imageio
import os
from matplotlib import cm
%matplotlib inline

## Helper functions

In [None]:
def scale(x, bounds):
    """Map normalized values in [0, 1] onto the range of each variable.

    Args:
        x: Scalar or array-like of normalized values.
        bounds: Array-like holding either a single ``[low, high]`` pair (1-D)
            or one ``[low, high]`` row per variable (2-D).

    Returns:
        ``low + x * (high - low)``, computed with NumPy broadcasting.
    """
    # Accept plain lists/tuples as well as ndarrays. Converting ``x`` also
    # keeps ``list * numpy-integer`` from being evaluated as list repetition.
    x = np.asarray(x)
    bounds = np.asarray(bounds)
    if bounds.ndim == 1:
        low, high = bounds[0], bounds[1]
    else:
        low, high = bounds[:, 0], bounds[:, 1]
    return low + x * (high - low)

def normalize(x_scaled, bounds):
    """Map values from the range of each variable back to [0, 1].

    This is the inverse of ``scale``.

    Args:
        x_scaled: Scalar or array-like of values expressed in the original
            variable ranges.
        bounds: Array-like holding either a single ``[low, high]`` pair (1-D)
            or one ``[low, high]`` row per variable (2-D).

    Returns:
        ``(x_scaled - low) / (high - low)``, computed with NumPy broadcasting.
    """
    # Accept plain lists/tuples as well as ndarrays.
    x_scaled = np.asarray(x_scaled)
    bounds = np.asarray(bounds)
    if bounds.ndim == 1:
        low, high = bounds[0], bounds[1]
    else:
        low, high = bounds[:, 0], bounds[:, 1]
    return (x_scaled - low) / (high - low)

In [None]:
import signal_tl as stl

def compute_stl_rob(phi, signal_builder, record):
    """Return the robustness of the STL formula ``phi`` at time 0.

    Args:
        phi: Formula handed to ``stl.compute_robustness``.
        signal_builder: Callable ``(record, time_index)`` producing the signal
            that ``stl.compute_robustness`` evaluates.
        record: Sequence of recorded samples; one integer time stamp
            (0, 1, ..., len(record) - 1) is generated per sample.

    Returns:
        The robustness trace evaluated at time 0 (``.at(0)``).
    """
    steps = np.arange(len(record))
    trace = signal_builder(record, steps)
    return stl.compute_robustness(phi, trace).at(0)

## Default options

In [None]:
class RobustnessAnalysis:
    def __init__(self, env_builder, agent, episode_eval, delta_0, dev_bounds, dist, options=None):
        self.env_builder = env_builder
        self.agent = agent
        self.episode_eval = episode_eval
        self.delta_0 = delta_0
        self.dev_bounds = dev_bounds
        self.dist = dist
        self.options = {    
            'epsilon': 1e-2,
            'deviation_num_trials': 3,
            'deviation_sigma': 0.2, # normalized
            'deviation_timeout': 10, # timeout in minutes for each CMA run for finding a deviation

            'falsification_sigma': 0.2,
            'falsification_timeout': 1,
            'falsification_num_trials': 3,
            'falsification_episodes': 100,

            'episodes_of_each_x0': 1,
            'steps_of_each_x0': 200,
        }
        if options is not None:
            self.options.update(options)
            
        self.cache = {}
    
    def robustness_boundary(self):
        delta, _ = self.min_unsafe_deviation()
        boundary = self.dist(delta, self.delta_0) - self.options['epsilon']
        while True:
            delta, _ = self.min_unsafe_deviation(boundary)
            if delta is None:
                break
            boundary = self.dist(delta, self.delta_0) - self.options['epsilon']
        return boundary
    
    def any_unsafe_deviation(self, boundary=None):
        """Search for any deviation whose falsification robustness is negative.

        Runs up to ``options['deviation_num_trials']`` CMA-ES searches that
        minimize the value returned by ``stl_falsification_cma`` over the
        deviation space (normalized to [0, 1] via ``dev_bounds``) and stops at
        the first trial that finds a negative value.

        Args:
            boundary: Optional distance limit. When given, the search uses
                ``cma.ConstrainedFitnessAL`` with the single constraint
                ``dist(delta, delta_0) - boundary``.

        Returns:
            ``(delta, dist(delta, delta_0))`` for the deviation found, or
            ``(None, np.inf)`` when no trial finds one.
        """
        num_tries = self.options['deviation_num_trials']
        sigma = self.options['deviation_sigma']
        timeout = self.options['deviation_timeout']

        # Use normalized values in CMA; scale them back when calling the
        # objective or constraints function.
        x0 = normalize(self.delta_0, self.dev_bounds)

        def objective(x):
            return self.stl_falsification_cma(scale(x, self.dev_bounds))[0]

        def constraints(x):
            return [self.dist(scale(x, self.dev_bounds), self.delta_0) - boundary]

        def cma_options():
            # Fresh dict per run so one run cannot affect the next one's options.
            return {'bounds': [0.0, 1.0], 'tolstagnation': 0, 'tolx': 1e-5,
                    'timeout': timeout * 60, 'ftarget': 0.0}

        for i in range(num_tries):
            print(f'\n================ Any unsafe deviation trial {i+1} ==============>')

            if boundary is not None:
                cfun = cma.ConstrainedFitnessAL(
                    objective,
                    constraints,
                    find_feasible_first=True
                )
                cma.fmin2(cfun, x0, sigma, cma_options(), callback=cfun.update)

                best = cfun.best_feas.info
                # A feasible point only satisfies the distance constraint; it is
                # an unsafe deviation only if its robustness is negative too,
                # matching the check in the unconstrained branch.
                if best is not None and best['f'] < 0.0:
                    delta = scale(best['x'], self.dev_bounds)
                    return delta, self.dist(delta, self.delta_0)
            else:
                _, es = cma.fmin2(objective, x0, sigma, cma_options())
                if es.result.fbest < 0.0:
                    delta = scale(es.result.xbest, self.dev_bounds)
                    return delta, self.dist(delta, self.delta_0)

        return None, np.inf
            
    
    def min_unsafe_deviation(self, boundary=None):
        """Search for the unsafe deviation closest to ``delta_0``.

        Each of the ``options['deviation_num_trials']`` trials runs a
        constrained CMA-ES search (``cma.ConstrainedFitnessAL``) that minimizes
        ``dist(delta, delta_0)``. The constraint vector holds the robustness
        from ``stl_falsification_cma`` and, when ``boundary`` is given,
        ``dist(delta, delta_0) - boundary``.

        Args:
            boundary: Optional distance limit added as a second constraint.

        Returns:
            ``(delta, distance)`` of the closest feasible deviation over all
            trials, or ``(None, np.inf)`` if no trial reports a feasible one.
        """
        opts = self.options
        num_tries = opts['deviation_num_trials']
        sigma = opts['deviation_sigma']
        timeout = opts['deviation_timeout']

        def distance_to_reference(delta):
            return self.dist(delta, self.delta_0)

        def constraint_values(delta):
            values = [self.stl_falsification_cma(delta)[0]]
            if boundary is not None:
                values.append(self.dist(delta, self.delta_0) - boundary)
            return values

        # CMA works on normalized values; they are scaled back before the
        # objective or the constraints are evaluated.
        start = normalize(self.delta_0, self.dev_bounds)

        best_delta, best_dist = None, np.inf
        for i in range(num_tries):
            print(f'\n================ Min unsafe deviation trial {i+1} ==============>')

            cfun = cma.ConstrainedFitnessAL(
                lambda x: distance_to_reference(scale(x, self.dev_bounds)),
                lambda x: constraint_values(scale(x, self.dev_bounds)),
                find_feasible_first=True
            )
            _, es = cma.fmin2(
                cfun,
                start,
                sigma,
                {'bounds': [0.0, 1.0], 'tolstagnation': 0, 'tolx': 1e-5, 'timeout': timeout * 60},
                callback=cfun.update
            )

            print(es.result)
            found = cfun.best_feas.info
            if found is None:
                continue

            print(found)
            candidate = scale(found['x'], self.dev_bounds)
            candidate_dist = self.dist(candidate, self.delta_0)
            if candidate_dist < best_dist:
                best_dist, best_delta = candidate_dist, candidate

        return best_delta, best_dist
    
    def stl_falsification_cma(self, delta):
        sigma = self.options['falsification_sigma']
        timeout = self.options['falsification_timeout']
        num_tries = self.options['falsification_num_trials']
        max_episodes = self.options['falsification_episodes']

        env, x0_bounds = self.env_builder(delta)
        objective = lambda x: self.prop_eval(env, x)
        
        min_f = np.inf
        min_x = None
        for i in range(num_tries):
            x0 = np.random.rand(len(x0_bounds)) # random between [0, 1)

            x, es = cma.fmin2(
                lambda x: objective(scale(x, x0_bounds)),
                x0,
                sigma,
                {'bounds': [0.0, 1.0], 'maxfeval': max_episodes, 'timeout': timeout * 60, 'verbose': -9}
            )
            if es.result.fbest < min_f:
                min_f = es.result.fbest
                min_x = x
        
        env.close()
        self.cache[tuple(delta)] = (min_f, scale(min_x, x0_bounds))
        return self.cache[tuple(delta)]
    
    def prop_eval(self, env, x0):
        space = env.observation_space
        
        model_reset = self.agent['model_reset']
        next_action = self.agent['next_action']
        
        num_tries = self.options['episodes_of_each_x0']
        max_episode_steps = self.options['steps_of_each_x0']
        
        values = []
        for i in range(num_tries):
            obs = env.reset_to(x0)
            obs_record = [obs]
            reward_record = [0]
            
            if model_reset is not None:
                model_reset()
            for step in range(max_episode_steps):
                action = next_action(obs)
                obs, reward, _, _ = env.step(action)
                obs_record.append(np.clip(obs, space.low, space.high))
                reward_record.append(reward)
            
            v = self.episode_eval(np.asarray(obs_record), np.asarray(reward_record))
            values.append(v)
        
        # Use the mean robustness of multiple runs at an initial state x
        return np.asarray(values).mean()
    
    def visualize_deviation(self, delta, gif):
        value, x0 = self.cache[tuple(delta)]
        env, _ = self.env_builder(delta)
        self.visual_episode(env, x0, save_gif=gif)
        env.close()
        print("STL robustness value:", value)
        print("Initial state:", x0)
    
    def init_fig(self, env):
        """Open a new figure showing the environment's current frame.

        The figure is titled with ``env.spec.id`` and its axes are hidden.

        Returns:
            The object returned by ``plt.imshow`` for the rendered RGB frame.
        """
        plt.figure()
        plt.title(env.spec.id)
        plt.axis('off')
        first_frame = env.render(mode='rgb_array')
        return plt.imshow(first_frame)
    
    def update_fig(self, img, env, step, reward, done, episode_measure_name, value):
        """Refresh the notebook figure with the current frame and status title.

        The title is drawn in red when ``done`` is true or ``value`` is
        negative, and in black otherwise.

        Args:
            img: Image object (as returned by ``init_fig``) whose data is replaced.
            env: Environment rendered with ``mode='rgb_array'``.
            step: Step number shown in the title.
            reward: Reward figure shown in the title.
            done: Termination flag shown in the title.
            episode_measure_name: Label for ``value`` in the title.
            value: Episode measure shown in the title.

        Returns:
            An RGBA array holding a copy of the current figure canvas.
        """
        title = plt.title(
            f"{env.spec.id}\n" +
            f"Step: {step} | Reward: {reward:.3f} | Done: {done}\n" +
            f"{episode_measure_name}: {value:.3f}"
        )
        if done or value < 0:
            plt.setp(title, color='r')
        else:
            plt.setp(title, color='k')
        img.set_data(env.render(mode='rgb_array'))
        fig = plt.gcf()
        display.display(fig)
        display.clear_output(wait=True)

        # buffer_rgba() exposes the canvas' own buffer rather than a snapshot.
        # Copy it so frames collected by the caller are not overwritten when
        # the figure is redrawn on the next step.
        return np.array(fig.canvas.buffer_rgba())

    def visual_episode(self, env, x0=None, visualize_in_notebook=True,
                       sleep=0.01, save_gif=None, episode_measure_name='STL'):
        if x0 is not None:
            obs = env.reset_to(x0)
        else:
            obs = env.reset()

        model_reset = self.agent['model_reset']
        next_action = self.agent['next_action']
        max_episode_steps = self.options['steps_of_each_x0']

        if model_reset is not None:
            model_reset()

        if save_gif is not None:
            gif = []
        else:
            gif = None

        # initialize figure in notebook
        if visualize_in_notebook:
            img = self.init_fig(env)

        space = env.observation_space
        total_reward = 0.0
        obs_record = [obs]
        reward_record = [0]

        for step in range(1, max_episode_steps+1):
            action = next_action(obs)
            obs, reward, done, info = env.step(action)
            total_reward += reward
            obs_record.append(np.clip(obs, space.low, space.high))
            reward_record.append(reward)

            if visualize_in_notebook:
                v = self.episode_eval(np.asarray(obs_record), np.asarray(reward_record))
                fig_data = self.update_fig(img, env, step, total_reward, done, episode_measure_name, v)
                if save_gif is not None:
                    gif.append(fig_data)
            else:
                env.render()

            if sleep > 0.0:
                time.sleep(sleep)

        if save_gif is not None:
            imageio.mimsave(save_gif, [data for data in gif], fps=10)
    
    def grid_plot(self, x_bound, y_bound, n_x, n_y, x_name="X", y_name="Y", z_name="Z",
                  grid_name="grid_data", override=False, out_dir='data', boundary=None):
        if not os.path.exists(f'{out_dir}/{grid_name}.csv') or override:
            X = np.linspace(x_bound[0], x_bound[1], n_x)
            Y = np.linspace(y_bound[0], y_bound[1], n_y)
            X, Y = np.meshgrid(X, Y, indexing='ij')

            grid_data = np.zeros((n_x, n_y))

            for i in range(n_x):
                for j in range(n_y):
                    # treat xv[i,j], yv[i,j]
                    x, y = X[i, j], Y[i, j]
                    robustness, _ = self.stl_falsification_cma([x, y])
                    grid_data[i, j] = robustness

            if not os.path.exists(out_dir):
                os.mkdir(out_dir)

            np.savetxt(f"{out_dir}/{x_name}.csv", X, delimiter=",")
            np.savetxt(f"{out_dir}/{y_name}.csv", Y, delimiter=",")
            np.savetxt(f"{out_dir}/{grid_name}.csv", grid_data, delimiter=",")
        else:
            X = np.loadtxt(f"{out_dir}/{x_name}.csv", delimiter=",")
            Y = np.loadtxt(f"{out_dir}/{y_name}.csv", delimiter=",")
            grid_data = np.loadtxt(f"{out_dir}/{grid_name}.csv", delimiter=",")

        fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(14, 14))
        
        if boundary is not None:
            mask = np.asarray([
                [self.dist([x, y], self.delta_0) > boundary for x, y in zip(xs, ys)]
                for xs, ys in zip(X, Y)
            ])
            ax.plot_surface(X, Y, np.ma.masked_where(mask, grid_data), cmap=cm.coolwarm)
            ax.plot_surface(X, Y, grid_data, cmap=cm.coolwarm, alpha=0.25)
        else:
            ax.plot_surface(X, Y, grid_data, cmap=cm.coolwarm)
        
        ax.set_zlabel(z_name, fontsize=13)
        ax.set_xlabel(x_name, fontsize=13)
        ax.set_ylabel(y_name, fontsize=13)

        return ax, X, Y, grid_data