In [None]:
%run ../robustness_analysis.ipynb

## Define the environment

In [None]:
# Create a mutated environment
from gym.envs.classic_control import CartPoleEnv
from types import SimpleNamespace

class MutatedCartPoleEnv(CartPoleEnv):
    """CartPole environment whose physical parameters can be overridden.

    The stock ``CartPoleEnv`` initialisation runs first; the physics
    attributes are then overwritten with the values given to the constructor.
    """

    def __init__(self, masscart=1.0, masspole=0.1, length=0.5, force_mag=10.0):
        """Create the environment.

        Args:
            masscart: mass of the cart.
            masspole: mass of the pole.
            length: half of the pole's length.
            force_mag: magnitude of the force applied by an action.
        """
        super().__init__()

        # Minimal stand-in for the gym spec: only an id that encodes the parameters.
        env_id = f"MutatedCartPole-{masscart:.3f}-{masspole:.3f}-{length:.3f}-{force_mag:.3f}"
        self.spec = SimpleNamespace(id=env_id)

        # Constants that are not exposed as constructor arguments.
        self.gravity = 9.8
        self.tau = 0.02  # seconds between state updates

        # Parameters supplied by the caller.
        self.masscart = masscart
        self.masspole = masspole
        self.length = length  # actually half the pole's length
        self.force_mag = force_mag

        # Quantities derived from the parameters above.
        self.total_mass = masspole + masscart
        self.polemass_length = masspole * length

    def reset_to(self, state, seed=None):
        """Re-seed the environment and place it in the given ``state``.

        Args:
            state: state to install as ``self.state`` (stored as passed, not copied).
            seed: value forwarded to ``self.seed``.

        Returns:
            The state as a float32 numpy array.
        """
        self.seed(seed)
        self.steps_beyond_done = None
        self.state = state
        return np.array(state, dtype=np.float32)

In [None]:
# [lower, upper] range explored for each deviation parameter
masscart = [0.1, 2.0]
force_mag = [1.0, 20.0]

# deviation bounds, one [lower, upper] row per parameter: masscart, then force_mag
dev_bounds = np.array([masscart, force_mag])

# initial-state bounds: the same [-0.05, 0.05] interval for each of the 4 state variables
x0_bounds = np.tile([[-0.05, 0.05]], (4, 1))

# deviation parameter values of the original environment (no deviation), not normalized:
# default masscart followed by default force_mag
delta_0 = np.array([1.0, 10.0])

In [None]:
def env_builder(delta):
    """Build the environment for a deviation.

    Args:
        delta: indexable deviation; ``delta[0]`` is used as the cart mass and
            ``delta[1]`` as the force magnitude.

    Returns:
        Tuple of the mutated environment and the initial-state bounds ``x0_bounds``.
    """
    mutated_env = MutatedCartPoleEnv(masscart=delta[0], force_mag=delta[1])
    return mutated_env, x0_bounds

## Define the agent

In [None]:
from stable_baselines3 import DQN

# Load the DQN model stored under this name.
model_name = 'best_dqn'
model = DQN.load(model_name)

agent = dict(
    # action generation function of the given agent: the first element of the
    # model's deterministic prediction for the observation
    next_action=lambda obs: model.predict(obs, deterministic=True)[0],
    # some agents (like PID) need to be reset for each run; nothing to reset here
    model_reset=None,
)

## Define the STL property

In [None]:
# [low, high] observation bounds of the two components used by the STL property:
# index 0 feeds the "pos" signal and index 2 feeds the "angle" signal.
obs_space = MutatedCartPoleEnv().observation_space
pos_range = np.array([obs_space.low[0], obs_space.high[0]])
angle_range = np.array([obs_space.low[2], obs_space.high[2]])

def stl_prop():
    """Build the STL property: always (pos < limit) and (angle < limit).

    Both limits are normalized with the same ranges that ``signal_builder``
    applies to the signals: 2.4 for the position and 12 degrees (converted
    to radians) for the angle.
    """
    pos_limit = normalize(2.4, pos_range)
    angle_limit = normalize(12 * 2 * np.pi / 360, angle_range)

    pos_ok = stl.Predicate('pos') < pos_limit
    angle_ok = stl.Predicate('angle') < angle_limit

    return stl.Always(pos_ok & angle_ok)


def signal_builder(record, time_index):
    """Turn an observation record into the signals named by the STL property.

    Args:
        record: 2-D array of observations; column 0 is used for "pos" and
            column 2 for "angle".
        time_index: time index passed through to each ``stl.Signal``.

    Returns:
        Dict with "pos" and "angle" signals built from the normalized
        absolute values of the respective columns.
    """
    abs_pos = np.abs(record[:, 0])
    abs_angle = np.abs(record[:, 2])
    return {
        "pos": stl.Signal(normalize(abs_pos, pos_range), time_index),
        "angle": stl.Signal(normalize(abs_angle, angle_range), time_index),
    }

def episode_eval(obs_record, reward_record):
    """Evaluate an episode by the STL robustness of its observation record.

    ``reward_record`` is accepted for interface compatibility but is not used.
    """
    spec = stl_prop()
    return compute_stl_rob(spec, signal_builder, obs_record)

## Define the distance metric

In [None]:
# l-2 norm distance
def dist(delta, delta_0):
    """Euclidean (l-2) distance between two deviations.

    Both arguments are first normalized with ``dev_bounds`` so that every
    deviation parameter is measured on the same scale.
    """
    scaled = normalize(delta, dev_bounds)
    scaled_ref = normalize(delta_0, dev_bounds)
    offset = scaled - scaled_ref
    return np.sqrt(np.sum(offset ** 2))

## Start analysis

In [None]:
# Make sure the output directories exist. exist_ok=True keeps the cell
# idempotent on re-runs and avoids the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs('gifs', exist_ok=True)
os.makedirs('data', exist_ok=True)

In [None]:
from datetime import datetime

In [None]:
# Set up the CMA2 variant of the analysis with the environment builder, agent,
# episode evaluation, nominal deviation, deviation bounds and distance metric.
analysis = RobustnessAnalysisCMA2(env_builder, agent, episode_eval, delta_0, dev_bounds, dist)
# Search settings. NOTE(review): the meaning of each option is defined by the
# analysis class loaded via %run, not in this notebook — confirm there.
analysis.options['deviation_restarts'] = 2
analysis.options['deviation_sigma'] = 0.2
analysis.options['deviation_evals'] = 100
analysis.options['falsification_sigma'] = 0.4
analysis.options['falsification_timeout'] = 1
analysis.options['falsification_restarts'] = 0
# Last expression: show the resulting options as the cell output.
analysis.options

In [None]:
# Time one search for any unsafe deviation and report the result.
start = datetime.now()
delta, delta_dist = analysis.any_unsafe_deviation()
print('Time to find any unsafe deviation:', datetime.now() - start)
print(delta, delta_dist)

In [None]:
# Visualize the deviation returned by any_unsafe_deviation(); presumably written as a GIF to the given path — confirm.
analysis.visualize_deviation(delta, "gifs/any_counterexample.gif")

In [None]:
# Time one search for the minimum unsafe deviation and report the result.
# NOTE: this overwrites `delta` and `delta_dist` from the any-unsafe-deviation cell.
start = datetime.now()
delta, delta_dist, _ = analysis.min_unsafe_deviation()
print('Time to find min unsafe deviation:', datetime.now() - start)
print(delta, delta_dist)

In [None]:
# Visualize the deviation returned by min_unsafe_deviation(); presumably written as a GIF to the given path — confirm.
analysis.visualize_deviation(delta, "gifs/min_counterexample.gif")

In [None]:
# Keep the distance of the most recently found deviation under its own name;
# it is passed as `boundary=` to the plots in the robustness-boundary section.
boundary = delta_dist

In [None]:
# Plot the evaluation landscape over the masscart and force_mag ranges.
# NOTE(review): the two positional 25s are presumably the grid resolution per axis — confirm against grid_plot.
ax, masses, forces, grid_data = analysis.grid_plot(masscart, force_mag, 25, 25, x_name="Masses", y_name="Forces",
                                                   z_name="STL Satisfaction")
# Fix the camera angle (presumably elevation 30, azimuth -125 on a 3-D axes — confirm) before saving.
ax.view_init(30, -125)
plt.savefig('gifs/robustness-landscape.png', bbox_inches='tight')
plt.show()

In [None]:
# Same landscape drawn as a heatmap, saved next to the other figures.
analysis.heatmap(masscart, force_mag, 25, 25, x_name="Masses", y_name="Forces", z_name="STL Satisfaction")
plt.savefig('gifs/robustness-landscape-heatmap.png', bbox_inches='tight')
plt.show()

## Compute robustness boundary

In [None]:
# Landscape plot again, this time passing the distance stored in `boundary`.
# NOTE(review): how grid_plot draws the boundary is defined in the %run'd notebook — confirm there.
ax, masses, forces, grid_data = analysis.grid_plot(masscart, force_mag, 25, 25, x_name="Masses", y_name="Forces",
                                                   z_name='STL Satisfaction', boundary=boundary)
# Same camera angle as the plot without the boundary, so the two figures are comparable.
ax.view_init(30, -125)
plt.savefig('gifs/robustness-landscape-boundary.png', bbox_inches='tight')
plt.show()

In [None]:
# Heatmap version of the landscape with the distance stored in `boundary` passed in.
analysis.heatmap(masscart, force_mag, 25, 25, x_name="Masses", y_name="Forces", z_name="STL Satisfaction",
                 boundary=boundary)
plt.savefig('gifs/robustness-landscape-boundary-heatmap.png', bbox_inches='tight')
plt.show()

## Try out a different system evaluation function

In [None]:
# Rebind `analysis` to the plain RobustnessAnalysis class (the earlier cells used
# RobustnessAnalysisCMA2), with the same constructor arguments.
analysis = RobustnessAnalysis(env_builder, agent, episode_eval, delta_0, dev_bounds, dist)
# Same option values as the CMA2 setup, so the two classes are compared under equal settings.
# NOTE(review): option semantics are defined by the analysis class — confirm there.
analysis.options['deviation_restarts'] = 2
analysis.options['deviation_sigma'] = 0.2
analysis.options['deviation_evals'] = 100
analysis.options['falsification_sigma'] = 0.4
analysis.options['falsification_timeout'] = 1
analysis.options['falsification_restarts'] = 0
# Last expression: show the resulting options as the cell output.
analysis.options

In [None]:
def run_data(analysis, n_runs=10, grid_size=25):
    """Repeat the minimum-unsafe-deviation search and collect distances and timings.

    After each search, one heatmap per search trajectory is drawn with the
    samples from ``Xss`` overlaid and shaded by their order in the trajectory.

    Args:
        analysis: analysis object providing ``min_unsafe_deviation()``,
            ``heatmap(...)`` and an ``options`` mapping with a
            ``'deviation_restarts'`` entry.
        n_runs: number of searches to perform (default 10).
        grid_size: heatmap resolution per axis (default 25).

    Returns:
        ``(dists, times)``: two arrays with one entry per run — the distance
        returned by the search and a ``datetime.timedelta`` covering the
        search only (plotting is excluded).
    """
    dists, times = [], []
    for _ in range(n_runs):
        start = datetime.now()
        # Named `min_dist` rather than `dist` so the notebook-level dist()
        # metric is not shadowed inside this function.
        _, min_dist, Xss = analysis.min_unsafe_deviation()
        # Stop the clock before plotting: the heatmaps below are diagnostics
        # and must not be counted as search time.
        elapsed = datetime.now() - start

        # One trajectory for the initial run plus one per restart.
        n_trajectories = analysis.options['deviation_restarts'] + 1
        for i in range(n_trajectories):
            samples = Xss[i]
            analysis.heatmap(masscart, force_mag, grid_size, grid_size,
                             x_name="Masses", y_name="Forces", z_name="STL Satisfaction")
            # NOTE(review): assumes the samples are normalized to [0, 1] so that
            # scaling by (grid_size - 1) maps them onto heatmap cell indices — confirm.
            plt.scatter(samples[:, 0] * (grid_size - 1), samples[:, 1] * (grid_size - 1),
                        c=np.arange(len(samples)), cmap='Greys')
            plt.show()

        dists.append(min_dist)
        times.append(elapsed)
    return np.array(dists), np.array(times)

In [None]:
# Distances and timings for the RobustnessAnalysis instance configured above.
dists1, times1 = run_data(analysis)

In [None]:
# Second analysis object, using the CMA2 class, for comparison against `analysis`.
analysis2 = RobustnessAnalysisCMA2(env_builder, agent, episode_eval, delta_0, dev_bounds, dist)
# Same option values as for `analysis`, so only the analysis class differs.
# NOTE(review): option semantics are defined by the analysis class — confirm there.
analysis2.options['deviation_restarts'] = 2
analysis2.options['deviation_sigma'] = 0.2
analysis2.options['deviation_evals'] = 100
analysis2.options['falsification_sigma'] = 0.4
analysis2.options['falsification_timeout'] = 1
analysis2.options['falsification_restarts'] = 0
# Last expression: show the resulting options as the cell output.
analysis2.options

In [None]:
# Distances and timings for the RobustnessAnalysisCMA2 instance.
dists2, times2 = run_data(analysis2)

In [None]:
# Compare the distances found by the two analysis classes.
# Boxes are drawn at x positions 1 and 2 in the order the data sets are given:
# dists1 comes from RobustnessAnalysis, dists2 from RobustnessAnalysisCMA2.
plt.boxplot([dists1, dists2])
plt.xticks([1, 2], ['RobustnessAnalysis', 'RobustnessAnalysisCMA2'])
plt.ylabel('Min unsafe deviation distance')
plt.show()

In [None]:
# Compare the run times of the two analysis classes, converted to seconds.
# Boxes are drawn at x positions 1 and 2 in the order the data sets are given:
# times1 comes from RobustnessAnalysis, times2 from RobustnessAnalysisCMA2.
plt.boxplot([[x.total_seconds() for x in times1], [x.total_seconds() for x in times2]])
plt.xticks([1, 2], ['RobustnessAnalysis', 'RobustnessAnalysisCMA2'])
plt.ylabel('Time per run (s)')
plt.show()

## Use reward as the property evaluation function

In [None]:
def episode_eval_reward(obs_record, reward_record):
    """Evaluate an episode by its total reward relative to a baseline of 200.

    ``obs_record`` is accepted for interface compatibility but is not used.
    The result is zero when the rewards sum to exactly 200 and negative below.
    """
    # NOTE(review): 200 is presumably the maximum achievable episode reward — confirm.
    reward_baseline = 200
    total_reward = reward_record.sum()
    return total_reward - reward_baseline

In [None]:
# Analysis that scores episodes with episode_eval_reward instead of the STL-based episode_eval.
analysis_reward = RobustnessAnalysis(env_builder, agent, episode_eval_reward, delta_0, dev_bounds, dist)
# NOTE(review): presumably limits falsification to a single trial per evaluation — confirm in the analysis class.
analysis_reward.options['falsification_num_trials'] = 1

In [None]:
# Landscape plot for the reward-based evaluation. A separate out_dir is passed,
# presumably so this grid's data is kept apart from the STL-based runs — confirm against grid_plot.
ax, masses, forces, grid_data = analysis_reward.grid_plot(masscart, force_mag, 25, 25, out_dir="data_reward",
                                                          x_name="masses", y_name="forces", z_name='Reward')
# Override the axis labels on the returned axes with more readable text.
ax.set_xlabel('Mass', fontsize=13)
ax.set_ylabel('Force magnitude', fontsize=13)
# Same camera angle as the STL landscape plots, so the figures are comparable.
ax.view_init(30, -125)
# NOTE: unlike the other savefig calls in this notebook, no bbox_inches='tight' is passed here.
plt.savefig('gifs/robustness-landscape-reward.png')
plt.show()