In [1]:
import os

import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
from simulation.simulation import parallelized_simulation_attack_obs as parallelized_simulation
from attack.attack import mad_fgsm, mad_pgd
from NeuralShield.AAAI21.models import loader
from NeuralShield.hyperparameter import benchmarks

In [3]:
import ray

# Start the local Ray runtime (presumably the backend used by
# parallelized_simulation for its worker processes — confirm in simulation.simulation).
# ignore_reinit_error=True keeps this cell re-runnable: without it, executing the
# cell a second time in the same kernel raises because Ray is already initialized.
# NOTE(review): 50 CPUs / 1 GPU are hard-coded for the original host; adjust to
# the machine at hand.
_ = ray.init(num_cpus=50, num_gpus=1, ignore_reinit_error=True)

2020-11-05 21:26:36,343	INFO resource_spec.py:212 -- Starting Ray with 156.3 GiB memory available for workers and up to 70.99 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-11-05 21:26:36,995	INFO services.py:1165 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


# Attack with MAD

[paper link](https://arxiv.org/pdf/2003.08938.pdf)  

The loss function here is 
$$
\begin{aligned}
\underset{\hat{s} \in B(s)}{\arg \min } L_{\mathrm{MAD}}(\hat{s}) &=\underset{\hat{s} \in B(s)}{\arg \max } D_{\mathrm{KL}}(\pi(\cdot \mid s) \| \pi(\cdot \mid \hat{s})) \\
&=\underset{\hat{s} \in B(s)}{\arg \max }\left(\pi_{\theta_{\pi}}(s)-\pi_{\theta_{\pi}}(\hat{s})\right)^{\top} \Sigma^{-1}\left(\pi_{\theta_{\pi}}(s)-\pi_{\theta_{\pi}}(\hat{s})\right)
\end{aligned}
$$

We use FGSM and PGD to optimize this objective instead of the SGLD method mentioned in the paper; both attacks can still reduce the reward significantly. 

In [4]:
# Reference runs without any attack: one simulation result per
# "<env_name>/<algo>" key, restricted to BulletEnv environments and
# a2c / ppo policies.
baseline_res = {}
for env_name in benchmarks:
    # Skip every benchmark that is not a BulletEnv environment.
    if "BulletEnv" not in env_name:
        continue
    for algo in benchmarks[env_name]["algos"]:
        # Keep only algorithms whose name mentions a2c or ppo.
        if "a2c" not in algo and "ppo" not in algo:
            continue
        actor_net = loader.get_actor_net(env_name, algo)
        # The attack slots are (None, 0, None) here; the attacked runs pass
        # (attack_fn, 1, kwargs) in the same positions.
        # NOTE(review): meaning of the two 1000 arguments is not visible in
        # this notebook — confirm against parallelized_simulation_attack_obs.
        ret = parallelized_simulation(
            env_name, algo, actor_net, None, 1000, 1000,
            None, 0, None, thread_number=50,
        )
        baseline_res[f"{env_name}/{algo}"] = ret

[2m[36m(pid=45743)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45717)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45760)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45720)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45758)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45755)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45754)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45718)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45725)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45736)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45702)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45756)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45753)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45738)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=45759)[0m pybullet build time: Sep  4 2020 23:4

Attack with an $l_{\infty}$ bound of $0.5$; every observation is attacked. 

In [5]:
# MAD-FGSM runs with l_inf_norm = 0.5: one simulation result per
# "<env_name>/<algo>" key, restricted to BulletEnv environments and
# a2c / ppo policies.
fgsm_res = {}
for env_name in benchmarks:
    # Skip every benchmark that is not a BulletEnv environment.
    if "BulletEnv" not in env_name:
        continue
    for algo in benchmarks[env_name]["algos"]:
        # Keep only algorithms whose name mentions a2c or ppo.
        if "a2c" not in algo and "ppo" not in algo:
            continue
        actor_net = loader.get_actor_net(env_name, algo)
        # NOTE(review): the literal 1 after mad_fgsm presumably selects
        # "attack every observation" (the baseline passes 0) — confirm
        # against parallelized_simulation_attack_obs.
        ret = parallelized_simulation(
            env_name, algo, actor_net, None, 1000, 1000,
            mad_fgsm, 1, {"l_inf_norm": 5e-1}, thread_number=50,
        )
        fgsm_res[f"{env_name}/{algo}"] = ret

In [6]:
# MAD-PGD runs with l_inf_norm = 0.5 and lr = 0.1: one simulation result per
# "<env_name>/<algo>" key, restricted to BulletEnv environments and
# a2c / ppo policies.
pgd_res = {}
for env_name in benchmarks:
    # Skip every benchmark that is not a BulletEnv environment.
    if "BulletEnv" not in env_name:
        continue
    for algo in benchmarks[env_name]["algos"]:
        # Keep only algorithms whose name mentions a2c or ppo.
        if "a2c" not in algo and "ppo" not in algo:
            continue
        actor_net = loader.get_actor_net(env_name, algo)
        # NOTE(review): the literal 1 after mad_pgd presumably selects
        # "attack every observation" (the baseline passes 0) — confirm
        # against parallelized_simulation_attack_obs.
        ret = parallelized_simulation(
            env_name, algo, actor_net, None, 1000, 1000,
            mad_pgd, 1, {"l_inf_norm": 5e-1, "lr": 1e-1}, thread_number=50,
        )
        pgd_res[f"{env_name}/{algo}"] = ret

In [8]:
# Combine the baseline, MAD-FGSM and MAD-PGD results into one table and save it.
# Each result dict maps "<env_name>/<algo>" to a row of three values, labelled
# by RESULT_COLUMNS; an extra "attack" column tags which run a row came from.
RESULT_COLUMNS = ["reward mean", "reward std", "unsafe states found"]
OUTPUT_CSV = "res/mad_obs_attack_0.5.csv"

baseline_df = pd.DataFrame.from_dict(baseline_res, orient="index", columns=RESULT_COLUMNS)
baseline_df.insert(3, "attack", None)  # baseline rows carry no attack label

df_fgsm = pd.DataFrame.from_dict(fgsm_res, orient="index", columns=RESULT_COLUMNS)
df_fgsm.insert(3, "attack", "MAD-FGSM")

df_pgd = pd.DataFrame.from_dict(pgd_res, orient="index", columns=RESULT_COLUMNS)
df_pgd.insert(3, "attack", "MAD-PGD")

df3 = pd.concat([baseline_df, df_fgsm, df_pgd])

# to_csv does not create missing parent directories; make sure "res/" exists so
# the (expensive) simulation results are not lost to a failed write.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df3.to_csv(OUTPUT_CSV)

Under attack, the reward decreases significantly, but these attacks do not cause any unsafe trajectories. 

In [10]:
# Display the combined table sorted by its "<env_name>/<algo>" index so the
# baseline and attacked rows of each policy appear next to each other.
df3.sort_index()

Unnamed: 0,reward mean,reward std,unsafe states found,attack
AntBulletEnv-v0/a2c_norm,2275.409203,78.214526,0.0,
AntBulletEnv-v0/a2c_norm,772.703955,64.062102,0.0,MAD-PGD
AntBulletEnv-v0/a2c_norm,895.455374,80.011208,0.0,MAD-FGSM
AntBulletEnv-v0/ppo2_norm,2203.950433,154.262919,0.0,
AntBulletEnv-v0/ppo2_norm,1628.068648,267.301124,0.0,MAD-PGD
AntBulletEnv-v0/ppo2_norm,1376.362562,296.090052,0.0,MAD-FGSM
HalfCheetahBulletEnv-v0/a2c_norm,2193.813466,84.03001,0.0,
HalfCheetahBulletEnv-v0/a2c_norm,2200.374856,83.379577,0.0,MAD-PGD
HalfCheetahBulletEnv-v0/a2c_norm,2196.015145,80.585623,0.0,MAD-FGSM
HalfCheetahBulletEnv-v0/ppo2_norm_tw,2971.039425,173.436992,0.0,MAD-PGD


Attack with an $l_{\infty}$ bound of $1$; every observation is attacked. 

In [11]:
# MAD-FGSM runs with l_inf_norm = 1: one simulation result per
# "<env_name>/<algo>" key, restricted to BulletEnv environments and
# a2c / ppo policies. Overwrites the fgsm_res of the 0.5 experiment.
fgsm_res = {}
for env_name in benchmarks:
    # Skip every benchmark that is not a BulletEnv environment.
    if "BulletEnv" not in env_name:
        continue
    for algo in benchmarks[env_name]["algos"]:
        # Keep only algorithms whose name mentions a2c or ppo.
        if "a2c" not in algo and "ppo" not in algo:
            continue
        actor_net = loader.get_actor_net(env_name, algo)
        # NOTE(review): the literal 1 after mad_fgsm presumably selects
        # "attack every observation" (the baseline passes 0) — confirm
        # against parallelized_simulation_attack_obs.
        ret = parallelized_simulation(
            env_name, algo, actor_net, None, 1000, 1000,
            mad_fgsm, 1, {"l_inf_norm": 1}, thread_number=50,
        )
        fgsm_res[f"{env_name}/{algo}"] = ret

In [12]:
# MAD-PGD runs with l_inf_norm = 1 and lr = 0.1: one simulation result per
# "<env_name>/<algo>" key, restricted to BulletEnv environments and
# a2c / ppo policies. Overwrites the pgd_res of the 0.5 experiment.
pgd_res = {}
for env_name in benchmarks:
    # Skip every benchmark that is not a BulletEnv environment.
    if "BulletEnv" not in env_name:
        continue
    for algo in benchmarks[env_name]["algos"]:
        # Keep only algorithms whose name mentions a2c or ppo.
        if "a2c" not in algo and "ppo" not in algo:
            continue
        actor_net = loader.get_actor_net(env_name, algo)
        # NOTE(review): the literal 1 after mad_pgd presumably selects
        # "attack every observation" (the baseline passes 0) — confirm
        # against parallelized_simulation_attack_obs.
        ret = parallelized_simulation(
            env_name, algo, actor_net, None, 1000, 1000,
            mad_pgd, 1, {"l_inf_norm": 1, "lr": 1e-1}, thread_number=50,
        )
        pgd_res[f"{env_name}/{algo}"] = ret

In [13]:
# Combine the baseline, MAD-FGSM and MAD-PGD results into one table, save it,
# and display it sorted by policy.
# Each result dict maps "<env_name>/<algo>" to a row of three values, labelled
# by RESULT_COLUMNS; an extra "attack" column tags which run a row came from.
RESULT_COLUMNS = ["reward mean", "reward std", "unsafe states found"]
OUTPUT_CSV = "res/mad_obs_attack_1.0.csv"

baseline_df = pd.DataFrame.from_dict(baseline_res, orient="index", columns=RESULT_COLUMNS)
baseline_df.insert(3, "attack", None)  # baseline rows carry no attack label

df_fgsm = pd.DataFrame.from_dict(fgsm_res, orient="index", columns=RESULT_COLUMNS)
df_fgsm.insert(3, "attack", "MAD-FGSM")

df_pgd = pd.DataFrame.from_dict(pgd_res, orient="index", columns=RESULT_COLUMNS)
df_pgd.insert(3, "attack", "MAD-PGD")

df3 = pd.concat([baseline_df, df_fgsm, df_pgd])

# to_csv does not create missing parent directories; make sure "res/" exists so
# the (expensive) simulation results are not lost to a failed write.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df3.to_csv(OUTPUT_CSV)

# Last expression of the cell: rendered as the cell output.
df3.sort_index()

Unnamed: 0,reward mean,reward std,unsafe states found,attack
AntBulletEnv-v0/a2c_norm,2275.409203,78.214526,0.0,
AntBulletEnv-v0/a2c_norm,747.739234,61.51271,0.0,MAD-PGD
AntBulletEnv-v0/a2c_norm,668.541822,102.158943,0.0,MAD-FGSM
AntBulletEnv-v0/ppo2_norm,2203.950433,154.262919,0.0,
AntBulletEnv-v0/ppo2_norm,1560.970516,255.385187,0.0,MAD-PGD
AntBulletEnv-v0/ppo2_norm,833.033551,87.172706,0.0,MAD-FGSM
HalfCheetahBulletEnv-v0/a2c_norm,2193.813466,84.03001,0.0,
HalfCheetahBulletEnv-v0/a2c_norm,2197.209051,83.77372,0.0,MAD-PGD
HalfCheetahBulletEnv-v0/a2c_norm,2198.444688,90.911089,0.0,MAD-FGSM
HalfCheetahBulletEnv-v0/ppo2_norm_tw,2945.853646,207.171035,0.0,MAD-PGD
