In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pybullet_envs
import gym
from NeuralShield.AAAI21.models import loader as attack_loader
from NeuralShield.AAAI21.attack.attack import mad_pgd, mad_fgsm
from NeuralShield.Utils import loader
import numpy as np
import torch as th
th.set_num_threads(1)

In [3]:
env = loader.get_env("AntBulletEnv-v0", "ppo2_norm", reward_type=None)
env

<BulletWrapper<NormalizedObsWrapper<TimeLimit<AntBulletEnv<AntBulletEnv-v0>>>>>

In [4]:
pi = loader.get_original_policy("AntBulletEnv-v0", "ppo2_norm")
pi

<stable_baselines.ppo2.ppo2.PPO2 at 0x7f86f0daafd0>

In [5]:
actor_net = attack_loader.get_actor_net("AntBulletEnv-v0", "ppo2_norm")
actor_net

ActorNetwork(
  (actor): Sequential(
    (0): Linear(in_features=28, out_features=16, bias=True)
    (1): Tanh()
    (2): Linear(in_features=16, out_features=8, bias=True)
  )
)

In [6]:
def random_attack(obs, *args, **kwargs):
    l_inf_norm = kwargs.get("l_inf_norm")
    return obs + l_inf_norm * np.random.uniform(-1, 1)

In [7]:
def simulation(env, pi, actor_net, step_num, rollout_num, attack_fn, attack_freq, attack_kwargs):
    rewards = []
    unsafe_count = 0

    for _ in range(rollout_num):
        obs = env.reset()
        reward_sum = 0
        for _ in range(step_num):
            if np.random.random_sample() < attack_freq:
                obs = attack_fn(obs, actor_net, **attack_kwargs)
            action, _ = pi.predict(obs)
            obs, r, d, info = env.step(action)
            if r <= -100:
                unsafe_count += 1
                r = 0
            reward_sum += 3000 * r
        rewards.append(reward_sum)

    return np.mean(rewards), np.std(rewards), unsafe_count

In [8]:
simulation(env, pi, actor_net, 1000, 10, None, 0, {"l_inf_norm": 5e-1})

(2129.814543853198, 305.1361697367996, 0)

In [9]:
simulation(env, pi, actor_net, 1000, 10,
           random_attack, 10, {"l_inf_norm": 5e-1})

(1987.0139111606863, 464.5884904209216, 0)

In [10]:
simulation(env, pi, actor_net, 1000, 10, mad_fgsm,
           1, {"l_inf_norm": 5e-1, "delta": 1e-3})

(976.9753561875528, 524.198383911694, 0)

In [11]:
simulation(env, pi, actor_net, 1000, 10, mad_pgd,
           1, {"l_inf_norm": 5e-1, "lr": 1e-1})

(1565.9149084402936, 325.2067982236745, 0)

# Test parallelized simulation

In [12]:
from simulation import parallelized_simulation
import ray
ray.init(num_cpus=20, num_gpus=1)

2020-11-04 23:41:12,765	INFO resource_spec.py:212 -- Starting Ray with 128.17 GiB memory available for workers and up to 58.94 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-11-04 23:41:13,318	INFO services.py:1165 -- View the Ray dashboard at [1m[32mlocalhost:8266[39m[22m


In [13]:
parallelized_simulation("AntBulletEnv-v0", "ppo2_norm", actor_net, 1000,
                        1000, mad_fgsm, 1, {"l_inf_norm": 5e-1, "lr": 1e-1}, thread_number=20)

[2m[36m(pid=58647)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58634)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58631)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58651)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58644)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58641)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58637)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58639)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58635)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58632)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58646)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58638)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58633)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58650)[0m pybullet build time: Sep  4 2020 23:44:26
[2m[36m(pid=58649)[0m pybullet build time: Sep  4 2020 23:4

array([1669.60624348,  141.44894213,    0.        ])