In [1]:
import os

import seaborn as sns

from ray import init, rllib, tune, shutdown

In [2]:
from attack_simulator.agents import DEFENDERS
from attack_simulator.env import AttackSimulationEnv
from attack_simulator.graph import AttackGraph, SIZES

In [3]:
import gym
import numpy as np

class BooleanVectorPreprocessor(rllib.models.preprocessors.Preprocessor):
    def _init_shape(self, observation_space, options=None):
        return (len(observation_space.spaces),)

    def transform(self, observation):
        return np.array(observation)
    
    @property
    def observation_space(self):
        space = gym.spaces.Box(0, 1, self.shape, dtype='int8')
        space.original_space = self._obs_space
        return space

rllib.models.ModelCatalog.register_custom_preprocessor('boolean_vector', BooleanVectorPreprocessor)


class AgentPolicy(rllib.policy.Policy):
    def __init__(self, observation_space, action_space, config):
        super().__init__(observation_space, action_space, config)
        agent_config = dict(
            input_dim=observation_space.shape[0], # same as len(observation_space.original_space.spaces)
            num_actions=action_space.n,
            random_seed=config['seed'],
            attack_graph=config['env_config']['attack_graph'],
        )
        self._agent = DEFENDERS[config['agent_type']](agent_config)

    def compute_actions(self, observations, *args, **kwargs):
        return [self._agent.act(obs) for obs in observations], [], {}

    def get_weights(self):
        return {}

    def set_weights(self, weights):
        pass
    
    
def instantiate_agent(agent_type, config):
    default_config = rllib.agents.trainer.with_common_config(
        dict(config, agent_type=agent_type, env_class=config['env'])  # , model=dict(custom_preprocessor='boolean_vector'))
    )
    return rllib.agents.trainer_template.build_trainer(
        name=agent_type,
        default_policy=AgentPolicy,
        default_config=default_config,
    )(config=config)

In [4]:
from copy import deepcopy

class AlphaZeroWrapper(gym.Env):
    def __init__(self, config):
        self.env = config['env_class'](config)
        self.action_space = self.env.action_space
        assert isinstance(self.action_space, gym.spaces.Discrete), 'AlphaZero requires a Discrete action space'
        shape = (self.action_space.n,)
        self.observation_space = gym.spaces.Dict(dict(obs=self.env.observation_space, action_mask=gym.spaces.Box(0, 1, shape)))
        self.reward = 0
        self.mask = np.full(shape, 1, dtype='int8')
        
    def reset(self):
        self.reward = 0
        observation = self.env.reset()
        return dict(obs=observation, action_mask=self.mask)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        self.reward += reward
        reward = self.reward if done else 0
        return dict(obs=observation, action_mask=self.mask), reward, done, info

    def set_state(self, state):
        env, self.reward = state
        self.env = deepcopy(env)
        return dict(obs=self.env.observation, action_mask=self.mask)

    def get_state(self):
        return deepcopy(self.env), self.reward
    
    def close(self):
        self.env.close()
        
    def render(self, mode=None):
        self.env.render(mode)
        
    def seed(self, seed=None):
        self.env.seed(seed)

In [5]:
# 'contrib/AlphaZero' does NOT appear to work without its custom dense model
from ray.rllib.contrib.alpha_zero.models.custom_torch_models import DenseModel
rllib.models.ModelCatalog.register_custom_model('alpha_zero_dense_model', DenseModel)

In [6]:
import pandas as pd

class RolloutAggregator:
    def __init__(self, **kwargs):
        self._kwargs = kwargs
        self._episodes = []

    def begin_rollout(self):
        self._rewards = []

    def append_step(self, obs, action, next_obs, reward, done, info):
        self._rewards.append(reward)

    def end_rollout(self):
        self._episodes.append(
            dict(self._kwargs, episode_length=len(self._rewards), episode_reward=sum(self._rewards))
        )
    
    def to_df(self):
        return pd.DataFrame(self._episodes)

In [7]:
if os.path.isdir('/var/run/secrets/kubernetes.io'):  # inside k8s pod
    init(address='auto')
else:
    # listen on all interfaces inside a container for port-forwarding to work
    dashboard_host = '0.0.0.0' if os.path.exists('/.dockerenv') else '127.0.0.1'
    init(num_cpus=4, dashboard_host=dashboard_host)

# ALTERNATIVE: use the "Ray client" to connect to a remote cluster
# Unfortunately, JupyterNotebookReporter displays an object reference
# <IPython.core.display.HTML object> instead of content...
# --- --- ---
#
# from ray.util.client import worker
#
# worker.INITIAL_TIMEOUT_SEC = worker.MAX_TIMEOUT_SEC = 1
#
# ray_client_server = 'host.docker.internal' if os.path.exists("/.dockerenv") else '127.0.0.1'
# try:
#     init(address=f'ray://{ray_client_server}:10001')
# except Connection Error:
#     pass  # TODO: try something else...

2021-09-15 11:47:16,462	INFO worker.py:825 -- Connecting to existing Ray cluster at address: 10.28.0.130:6379


In [8]:
from ray.rllib.rollout import rollout
from tqdm import tqdm

agent_types = ['R2D2', 'contrib/AlphaZero', 'rule-based', 'random']
seeds = [0, 1, 2, 3, 6, 7, 11, 28, 42, 1337]
training_iterations = 10
rollouts = 10

def generate(savename):
    frames = []

    for graph_size in SIZES:
        graph = AttackGraph(dict(graph_size=graph_size))

        for seed in seeds:
            for agent_type in agent_types:
                config = dict(
                    framework='torch',
                    model=dict(use_lstm=True),
                    env=AttackSimulationEnv,
                    env_config=dict(attack_graph=graph),
                    seed=seed,
                    create_env_on_driver=True,  # apparently, assumed by `rollout`
                    num_workers=8,
                    batch_mode='complete_episodes',
                    # log_level='DEBUG',
                )
                if agent_type in DEFENDERS:
                    agent = instantiate_agent(agent_type, config)
                else:
                    if agent_type == 'contrib/AlphaZero':
                        config['env_config'].update(env_class=config['env'])
                        config.update(env=AlphaZeroWrapper, model=dict(custom_model='alpha_zero_dense_model'))
                    agent_class = rllib.agents.registry.get_trainer_class(agent_type)
                    print(agent_class, config)
                    agent = agent_class(config=config)
                    name = f'{agent_type.split("/")[-1]}_{graph_size}_{seed}'
                    if os.path.exists(name):
                        checkpoint_path = tune.utils.trainable.TrainableUtil.get_checkpoints_paths(name).chkpt_path[0]
                        agent.restore(checkpoint_path)
                    else:
                        pbar = tqdm(range(training_iterations), name)
                        for _ in pbar:
                            results = agent.train()
                            # TODO: break based on results?
                        agent.save(name)

                aggregator = RolloutAggregator(agent_type=agent_type, graph_size=graph.num_attacks)
                rollout(agent, 'AttackSimulator', num_steps=0, num_episodes=rollouts, saver=aggregator)
                frames.append(aggregator.to_df())

    df = pd.concat(frames, ignore_index=True).rename(columns=dict(agent_type='Agent', graph_size='Graph size', episode_length='Episode lengths', episode_reward='Returns'))
    df.to_csv(savename)
    return df

In [9]:
#%%capture noise --no-stderr

savename = 'data.csv'

df = generate(savename) if not os.path.exists(savename) else pd.read_csv(savename, index_col=0)
df

2021-09-15 11:47:16,783	INFO trainer.py:726 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps)
<class 'ray.rllib.agents.trainer_template.R2D2'> {'framework': 'torch', 'model': {'use_lstm': True}, 'env': <class 'attack_simulator.env.AttackSimulationEnv'>, 'env_config': {'attack_graph': <attack_simulator.graph.AttackGraph object at 0x7f0458768850>}, 'seed': 0, 'create_env_on_driver': True, 'num_workers': 8, 'batch_mode': 'complete_episodes'}


2021-09-15 11:47:20,775	ERROR worker.py:79 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.set_weights()[39m (pid=2873, ip=10.28.1.2, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7ff656914a00>)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 832, in set_weights
    self.policy_map[pid].set_weights(w)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/agents/dqn/simple_q_torch_policy.py", line 50, in set_weights
    TorchPolicy.set_weights(self, weights)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/policy/torch_policy.py", line 707, in set_weights
    self.model.load_state_dict(weights)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/torch/nn/mo

Episode #0: reward: 150.0
Episode #1: reward: 268.0
Episode #2: reward: 15.0
Episode #3: reward: -16704.0
Episode #4: reward: -13280.0
Episode #5: reward: -7365.0
Episode #6: reward: -17171.0
Episode #7: reward: -24690.0




Episode #8: reward: 245.0
Episode #9: reward: 25.0
<class 'ray.rllib.agents.trainer_template.AlphaZero'> {'framework': 'torch', 'model': {'custom_model': 'alpha_zero_dense_model'}, 'env': <class '__main__.AlphaZeroWrapper'>, 'env_config': {'attack_graph': <attack_simulator.graph.AttackGraph object at 0x7f0458768850>, 'env_class': <class 'attack_simulator.env.AttackSimulationEnv'>}, 'seed': 0, 'create_env_on_driver': True, 'num_workers': 8, 'batch_mode': 'complete_episodes'}


[2m[36m(pid=17973, ip=10.28.2.66)[0m 2021-09-15 11:47:25,516	ERROR worker.py:428 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=17973, ip=10.28.2.66)
[2m[36m(pid=17973, ip=10.28.2.66)[0m   File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 580, in __init__
[2m[36m(pid=17973, ip=10.28.2.66)[0m     self._build_policy_map(
[2m[36m(pid=17973, ip=10.28.2.66)[0m   File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1375, in _build_policy_map
[2m[36m(pid=17973, ip=10.28.2.66)[0m     self.policy_map.create_policy(name, orig_cls, obs_space, act_space,
[2m[36m(pid=17973, ip=10.28.2.66)[0m   File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/r

RayActorError: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=17973, ip=10.28.2.66)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 580, in __init__
    self._build_policy_map(
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1375, in _build_policy_map
    self.policy_map.create_policy(name, orig_cls, obs_space, act_space,
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/policy/policy_map.py", line 136, in create_policy
    self[policy_id] = class_(observation_space, action_space,
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/contrib/alpha_zero/core/alpha_zero_trainer.py", line 138, in __init__
    model = ModelCatalog.get_model_v2(
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/models/catalog.py", line 552, in get_model_v2
    raise e
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/models/catalog.py", line 537, in get_model_v2
    instance = model_cls(obs_space, action_space, num_outputs,
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/ray/rllib/contrib/alpha_zero/models/custom_torch_models.py", line 102, in __init__
    in_features=obs_space.original_space["obs"].shape[0],
TypeError: 'NoneType' object is not subscriptable

In [None]:
shutdown()

In [None]:
sns.set(style='darkgrid', rc={'figure.figsize': (12, 8)})

In [None]:
g = sns.lineplot(data=df, x='Graph size', y='Returns', hue='Agent', ci='sd')
g.legend(title='Agent', loc='upper left')
g.set_title('Returns vs Size (random attacker)')

In [None]:
g = sns.lineplot(data=df, x='Graph size', y='Episode lengths', hue='Agent', ci='sd')
g.legend(title='Agent', loc='upper left')
g.set_title('Episode lengths vs Size (random attacker)')

In [None]:
pd.set_option('display.max_columns', 32)
df.groupby('Agent').describe()

In [None]:
e = AttackSimulationEnv({})

In [None]:
e.observation_space