In [1]:
import os

import gym
import numpy as np
import pandas as pd
import seaborn as sns

from ray import init, rllib, tune, shutdown

In [2]:
from attack_simulator.agents import DEFENDERS
from attack_simulator.env import AttackSimulationEnv
from attack_simulator.graph import AttackGraph, SIZES

In [3]:
class AgentPolicy(rllib.policy.Policy):
    def __init__(self, observation_space, action_space, config):
        super().__init__(observation_space, action_space, config)
        agent_config = dict(
            input_dim=observation_space.shape[0],
            num_actions=action_space.n,
            random_seed=config["seed"],
            attack_graph=config["env_config"]["attack_graph"],
        )
        self._agent = DEFENDERS[config["agent_type"]](agent_config)

    def compute_actions(self, observations, *args, **kwargs):
        # FIXME: use a `numpy` array as a temporary workaround for
        #        https://github.com/ray-project/ray/issues/10100
        return np.array([self._agent.act(obs) for obs in observations]), [], {}

    def get_weights(self):
        return {}

    def set_weights(self, weights):
        pass


def template_agent(agent_type):
    default_config = rllib.agents.trainer.with_common_config(dict(agent_type=agent_type))
    return rllib.agents.trainer_template.build_trainer(
        name=agent_type,
        default_policy=AgentPolicy,
        default_config=default_config,
    )

In [4]:
from copy import deepcopy

class AttackSimAlphaZeroEnv(AttackSimulationEnv):
    def set_state(self, state):
        (
            self.episode_count,
            self.episode_id,
            self.simulation_time,
            self.ttc_remaining,
            self.rewards,
            self.attack_surface,
            self.attack_state,
            self.service_state,
            self._observation,
            self.seed,
            self.rng,
            self.action,
            self.attack_index,
            self.reward,
            self.done,
            self.compromised_steps,
            self.compromised_flags
        ) = deepcopy(state)

    def get_state(self):
        state = (
            self.episode_count,
            self.episode_id,
            self.simulation_time,
            self.ttc_remaining,
            self.rewards,
            self.attack_surface,
            self.attack_state,
            self.service_state,
            self._observation,
            self.seed,
            self.rng,
            getattr(self, 'action', None),
            getattr(self, 'attack_index', None),
            getattr(self, 'reward', None),
            getattr(self, 'done', None),
            getattr(self, 'compromised_steps', None),
            getattr(self, 'compromised_flags', None),
        )
        return deepcopy(state)
    

class AlphaZeroWrapper(gym.Env):
    def __init__(self, config):
        self.env = config["env_class"](config)
        self.action_space = self.env.action_space
        assert isinstance(
            self.action_space, gym.spaces.Discrete
        ), "AlphaZero requires a Discrete action space"
        shape = (self.action_space.n,)
        self.observation_space = gym.spaces.Dict(
            dict(obs=self.env.observation_space, action_mask=gym.spaces.Box(0, 1, shape))
        )
        self.reward = 0
        self.mask = np.full(shape, 1, dtype="int8")

    def reset(self):
        self.reward = 0
        observation = self.env.reset()
        return dict(obs=observation, action_mask=self.mask)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        self.reward += reward
        reward = self.reward if done else 0
        return dict(obs=observation, action_mask=self.mask), reward, done, info

    def set_state(self, state):
        env_state, self.reward = state
        self.env.set_state(env_state)
        return dict(obs=self.env.observation, action_mask=self.mask)

    def get_state(self):
        return self.env.get_state(), self.reward

    def close(self):
        self.env.close()

    def render(self, mode=None):
        self.env.render(mode)

    def seed(self, seed=None):
        self.env.seed(seed)

In [5]:
# 'contrib/AlphaZero' does NOT appear to work without its custom dense model
from ray.rllib.contrib.alpha_zero.models.custom_torch_models import DenseModel

rllib.models.ModelCatalog.register_custom_model("alpha_zero_dense_model", DenseModel)

In [6]:
if os.path.isdir("/var/run/secrets/kubernetes.io"):  # inside k8s pod
    args = dict(address="auto")
else:
    # listen on all interfaces inside a container for port-forwarding to work
    dashboard_host = "0.0.0.0" if os.path.exists("/.dockerenv") else "127.0.0.1"
    args = dict(num_cpus=4, dashboard_host=dashboard_host)

# ALTERNATIVE: use the "Ray client" to connect to a remote cluster
# Unfortunately, JupyterNotebookReporter displays an object reference
# <IPython.core.display.HTML object> instead of content...
# --- --- ---
#
# from ray.util.client import worker
#
# worker.INITIAL_TIMEOUT_SEC = worker.MAX_TIMEOUT_SEC = 1
#
# ray_client_server = 'host.docker.internal' if os.path.exists("/.dockerenv") else '127.0.0.1'
# try:
#     init(address=f'ray://{ray_client_server}:10001')
# except Connection Error:
#     pass  # TODO: try something else...

In [7]:
agent_types = ["contrib/AlphaZero", "R2D2", "rule-based", "random"]
graphs = [AttackGraph(dict(graph_size=size)) for size in SIZES]
seeds = [0, 1, 2, 3, 6, 7, 11, 28, 42, 1337]
iterations = 10
rollouts = 10

common_config = dict(
    # log_level='DEBUG',
    framework="torch",
    env=AttackSimulationEnv,
    env_config=dict(attack_graph=tune.grid_search(graphs)),
    seed=tune.grid_search(seeds),
    # common evaluation settings
    evaluation_num_workers=1,
    evaluation_config=dict(
        explore=False,
        # workaround for a bug in RLLib (https://github.com/ray-project/ray/issues/17921)
        replay_sequence_length=-1,
    ),
    evaluation_num_episodes=rollouts,
)
train_and_eval_config = dict(
    common_config,
    model=dict(use_lstm=True),
    num_workers=4,
    # evaluation at the end
    evaluation_interval=iterations,
)
eval_only_config = dict(
    common_config,
    # evaluation ONLY: avoid MultiGPU optimizer, set all relevant sizes to 0
    simple_optimizer=True,
    num_workers=0,
    train_batch_size=0,
    rollout_fragment_length=0,
    timesteps_per_iteration=0,
    # evaluation at the end
    evaluation_interval=1,
)

AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps)
AttackGraph(en2720.yaml[small], 5 services, 27 attack steps)
AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps)
AttackGraph(en2720.yaml[medium], 13 services, 56 attack steps)
AttackGraph(en2720.yaml[large], 16 services, 62 attack steps)
AttackGraph(en2720.yaml[extra-large], 16 services, 65 attack steps)
AttackGraph(en2720.yaml[full], 18 services, 78 attack steps)


In [8]:
rename = {
    "config.env_config.agent_type": "Agent",
    "config.env_config.attack_graph": "graph",
    "evaluation.hist_stats.episode_reward": "returns",
    "evaluation.hist_stats.episode_lengths": "lengths",
}

def postprocess(results_df):
    df = results_df[rename.keys()].rename(columns=rename)
    df.dropna(inplace=True)  # remove `NaN` evaluation results from failed trials 
    df["Graph size"] = df["graph"].apply(lambda g: g.num_attacks)
    del df["graph"]
    df["tuple"] = df.apply(lambda t: list(zip(t.returns, t.lengths)), axis="columns")
    del df["returns"]
    del df["lengths"]
    df = df.explode("tuple", ignore_index=True)
    df[["Returns", "Episode lengths"]] = df["tuple"].tolist()
    del df["tuple"]
    return df

In [9]:
def generate(savename):
    init(**args)

    frames = []
    for agent_type in agent_types:
        if agent_type in DEFENDERS:
            agent = template_agent(agent_type)
            config = eval_only_config
            stop = dict(training_iteration=0)
        else:
            agent = agent_type
            config = train_and_eval_config
            if agent_type == "contrib/AlphaZero":
                config["env_config"].update(env_class=AttackSimAlphaZeroEnv)
                config.update(
                    env=AlphaZeroWrapper,
                    model=dict(custom_model="alpha_zero_dense_model"),
                    rollout_fragment_length=32,
                    train_batch_size=640,
                    sgd_minibatch_size=64,
                    num_sgd_iter=8,
                )
            stop = dict(training_iteration=iterations)  # TODO: additional stopping criteria?

        config["env_config"].update(agent_type=agent_type)
        results = tune.run(
            agent,
            config=config,
            stop=stop,
            max_failures=3,
            queue_trials=True,
            raise_on_failed_trial=False,
            progress_reporter=tune.JupyterNotebookReporter(overwrite=True),
        )
        frames.append(results.results_df)

    shutdown()
    results_df = pd.concat(frames, ignore_index=True)
    df = postprocess(results_df)
    df.to_csv(savename)
    return df

In [10]:
savename = "returns-tune-train+eval.csv"

df = generate(savename) if not os.path.exists(savename) else pd.read_csv(savename, index_col=0)

Trial name,status,loc,env_config/attack_graph,seed
R2D2_AlphaZeroWrapper_bd42f_00000,ERROR,,"AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00001,ERROR,,"AttackGraph(en2720.yaml[small], 5 services, 27 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00002,ERROR,,"AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00003,ERROR,,"AttackGraph(en2720.yaml[medium], 13 services, 56 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00004,ERROR,,"AttackGraph(en2720.yaml[large], 16 services, 62 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00005,ERROR,,"AttackGraph(en2720.yaml[extra-large], 16 services, 65 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00006,ERROR,,"AttackGraph(en2720.yaml[full], 18 services, 78 attack steps)",0
R2D2_AlphaZeroWrapper_bd42f_00007,ERROR,,"AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps)",1
R2D2_AlphaZeroWrapper_bd42f_00008,ERROR,,"AttackGraph(en2720.yaml[small], 5 services, 27 attack steps)",1
R2D2_AlphaZeroWrapper_bd42f_00009,ERROR,,"AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps)",1

Trial name,# failures,error file
R2D2_AlphaZeroWrapper_bd42f_00000,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00000_0_attack_graph=AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps),seed=0_2021-09-19_18-09-33/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00001,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00001_1_attack_graph=AttackGraph(en2720.yaml[small], 5 services, 27 attack steps),seed=0_2021-09-19_18-09-35/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00002,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00002_2_attack_graph=AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps),seed=0_2021-09-19_18-09-35/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00003,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00003_3_attack_graph=AttackGraph(en2720.yaml[medium], 13 services, 56 attack steps),seed=0_2021-09-19_18-09-35/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00004,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00004_4_attack_graph=AttackGraph(en2720.yaml[large], 16 services, 62 attack steps),seed=0_2021-09-19_18-09-48/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00005,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00005_5_attack_graph=AttackGraph(en2720.yaml[extra-large], 16 services, 65 attack steps),seed=0_2021-09-19_18-09-50/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00006,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00006_6_attack_graph=AttackGraph(en2720.yaml[full], 18 services, 78 attack steps),seed=0_2021-09-19_18-10-01/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00007,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00007_7_attack_graph=AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps),seed=1_2021-09-19_18-10-04/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00008,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00008_8_attack_graph=AttackGraph(en2720.yaml[small], 5 services, 27 attack steps),seed=1_2021-09-19_18-10-15/error.txt"
R2D2_AlphaZeroWrapper_bd42f_00009,4,"/root/ray_results/R2D2/R2D2_AlphaZeroWrapper_bd42f_00009_9_attack_graph=AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps),seed=1_2021-09-19_18-10-18/error.txt"


2021-09-19 18:17:21,859	ERROR tune.py:557 -- Trials did not complete: [R2D2_AlphaZeroWrapper_bd42f_00000, R2D2_AlphaZeroWrapper_bd42f_00001, R2D2_AlphaZeroWrapper_bd42f_00002, R2D2_AlphaZeroWrapper_bd42f_00003, R2D2_AlphaZeroWrapper_bd42f_00004, R2D2_AlphaZeroWrapper_bd42f_00005, R2D2_AlphaZeroWrapper_bd42f_00006, R2D2_AlphaZeroWrapper_bd42f_00007, R2D2_AlphaZeroWrapper_bd42f_00008, R2D2_AlphaZeroWrapper_bd42f_00009, R2D2_AlphaZeroWrapper_bd42f_00010, R2D2_AlphaZeroWrapper_bd42f_00011, R2D2_AlphaZeroWrapper_bd42f_00012, R2D2_AlphaZeroWrapper_bd42f_00013, R2D2_AlphaZeroWrapper_bd42f_00014, R2D2_AlphaZeroWrapper_bd42f_00015, R2D2_AlphaZeroWrapper_bd42f_00016, R2D2_AlphaZeroWrapper_bd42f_00017, R2D2_AlphaZeroWrapper_bd42f_00018, R2D2_AlphaZeroWrapper_bd42f_00019, R2D2_AlphaZeroWrapper_bd42f_00020, R2D2_AlphaZeroWrapper_bd42f_00021, R2D2_AlphaZeroWrapper_bd42f_00022, R2D2_AlphaZeroWrapper_bd42f_00023, R2D2_AlphaZeroWrapper_bd42f_00024, R2D2_AlphaZeroWrapper_bd42f_00025, R2D2_AlphaZeroWrapp

KeyError: 'trial_id'

In [None]:
sns.set(style="darkgrid", rc={"figure.figsize": (12, 8)})

In [None]:
g = sns.lineplot(data=df, x="Graph size", y="Returns", hue="Agent", ci="sd")
g.legend(title="Agent", loc="upper left")
g.set_title("Returns vs Size (random attacker)")

In [None]:
g = sns.lineplot(data=df, x="Graph size", y="Episode lengths", hue="Agent", ci="sd")
g.legend(title="Agent", loc="upper left")
g.set_title("Episode lengths vs Size (random attacker)")

In [None]:
# pd.set_option("display.max_columns", 32)
df.groupby("Agent").describe()

