In [1]:
import os

import gym
import numpy as np
import pandas as pd
import seaborn as sns

from ray import init, rllib, tune, shutdown

In [2]:
from attack_simulator.agents import DEFENDERS
from attack_simulator.env import AttackSimulationEnv
from attack_simulator.graph import AttackGraph, SIZES

In [3]:
class AgentPolicy(rllib.policy.Policy):
    """RLlib policy that delegates every decision to a defender from `DEFENDERS`.

    The defender class is selected by `config["agent_type"]`. The policy keeps
    no weights of its own: `get_weights` is always empty and `set_weights`
    ignores its argument.
    """

    def __init__(self, observation_space, action_space, config):
        """Instantiate the wrapped defender from the spaces and the trainer config."""
        super().__init__(observation_space, action_space, config)
        settings = {
            "input_dim": observation_space.shape[0],
            "num_actions": action_space.n,
            "random_seed": config["seed"],
            "attack_graph": config["env_config"]["attack_graph"],
        }
        agent_class = DEFENDERS[config["agent_type"]]
        self._agent = agent_class(settings)

    def compute_actions(self, observations, *args, **kwargs):
        """Ask the defender for one action per observation.

        Returns the actions together with an empty state list and an empty
        info dict; any extra positional or keyword arguments are ignored.
        """
        chosen = [self._agent.act(observation) for observation in observations]
        # FIXME: use a `numpy` array as a temporary workaround for
        #        https://github.com/ray-project/ray/issues/10100
        return np.array(chosen), [], {}

    def get_weights(self):
        """Return an empty dict: this policy exposes no weights."""
        return {}

    def set_weights(self, weights):
        """Accept and discard `weights`."""


def template_agent(agent_type):
    """Build an RLlib trainer class whose default policy is `AgentPolicy`.

    `agent_type` serves as the trainer's name and is also stored under the
    `agent_type` key of the trainer's default config, which is where
    `AgentPolicy` reads it to pick the defender.
    """
    trainer_defaults = rllib.agents.trainer.with_common_config({"agent_type": agent_type})
    build_trainer = rllib.agents.trainer_template.build_trainer
    return build_trainer(
        name=agent_type,
        default_policy=AgentPolicy,
        default_config=trainer_defaults,
    )

In [4]:
from copy import deepcopy


class AttackSimAlphaZeroEnv(AttackSimulationEnv):
    """`AttackSimulationEnv` extended with snapshot (`get_state`) and restore (`set_state`)."""

    def set_state(self, state):
        """Overwrite the simulation attributes with a deep copy of a `get_state` snapshot."""
        # Copy the tuple as a whole so that the caller's snapshot stays
        # independent of whatever the simulation does afterwards.
        snapshot = deepcopy(state)
        (
            simulation_time,
            ttc_remaining,
            attack_surface,
            attack_state,
            service_state,
            observation,
            rng,
            attacker,
        ) = snapshot
        self.simulation_time = simulation_time
        self.ttc_remaining = ttc_remaining
        self.attack_surface = attack_surface
        self.attack_state = attack_state
        self.service_state = service_state
        self._observation = observation
        self.rng = rng
        self.attacker = attacker

    def get_state(self):
        """Return a deep-copied tuple of the attributes that `set_state` restores."""
        return deepcopy(
            (
                self.simulation_time,
                self.ttc_remaining,
                self.attack_surface,
                self.attack_state,
                self.service_state,
                self._observation,
                self.rng,
                self.attacker,
            )
        )
    

class AlphaZeroWrapper(gym.Env):
    """Wrap a discrete-action environment for use with RLlib's `contrib/AlphaZero`.

    The wrapper:

    - builds the wrapped environment as `config["env_class"](config)`;
    - turns every observation into a dict with the original observation under
      `obs` and a constant all-ones `action_mask`;
    - withholds rewards during an episode and emits the accumulated total on
      the final step only;
    - forwards `get_state`/`set_state`, pairing the wrapped environment's
      state with the accumulated reward.
    """

    def __init__(self, config):
        """Create the wrapped environment and derive the spaces from it.

        Raises:
            AssertionError: if the wrapped action space is not `gym.spaces.Discrete`.
        """
        self.env = config["env_class"](config)
        self.action_space = self.env.action_space
        assert isinstance(
            self.action_space, gym.spaces.Discrete
        ), "AlphaZero requires a Discrete action space"
        shape = (self.action_space.n,)
        self.observation_space = gym.spaces.Dict(
            dict(obs=self.env.observation_space, action_mask=gym.spaces.Box(0, 1, shape))
        )
        self.reward = 0
        # One shared all-ones mask is attached to every observation
        # (presumably meaning "every action is always allowed" -- TODO confirm).
        self.mask = np.full(shape, 1, dtype="int8")

    def reset(self):
        """Reset the wrapped environment and the accumulated reward."""
        self.reward = 0
        observation = self.env.reset()
        return dict(obs=observation, action_mask=self.mask)

    def step(self, action):
        """Step the wrapped environment, reporting a reward only when the episode ends."""
        observation, reward, done, info = self.env.step(action)
        self.reward += reward
        # Intermediate steps report 0; the final step reports the episode total.
        reward = self.reward if done else 0
        return dict(obs=observation, action_mask=self.mask), reward, done, info

    def set_state(self, state):
        """Restore a `get_state` snapshot and return the resulting observation dict."""
        env_state, self.reward = state
        self.env.set_state(env_state)
        # NOTE(review): relies on the wrapped environment exposing an
        # `observation` attribute -- confirm against `AttackSimulationEnv`.
        return dict(obs=self.env.observation, action_mask=self.mask)

    def get_state(self):
        """Return `(wrapped environment state, accumulated reward)`."""
        return self.env.get_state(), self.reward

    def close(self):
        """Close the wrapped environment."""
        self.env.close()

    def render(self, mode=None):
        """Render the wrapped environment and hand back whatever it returns."""
        # Propagate the result: some render modes return the frame or text
        # instead of drawing it, and dropping it would make them useless.
        return self.env.render(mode)

    def seed(self, seed=None):
        """Seed the wrapped environment and hand back whatever it returns."""
        return self.env.seed(seed)

In [5]:
# 'contrib/AlphaZero' does NOT appear to work without its custom dense model
from ray.rllib.contrib.alpha_zero.models.custom_torch_models import DenseModel

# Register the model under the name that the AlphaZero `model` config in
# `generate` refers to (`custom_model="alpha_zero_dense_model"`).
rllib.models.ModelCatalog.register_custom_model("alpha_zero_dense_model", DenseModel)

In [6]:
# work around: https://github.com/ray-project/ray/issues/17618

from IPython.core.interactiveshell import InteractiveShell


class keep_ipython_sane:
    """Context manager that puts IPython's shell singleton back in place on exit.

    On entry the current `InteractiveShell` instance is remembered; on exit it
    is written back to `InteractiveShell._instance`, whether or not the body
    raised. Exceptions from the body are never suppressed.
    """

    def __enter__(self):
        self.instance = InteractiveShell.instance()
        # Return the manager so that `with keep_ipython_sane() as guard:` binds
        # something usable instead of `None`.
        return self

    def __exit__(self, *args, **kwargs):
        # feel free to improve with error handling, etc.
        InteractiveShell._instance = self.instance
        # Explicitly let any exception from the `with` body propagate.
        return False

In [7]:
# Choose the keyword arguments for `ray.init` from the runtime environment.
if not os.path.isdir("/var/run/secrets/kubernetes.io"):
    # Not in a k8s pod: start a local Ray instance. Inside a container the
    # dashboard has to listen on all interfaces for port-forwarding to work.
    if os.path.exists("/.dockerenv"):
        dashboard_host = "0.0.0.0"
    else:
        dashboard_host = "127.0.0.1"
    args = {"num_cpus": 4, "dashboard_host": dashboard_host}
else:
    # Inside a k8s pod: attach to the already running cluster.
    args = {"address": "auto"}

# ALTERNATIVE: use the "Ray client" to connect to a remote cluster
# Unfortunately, JupyterNotebookReporter displays an object reference
# <IPython.core.display.HTML object> instead of content...
# --- --- ---
#
# from ray.util.client import worker
#
# worker.INITIAL_TIMEOUT_SEC = worker.MAX_TIMEOUT_SEC = 1
#
# ray_client_server = 'host.docker.internal' if os.path.exists("/.dockerenv") else '127.0.0.1'
# try:
#     init(address=f'ray://{ray_client_server}:10001')
# except ConnectionError:
#     pass  # TODO: try something else...

In [8]:
from ray.tune.utils.trainable import TrainableUtil
from tqdm.auto import tqdm

# Maps raw result column names to the display names used by the plots below.
rename = dict(
    agent_type="Agent",
    graph_size="Graph size",
    episode_length="Episode lengths",
    # RLlib's `hist_stats` reports episode lengths under the plural key
    # `episode_lengths`; without this entry the column is never renamed and
    # the "Episode lengths" plot has no such column to draw from.
    episode_lengths="Episode lengths",
    episode_reward="Returns",
)

# Experiment grid: every agent type is run on every graph size with every seed.
agent_types = ["contrib/AlphaZero", "R2D2", "rule-based", "random"]
graphs = [AttackGraph({"graph_size": size}) for size in SIZES]
seeds = [0, 1, 2, 3, 6, 7, 11, 28, 42, 1337]
iterations = 10  # training iterations for each agent that needs training
rollouts = 10  # evaluation episodes per trained or templated agent

# Extra trainer settings applied only while training.
train_config = {
    "num_workers": 4,
    "rollout_fragment_length": 32,
    "train_batch_size": 640,
    "buffer_size": 512,
    "batch_mode": "complete_episodes",
}
# Extra trainer settings applied when building an agent for evaluation.
eval_config = {
    "evaluation_interval": 1,
    "evaluation_num_workers": 1,
    "evaluation_config": {"explore": False, "replay_sequence_length": -1},
    "evaluation_num_episodes": rollouts,
}

def generate(savename):
    """Train (where needed) and evaluate every agent on every graph, then save the results.

    For each graph in `graphs`, agent type in `agent_types` and seed in `seeds`:

    - agent types found in `DEFENDERS` are wrapped with `template_agent` and
      evaluated directly;
    - any other agent type is looked up as an RLlib trainer. Unless the path
      `<agent>_<graph size>_<seed>` already exists, it is trained for
      `iterations` iterations and checkpointed there; it is then rebuilt with
      the evaluation config, restored from the checkpoint and evaluated.

    Every trainer is stopped as soon as it is no longer needed and Ray is shut
    down even when the run fails or is interrupted, so that workers from
    earlier trainers do not pile up over the many iterations of the grid.

    Args:
        savename: path of the CSV file that receives the combined results.

    Returns:
        A `pandas.DataFrame` holding the evaluation `hist_stats` of all runs,
        tagged with agent type and graph size, with columns renamed per `rename`.
    """
    init(**args)

    frames = []
    try:
        for graph in tqdm(graphs, 'graphs'):
            for agent_type in tqdm(agent_types, f'└── {graph.graph_size}'):
                agent_name = agent_type.split("/")[-1]
                for seed in tqdm(seeds, f'\u00a0\u2001\u2001\u2001└── {agent_name}@{graph.graph_size}'):
                    config = dict(
                        framework="torch",
                        env=AttackSimulationEnv,
                        env_config=dict(attack_graph=graph),
                        seed=seed,
                        log_level='ERROR',
                    )
                    if agent_type in DEFENDERS:
                        config.update(eval_config)
                        with keep_ipython_sane():
                            agent = template_agent(agent_type)(config=config)
                    else:
                        if agent_type == "contrib/AlphaZero":
                            config["env_config"].update(env_class=AttackSimAlphaZeroEnv)
                            config.update(
                                env=AlphaZeroWrapper,
                                model=dict(custom_model="alpha_zero_dense_model"),
                            )
                        if agent_type == "R2D2":
                            config.update(model=dict(use_lstm=True))

                        # The checkpoint directory doubles as the "already trained" marker.
                        name = f'{agent_name}_{graph.graph_size}_{seed}'
                        if not os.path.exists(name):
                            config.update(train_config)
                            with keep_ipython_sane():
                                agent = rllib.agents.registry.get_trainer_class(agent_type)(config=config)
                            try:
                                for _ in tqdm(range(iterations), f'\u00a0\u2001\u2001\u2001\u2001\u2001\u2001└── {name}'):
                                    agent.train()
                                    # TODO: break based on results?
                                agent.save(name)
                            finally:
                                # Release the training workers before the
                                # evaluation trainer is created.
                                agent.stop()
                            del agent

                        config.update(eval_config, num_workers=0)
                        with keep_ipython_sane():
                            agent = rllib.agents.registry.get_trainer_class(agent_type)(config=config)
                            checkpoint_path = TrainableUtil.get_checkpoints_paths(name).chkpt_path[0]
                            agent.restore(checkpoint_path)

                    try:
                        stats = agent.evaluate()['evaluation']['hist_stats']
                    finally:
                        # The evaluation trainer is done; free its resources.
                        agent.stop()
                    frame = pd.DataFrame(dict(agent_type=agent_type, graph_size=graph.num_attacks, **stats))
                    frames.append(frame)
    finally:
        # Disconnect from Ray no matter how the loop ended.
        shutdown()
    results_df = pd.concat(frames, ignore_index=True).rename(columns=rename)
    results_df.to_csv(savename)
    return results_df

AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps)
AttackGraph(en2720.yaml[small], 5 services, 27 attack steps)
AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps)
AttackGraph(en2720.yaml[medium], 13 services, 56 attack steps)
AttackGraph(en2720.yaml[large], 16 services, 62 attack steps)
AttackGraph(en2720.yaml[extra-large], 16 services, 65 attack steps)
AttackGraph(en2720.yaml[full], 18 services, 78 attack steps)


In [None]:
savename = "returns-agent-eval.csv"

# Load previously saved results when the CSV exists; otherwise run the full
# train-and-evaluate sweep, which also writes the CSV.
if os.path.exists(savename):
    df = pd.read_csv(savename, index_col=0)
else:
    df = generate(savename)

2021-09-30 00:38:36,796	INFO worker.py:825 -- Connecting to existing Ray cluster at address: 10.28.0.20:6379


graphs:   0%|          | 0/7 [00:00<?, ?it/s]

└── tiny:   0%|          | 0/4 [00:00<?, ?it/s]

    └── AlphaZero@tiny:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 00:38:37,090	INFO trainer.py:726 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2021-09-30 00:38:40,555	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_tiny_0/checkpoint_000010/checkpoint-10
2021-09-30 00:38:40,557	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 100.65515375137329, '_episodes_total': 483}
2021-09-30 00:40:34,203	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_tiny_1/checkpoint_000010/checkpoint-10
2021-09-30 00:40:34,205	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 103.21623516082764, '_episodes_total': 530}
2021-09-30 00:43:03,922	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_tiny_2/checkpoint_000010/checkpoint-10
2021-09-30 00:43:03,924	INFO trainable.py:390 -- Current s

    └── R2D2@tiny:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 01:03:22,768	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_tiny_0/checkpoint_000010/checkpoint-10
2021-09-30 01:03:22,770	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 32.59596228599548, '_episodes_total': 809}
2021-09-30 01:03:30,125	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_tiny_1/checkpoint_000010/checkpoint-10
2021-09-30 01:03:30,128	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 36.03712821006775, '_episodes_total': 784}
2021-09-30 01:03:39,158	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_tiny_2/checkpoint_000010/checkpoint-10
2021-09-30 01:03:39,160	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 42.56101584434509, '_episodes_total': 905}
2021-09-30 01:03:43,113	INFO trainable.py:382 -- Res

    └── rule-based@tiny:   0%|          | 0/10 [00:00<?, ?it/s]

    └── random@tiny:   0%|          | 0/10 [00:00<?, ?it/s]

└── small:   0%|          | 0/4 [00:00<?, ?it/s]

    └── AlphaZero@small:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 01:07:10,928	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_0/checkpoint_000010/checkpoint-10
2021-09-30 01:07:10,932	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 159.9058587551117, '_episodes_total': 413}
2021-09-30 01:14:49,573	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_1/checkpoint_000010/checkpoint-10
2021-09-30 01:14:49,576	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 169.20812010765076, '_episodes_total': 420}


       └── AlphaZero_small_2:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 01:22:47,823	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_2/checkpoint_000010/checkpoint-10
2021-09-30 01:22:47,825	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 173.27614521980286, '_episodes_total': 421}


       └── AlphaZero_small_3:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 01:28:10,535	INFO trainable.py:106 -- Trainable.setup took 17.189 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 01:28:10,819	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_3/checkpoint_000010/checkpoint-10
2021-09-30 01:28:10,869	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 195.68022751808167, '_episodes_total': 430}
2021-09-30 01:38:53,394	INFO trainable.py:106 -- Trainable.setup took 11.633 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_small_6:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 01:43:18,705	INFO trainable.py:106 -- Trainable.setup took 16.655 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 01:43:19,106	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_6/checkpoint_000010/checkpoint-10
2021-09-30 01:43:19,194	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 243.63806200027466, '_episodes_total': 468}
2021-09-30 01:55:42,003	INFO trainable.py:106 -- Trainable.setup took 17.264 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_small_7:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 02:03:08,011	INFO trainable.py:106 -- Trainable.setup took 38.835 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 02:03:08,564	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_7/checkpoint_000010/checkpoint-10
2021-09-30 02:03:08,590	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 401.3225677013397, '_episodes_total': 416}
2021-09-30 02:04:33,938	INFO trainable.py:106 -- Trainable.setup took 36.542 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_small_11:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 02:12:46,011	INFO trainable.py:106 -- Trainable.setup took 56.991 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 02:12:46,893	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_11/checkpoint_000010/checkpoint-10
2021-09-30 02:12:46,955	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 427.0717580318451, '_episodes_total': 478}
2021-09-30 02:17:07,757	INFO trainable.py:106 -- Trainable.setup took 54.749 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_small_28:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 02:26:24,896	INFO trainable.py:106 -- Trainable.setup took 56.116 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 02:26:25,821	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_28/checkpoint_000010/checkpoint-10
2021-09-30 02:26:25,925	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 492.13836669921875, '_episodes_total': 432}
2021-09-30 02:30:31,578	INFO trainable.py:106 -- Trainable.setup took 41.824 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_small_42:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 02:40:25,958	INFO trainable.py:106 -- Trainable.setup took 64.915 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 02:40:27,183	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_42/checkpoint_000010/checkpoint-10
2021-09-30 02:40:27,328	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 518.9625465869904, '_episodes_total': 451}
2021-09-30 02:46:21,901	INFO trainable.py:106 -- Trainable.setup took 62.225 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_small_1337:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 02:58:11,620	INFO trainable.py:106 -- Trainable.setup took 102.588 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 02:58:13,057	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_small_1337/checkpoint_000010/checkpoint-10
2021-09-30 02:58:13,148	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 595.7492537498474, '_episodes_total': 378}


    └── R2D2@small:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 03:11:48,547	INFO trainable.py:106 -- Trainable.setup took 37.543 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_0:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 03:24:03,402	INFO trainable.py:106 -- Trainable.setup took 67.186 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 03:24:06,543	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_0/checkpoint_000010/checkpoint-10
2021-09-30 03:24:06,642	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 650.2927560806274, '_episodes_total': 619}
2021-09-30 03:25:12,295	INFO trainable.py:106 -- Trainable.setup took 42.731 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_1:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 03:40:05,184	INFO trainable.py:106 -- Trainable.setup took 71.909 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 03:40:07,585	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_1/checkpoint_000010/checkpoint-10
2021-09-30 03:40:07,685	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 801.6426005363464, '_episodes_total': 599}
2021-09-30 03:41:24,090	INFO trainable.py:106 -- Trainable.setup took 44.777 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_2:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 03:58:44,996	INFO trainable.py:106 -- Trainable.setup took 82.799 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 03:58:48,962	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_2/checkpoint_000010/checkpoint-10
2021-09-30 03:58:49,080	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 934.9014189243317, '_episodes_total': 633}
2021-09-30 03:59:52,724	INFO trainable.py:106 -- Trainable.setup took 48.412 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_3:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 04:19:12,449	INFO trainable.py:106 -- Trainable.setup took 91.665 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 04:19:16,577	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_3/checkpoint_000010/checkpoint-10
2021-09-30 04:19:16,635	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1040.5419487953186, '_episodes_total': 677}
2021-09-30 04:20:38,202	INFO trainable.py:106 -- Trainable.setup took 52.617 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_6:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 04:41:15,180	INFO trainable.py:106 -- Trainable.setup took 98.406 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 04:41:19,290	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_6/checkpoint_000010/checkpoint-10
2021-09-30 04:41:19,373	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1109.3675401210785, '_episodes_total': 627}
2021-09-30 04:42:50,911	INFO trainable.py:106 -- Trainable.setup took 56.576 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_7:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 05:04:42,163	INFO trainable.py:106 -- Trainable.setup took 106.187 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 05:04:46,985	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_7/checkpoint_000010/checkpoint-10
2021-09-30 05:04:47,137	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1172.4949643611908, '_episodes_total': 621}
2021-09-30 05:06:19,130	INFO trainable.py:106 -- Trainable.setup took 60.890 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_11:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 05:29:23,443	INFO trainable.py:106 -- Trainable.setup took 117.031 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 05:29:27,858	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_11/checkpoint_000010/checkpoint-10
2021-09-30 05:29:28,081	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1232.0638337135315, '_episodes_total': 593}
2021-09-30 05:31:16,906	INFO trainable.py:106 -- Trainable.setup took 68.874 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_28:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 05:56:28,015	INFO trainable.py:106 -- Trainable.setup took 122.369 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 05:56:33,407	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_28/checkpoint_000010/checkpoint-10
2021-09-30 05:56:33,548	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1350.186371564865, '_episodes_total': 614}
2021-09-30 05:58:27,573	INFO trainable.py:106 -- Trainable.setup took 69.777 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_42:   0%|          | 0/10 [00:00<?, ?it/s]

2021-09-30 06:24:59,782	INFO trainable.py:106 -- Trainable.setup took 128.946 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 06:25:05,660	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_42/checkpoint_000010/checkpoint-10
2021-09-30 06:25:06,011	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1422.7538180351257, '_episodes_total': 638}
2021-09-30 06:27:05,796	INFO trainable.py:106 -- Trainable.setup took 71.824 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── R2D2_small_1337:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 06:55:40,533	INFO trainable.py:106 -- Trainable.setup took 141.499 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 06:55:48,249	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: R2D2_small_1337/checkpoint_000010/checkpoint-10
2021-09-30 06:55:48,596	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1527.8430078029633, '_episodes_total': 614}


    └── rule-based@small:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 06:56:32,561	INFO trainable.py:106 -- Trainable.setup took 15.546 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 06:56:58,381	INFO trainable.py:106 -- Trainable.setup took 17.049 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 06:57:22,760	INFO trainable.py:106 -- Trainable.setup took 14.377 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 06:57:49,374	INFO trainable.py:106 -- Trainable.setup took 16.011 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 06:58:12,143	INFO trainable.py:106 -- Trainable.setup took 14.628 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30

    └── random@small:   0%|          | 0/10 [00:01<?, ?it/s]

2021-09-30 07:00:57,013	INFO trainable.py:106 -- Trainable.setup took 15.765 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 07:01:23,254	INFO trainable.py:106 -- Trainable.setup took 16.506 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 07:01:48,414	INFO trainable.py:106 -- Trainable.setup took 15.827 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 07:02:17,402	INFO trainable.py:106 -- Trainable.setup took 18.044 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 07:02:45,581	INFO trainable.py:106 -- Trainable.setup took 16.649 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30

└── medium-small:   0%|          | 0/4 [00:02<?, ?it/s]

    └── AlphaZero@medium-small:   0%|          | 0/10 [00:02<?, ?it/s]

2021-09-30 07:09:32,212	INFO trainable.py:106 -- Trainable.setup took 230.072 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_medium-small_0:   0%|          | 0/10 [00:02<?, ?it/s]

2021-09-30 07:48:52,740	INFO trainable.py:106 -- Trainable.setup took 495.178 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 07:48:57,459	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_medium-small_0/checkpoint_000010/checkpoint-10
2021-09-30 07:48:58,270	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1805.718020915985, '_episodes_total': 367}
2021-09-30 08:06:58,944	INFO trainable.py:106 -- Trainable.setup took 278.423 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_medium-small_1:   0%|          | 0/10 [00:02<?, ?it/s]

2021-09-30 08:50:45,133	INFO trainable.py:106 -- Trainable.setup took 589.646 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 08:50:49,530	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_medium-small_1/checkpoint_000010/checkpoint-10
2021-09-30 08:50:49,739	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 1971.3794963359833, '_episodes_total': 395}
2021-09-30 09:17:18,561	INFO trainable.py:106 -- Trainable.setup took 292.150 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_medium-small_2:   0%|          | 0/10 [00:02<?, ?it/s]

2021-09-30 10:03:51,107	INFO trainable.py:106 -- Trainable.setup took 587.328 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 10:03:56,335	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_medium-small_2/checkpoint_000010/checkpoint-10
2021-09-30 10:03:57,437	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 2122.9606473445892, '_episodes_total': 379}
2021-09-30 10:32:03,335	INFO trainable.py:106 -- Trainable.setup took 286.502 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_medium-small_3:   0%|          | 0/10 [00:04<?, ?it/s]

2021-09-30 11:21:05,868	INFO trainable.py:106 -- Trainable.setup took 626.599 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 11:21:11,204	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_medium-small_3/checkpoint_000010/checkpoint-10
2021-09-30 11:21:11,777	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 2234.22594666481, '_episodes_total': 325}
2021-09-30 12:48:16,470	INFO trainable.py:106 -- Trainable.setup took 331.692 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_medium-small_6:   0%|          | 0/10 [00:03<?, ?it/s]

2021-09-30 13:39:24,437	INFO trainable.py:106 -- Trainable.setup took 655.571 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-09-30 13:39:28,748	INFO trainable.py:382 -- Restored on 10.28.0.20 from checkpoint: AlphaZero_medium-small_6/checkpoint_000010/checkpoint-10
2021-09-30 13:39:29,172	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 2318.7335131168365, '_episodes_total': 349}
2021-09-30 14:38:35,671	INFO trainable.py:106 -- Trainable.setup took 299.238 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


       └── AlphaZero_medium-small_7:   0%|          | 0/10 [00:04<?, ?it/s]

Exception ignored in: <generator object tqdm.__iter__ at 0x7fe0338740b0>
Traceback (most recent call last):
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/tqdm/std.py", line 1195, in __iter__
    self.close()
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/tqdm/notebook.py", line 286, in close
    self.disp(bar_style='danger', check_delay=False)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/tqdm/notebook.py", line 180, in display
    rtext.value = right
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/traitlets/traitlets.py", line 606, in __set__
    self.set(obj, value)
  File "/root/.cache/pypoetry/virtualenvs/attack-simulator-_8mgyPy8-py3.8/lib/python3.8/site-packages/traitlets/traitlets.py", line 595, in set
    obj._notify_trait(self.name, old_value, new_value)
  Fil

In [None]:
# Inspect the results frame that was loaded or generated above.
df

In [None]:
# Styling for the figures below: seaborn's dark grid with a 12 x 8 figure size.
sns.set(style="darkgrid", rc={"figure.figsize": (12, 8)})

In [None]:
# Returns against graph size, one line per agent; `ci="sd"` requests a
# standard-deviation band around each line.
g = sns.lineplot(data=df, x="Graph size", y="Returns", hue="Agent", ci="sd")
g.legend(title="Agent", loc="lower left")
g.set_title("Returns vs Size (random attacker)")

In [None]:
# Episode lengths against graph size, one line per agent; `ci="sd"` requests a
# standard-deviation band around each line.
g = sns.lineplot(data=df, x="Graph size", y="Episode lengths", hue="Agent", ci="sd")
g.legend(title="Agent", loc="upper left")
g.set_title("Episode lengths vs Size (random attacker)")

In [None]:
# Raise pandas' displayed-column limit to 32, then show summary statistics of
# every numeric column grouped by agent.
pd.set_option("display.max_columns", 32)
df.groupby("Agent").describe()