In [1]:
import os

import numpy as np
import pandas as pd
import seaborn as sns

from ray import init, rllib, shutdown

In [2]:
from attack_simulator.agents import ATTACKERS
from attack_simulator.env import AttackSimulationEnv
from attack_simulator.graph import AttackGraph, SIZES

In [3]:
class NoAction(rllib.policy.Policy):
    """Stateless policy that answers every observation with action `0`.

    It has no trainable parameters: the weights are an empty dict and
    setting weights does nothing.
    """

    def compute_actions(self, observations, *args, **kwargs):
        """Return one zero action per observation in the batch.

        Any extra positional or keyword arguments are accepted and ignored.

        Returns:
            A tuple `(actions, state_outs, info)`: an all-zero `numpy` array
            with one entry per observation, an empty list, and an empty dict.
        """
        batch_size = len(observations)
        # FIXME: use a `numpy` array as a temporary workaround for
        #        https://github.com/ray-project/ray/issues/10100
        actions = np.zeros(batch_size)
        state_outs, info = [], {}
        return actions, state_outs, info

    def get_weights(self):
        """Return the (empty) weights of this parameter-free policy."""
        return dict()

    def set_weights(self, weights):
        """Ignore `weights`; there is nothing to update."""
        return None


# Build a trainer class named "NoAction" that uses `NoAction` as its default
# policy; it is instantiated later with an RLlib config (`no_action(config=...)`).
no_action = rllib.agents.trainer_template.build_trainer(name="NoAction", default_policy=NoAction)

In [7]:
from ray.util.client import worker

# Override both timeout settings of the Ray client worker module with 1.
# NOTE(review): presumably this makes a failed client connection in `ray_init`
# give up quickly so the local fallback kicks in -- confirm against the
# installed Ray version, since these are internal module attributes.
worker.INITIAL_TIMEOUT_SEC = worker.MAX_TIMEOUT_SEC = 1


def ray_init():
    """Initialise Ray, preferring a Ray client server over a local instance.

    First tries to connect to `ray://<host>:10001`, where `<host>` is
    `host.docker.internal` when `/.dockerenv` exists and `127.0.0.1`
    otherwise.  If that raises `ConnectionError`, Ray is started locally
    with 4 CPUs instead.

    Returns:
        The context object returned by `init`; it is also printed.
    """
    in_container = os.path.exists("/.dockerenv")

    if in_container:
        ray_client_server = "host.docker.internal"
    else:
        ray_client_server = "127.0.0.1"

    try:
        context = init(address=f"ray://{ray_client_server}:10001")
    except ConnectionError:
        # listen on all interfaces inside a container for port-forwarding to work
        if in_container:
            dashboard_host = "0.0.0.0"
        else:
            dashboard_host = "127.0.0.1"
        context = init(num_cpus=4, dashboard_host=dashboard_host)

    # ANSI escape codes wrap the context; the trailing code resets formatting.
    print("\x1b[33;1m", context, "\x1b[m")
    return context

Encountered connection issues in the data channel. Attempting to reconnect.
Failed to reconnect the data channel


In [5]:
from tqdm.auto import tqdm

# Raw column name -> human-readable label used for the saved results and plots.
rename = {
    "attacker": "Attacker",
    "graph_size": "Graph size",
    "episode_reward": "Returns",
    "episode_lengths": "Episode lengths",
}

# Experiment grid: every seed is combined with every attacker and every graph.
num_episodes = 5  # run 5 episodes on the same environment
attackers = [*ATTACKERS]
graphs = [AttackGraph({"graph_size": size}) for size in SIZES]
seeds = [0, 1, 2, 3, 6, 7, 11, 28, 42, 1337]


def generate(savename):
    """Evaluate the no-action defender on every seed/attacker/graph combination.

    For each combination a `no_action` trainer is built and evaluated once
    (`num_episodes` episodes per evaluation).  The `hist_stats` of each
    evaluation are collected, combined into a single data frame, and written
    to `savename` as CSV.

    Ray is initialised via `ray_init()` on entry and is always shut down
    again, even when a trainer cannot be built or the run is interrupted.

    Args:
        savename: path of the CSV file the combined results are written to.

    Returns:
        The combined results as a `pandas.DataFrame`, with columns renamed
        according to the module-level `rename` mapping.

    Raises:
        ValueError: if not a single evaluation produced results.
    """
    ray_init()

    frames = []
    try:
        for seed in tqdm(seeds, "seeds"):
            for attacker in tqdm(attackers, f"└── {seed}"):
                for graph in tqdm(graphs, f"\u00a0\u2001\u2001\u2001└── {attacker}@{seed}"):
                    config = dict(
                        log_level="DEBUG",
                        framework="torch",
                        env=AttackSimulationEnv,
                        env_config=dict(attack_graph=graph, attacker=attacker),
                        seed=seed,
                        num_workers=0,
                        rollout_fragment_length=0,
                        evaluation_interval=1,
                        evaluation_num_workers=1,
                        evaluation_config=dict(explore=False),
                        evaluation_num_episodes=num_episodes,
                    )
                    # A failure to build the trainer is not swallowed: it aborts
                    # the run (as before), but Ray is now shut down on the way out.
                    agent = no_action(config=config)
                    try:
                        stats = agent.evaluate()["evaluation"]["hist_stats"]
                        frame = pd.DataFrame(
                            dict(attacker=attacker, graph_size=graph.num_attacks, **stats)
                        )
                        frames.append(frame)
                    except Exception as e:
                        # Best effort: report the failing combination and carry on.
                        print(seed, attacker, graph.graph_size, e)
                    finally:
                        # Release the trainer's resources even on interruption.
                        agent.stop()
    finally:
        # Never leave the Ray connection / local instance behind.
        shutdown()

    if not frames:
        # `pd.concat([])` would raise an opaque "No objects to concatenate".
        raise ValueError(f"No evaluation succeeded; nothing to write to {savename!r}")

    results_df = pd.concat(frames, ignore_index=True).rename(columns=rename)
    results_df.to_csv(savename)
    return results_df

AttackGraph(en2720.yaml[tiny], 2 services, 7 attack steps)
AttackGraph(en2720.yaml[small], 5 services, 27 attack steps)
AttackGraph(en2720.yaml[medium-small], 9 services, 44 attack steps)
AttackGraph(en2720.yaml[medium], 13 services, 56 attack steps)
AttackGraph(en2720.yaml[large], 16 services, 62 attack steps)
AttackGraph(en2720.yaml[extra-large], 16 services, 65 attack steps)
AttackGraph(en2720.yaml[full], 18 services, 78 attack steps)


In [6]:
savename = "length-agent-eval.csv"

# Reuse previously saved results when available; otherwise run the (slow)
# evaluation, which also writes the CSV for next time.
if os.path.exists(savename):
    df = pd.read_csv(savename, index_col=0)
else:
    df = generate(savename)

ClientContext(dashboard_url='10.28.0.45:8265', python_version='3.8.11', ray_version='1.7.1', ray_commit='{{RAY_COMMIT_SHA}}', protocol_version='2021-09-02', _num_clients=1, _context_to_restore=<ray.util.client._ClientContext object at 0xffff5b6ce3a0>)


seeds:   0%|          | 0/10 [00:00<?, ?it/s]

└── 0:   0%|          | 0/6 [00:00<?, ?it/s]

    └── well-informed@0:   0%|          | 0/7 [00:00<?, ?it/s]

2021-10-29 14:17:09,662	INFO rollout_worker.py:1540 -- Validating sub-env at vector index=0 ... (ok)
2021-10-29 14:17:09,694	DEBUG rollout_worker.py:1363 -- Creating policy for default_policy
2021-10-29 14:17:09,695	DEBUG catalog.py:702 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0xffff486bdfa0>: Box(0, 1, (9,), int8) -> (9,)
2021-10-29 14:17:09,696	INFO rollout_worker.py:1386 -- Built policy map: {}
2021-10-29 14:17:09,697	INFO rollout_worker.py:1387 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0xffff486bdfa0>}
2021-10-29 14:17:09,697	INFO rollout_worker.py:614 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0xffff486c9400>}
2021-10-29 14:17:09,698	DEBUG rollout_worker.py:723 -- Created rollout worker with env <ray.rllib.env.base_env._VectorEnvToBaseEnv object at 0xffff486c96d0> (<AttackSimulationEnv instance>), policies {}
2021-10-29 14:17:09,702	DEBUG tra

TypeError: got an unexpected keyword argument '_ray_trace_ctx'

In [None]:
# Display the evaluation results that were loaded from CSV or freshly generated.
df

In [None]:
# Use seaborn's "darkgrid" style and a (12, 8) default figure size for the plots.
sns.set(style="darkgrid", rc={"figure.figsize": (12, 8)})

In [None]:
# Returns as a function of graph size, one line per attacker.
# NOTE(review): `ci="sd"` presumably draws a standard-deviation band around
# each line -- confirm for the installed seaborn version.
g = sns.lineplot(
    data=df,
    x="Graph size",
    y="Returns",
    hue="Attacker",
    ci="sd",
)
g.legend(title="Attacker", loc="upper right")
g.set_title("Defender: no-action")

In [None]:
# Episode lengths as a function of graph size, one line per attacker.
# NOTE(review): `ci="sd"` presumably draws a standard-deviation band around
# each line -- confirm for the installed seaborn version.
g = sns.lineplot(
    data=df,
    x="Graph size",
    y="Episode lengths",
    hue="Attacker",
    ci="sd",
)
g.legend(title="Attacker", loc="upper left")
g.set_title("Defender: no-action")

In [None]:
# Allow up to 32 columns in displayed frames so the summary below is not truncated.
pd.set_option("display.max_columns", 32)
# Summary statistics of the results, grouped by attacker.
df.groupby("Attacker").describe()

In [None]:
import ray

# Diagnostic: show which Ray version is installed in this kernel.
ray.__version__

In [None]:
# Diagnostic: print the documentation of `ray.init` for the installed version.
# TODO: remove this debugging cell from the finished notebook.
help(ray.init)

Log channel is reconnecting. Logs produced while the connection was down can be found on the head node of the cluster in `ray_client_server_[port].out`
