In [None]:
import os

import seaborn as sns

from ray import init, rllib, tune, shutdown

In [None]:
from attack_simulator.agents import DEFENDERS
from attack_simulator.env import AttackSimulationEnv
from attack_simulator.graph import AttackGraph, SIZES

In [None]:
import gym
import numpy as np

class CustomPreprocessor(rllib.models.preprocessors.Preprocessor):
    """Preprocessor that converts a composite observation into a single array.

    The output shape has one entry per sub-space of the original observation
    space, and the advertised observation space is an integer ``Box`` in
    ``[0, 1]`` of that shape.
    """

    def _init_shape(self, observation_space, options=None):
        """Return the 1-D output shape: one slot per entry in ``observation_space.spaces``."""
        component_count = len(observation_space.spaces)
        return (component_count,)

    def transform(self, observation):
        """Return ``observation`` converted to a NumPy array."""
        as_array = np.array(observation)
        return as_array

    @property
    def observation_space(self):
        """Build the ``Box`` space describing transformed observations.

        The original space is attached as ``original_space``; ``self._obs_space``
        is presumably set by the base-class constructor (verify against RLlib).
        """
        box = gym.spaces.Box(low=0, high=1, shape=self.shape, dtype=int)
        box.original_space = self._obs_space
        return box

# Register the preprocessor under the name 'custom' so that a model config can
# select it with `custom_preprocessor='custom'` (as `wrap_agent` does).
rllib.models.ModelCatalog.register_custom_preprocessor('custom', CustomPreprocessor)


class AgentPolicy(rllib.policy.Policy):
    """RLlib policy that delegates every action choice to a defender agent.

    The agent class is looked up in ``DEFENDERS`` using ``config['agent_type']``.
    The policy carries no weights of its own.
    """

    def __init__(self, observation_space, action_space, config):
        """Create the wrapped agent from the spaces and the trainer config.

        Args:
            observation_space: Preprocessed observation space; its first
                dimension is used as the agent's input size.
            action_space: Discrete action space; ``action_space.n`` is the
                number of actions.
            config: Trainer config; must provide ``'seed'``, ``'agent_type'``
                and ``'env_config'['attack_graph']``.
        """
        super().__init__(observation_space, action_space, config)
        agent_settings = {
            # same as len(observation_space.original_space.spaces)
            'input_dim': observation_space.shape[0],
            'num_actions': action_space.n,
            'random_seed': config['seed'],
            'attack_graph': config['env_config']['attack_graph'],
        }
        agent_factory = DEFENDERS[config['agent_type']]
        self._agent = agent_factory(agent_settings)

    def compute_actions(self, observations, *args, **kwargs):
        """Return ``(actions, [], {})`` with one agent action per observation."""
        actions = []
        for single_obs in observations:
            actions.append(self._agent.act(single_obs))
        return actions, [], {}

    def compute_single_action(self, observation, *args, **kwargs):
        """Return ``(action, [], {})`` for a single observation."""
        chosen = self._agent.act(observation)
        return chosen, [], {}

    def get_weights(self):
        """Return an empty dict: the wrapped agent exposes no weights."""
        return {}

    def set_weights(self, weights):
        """Ignore ``weights``; there is nothing to update."""
        pass
    
    
def wrap_agent(agent_type, config):
    """Build a trainer around ``AgentPolicy`` for ``agent_type`` and instantiate it.

    Args:
        agent_type: Key into ``DEFENDERS``; also used as the trainer name.
        config: Trainer config dict; must contain an ``'env'`` entry.

    Returns:
        The trainer instance created with ``config``.
    """
    # Defaults: the caller's config plus the agent type, the 'custom'
    # preprocessor and the environment class.
    overrides = dict(
        config,
        agent_type=agent_type,
        model=dict(custom_preprocessor='custom'),
        env_class=config['env'],
    )
    default_config = rllib.agents.trainer.with_common_config(overrides)

    trainer_cls = rllib.agents.trainer_template.build_trainer(
        name=agent_type,
        default_policy=AgentPolicy,
        default_config=default_config,
    )
    return trainer_cls(config=config)

In [None]:
import pandas as pd

class RolloutAggregator:
    """Accumulates the length and total reward of each rollout episode.

    Keyword arguments passed to the constructor are copied into every episode
    record, so they end up as constant columns in the resulting DataFrame.
    The ``begin_rollout`` / ``append_step`` / ``end_rollout`` hooks allow an
    instance to be passed as the ``saver`` argument of ``rollout``.
    """

    def __init__(self, **kwargs):
        """Remember the constant per-episode fields and start with no episodes."""
        self._episodes = []
        self._kwargs = kwargs

    def begin_rollout(self):
        """Start a new episode with an empty reward list."""
        self._rewards = []

    def append_step(self, obs, action, next_obs, reward, done, info):
        """Record the reward of one step; all other arguments are ignored."""
        self._rewards += [reward]

    def end_rollout(self):
        """Close the current episode and store its length and summed reward."""
        record = {
            **self._kwargs,
            'episode_length': len(self._rewards),
            'episode_reward': sum(self._rewards),
        }
        self._episodes.append(record)

    def to_df(self):
        """Return all stored episode records as a ``pandas.DataFrame``."""
        episodes = self._episodes
        return pd.DataFrame(episodes)

In [None]:
# Pick the arguments for `init` based on where the notebook is running.
if not os.path.isdir('/var/run/secrets/kubernetes.io'):
    # Not inside a k8s pod: use 4 CPUs. Inside a container the dashboard
    # listens on all interfaces so that port-forwarding works.
    if os.path.exists("/.dockerenv"):
        dashboard_host = "0.0.0.0"
    else:
        dashboard_host = "127.0.0.1"
    args = {'num_cpus': 4, 'dashboard_host': dashboard_host}
else:
    # inside k8s pod
    args = {'address': 'auto'}

init(**args)

In [None]:
from ray.rllib.rollout import rollout
from tqdm import tqdm

# Agents to compare: names found in DEFENDERS are wrapped directly by
# `wrap_agent`; any other name is resolved through the RLlib trainer registry
# and trained before evaluation.
agent_types = ['R2D2', 'PPO', 'rule-based', 'random']
# Values passed as the `seed` entry of each trainer config.
seeds = [0, 1, 2, 3, 6, 7, 11, 28, 42, 1337]
# Number of `agent.train()` calls per RLlib trainer.
training_iterations = 50
# Number of episodes passed to `rollout` for each agent/seed/graph combination.
rollouts = 100

def generate(savename):
    """Evaluate every agent on every graph size and seed, then save the results.

    For each graph size in ``SIZES`` and each seed in ``seeds``, every agent in
    ``agent_types`` is built and rolled out for ``rollouts`` episodes. Agents
    listed in ``DEFENDERS`` are wrapped as-is; all others are RLlib trainers
    that are first trained for ``training_iterations`` iterations. Each trainer
    is stopped once its rollouts are done so that its workers are released
    before the next trainer is created.

    Args:
        savename: Path of the CSV file the collected results are written to.

    Returns:
        A ``pandas.DataFrame`` with the columns 'Agent', 'Graph size',
        'Episode lengths' and 'Returns', one row per episode recorded by the
        rollout aggregator.
    """
    frames = []

    for graph_size in SIZES:
        graph = AttackGraph(dict(graph_size=graph_size))

        for seed in seeds:
            config = dict(
                framework='torch',
                model=dict(use_lstm=True),
                env=AttackSimulationEnv,
                env_config=dict(attack_graph=graph),
                seed=seed,
                create_env_on_driver=True,  # apparently, assumed by `rollout`
                num_workers=2,  # use 0 to run on driver for debugging
                batch_mode='complete_episodes',
            )

            for agent_type in agent_types:
                needs_training = agent_type not in DEFENDERS
                if needs_training:
                    # if agent == 'contrib/AlphaZero':
                    #    config.update(custom_preprocessor='AlphaZeroPreprocessor')
                    agent = rllib.agents.registry.get_trainer_class(agent_type)(config=config)
                else:
                    agent = wrap_agent(agent_type, config)

                try:
                    if needs_training:
                        pbar = tqdm(range(training_iterations), f'{graph_size:13.13s} [{seed: 6d}] {agent_type:11.11s}')
                        for _ in pbar:
                            agent.train()
                            # TODO: break based on the results returned by train()

                    aggregator = RolloutAggregator(agent_type=agent_type, graph_size=graph.num_attacks)
                    rollout(agent, 'AttackSimulator', num_steps=0, num_episodes=rollouts, saver=aggregator)
                    frames.append(aggregator.to_df())
                finally:
                    # Release the trainer's rollout workers even if training or
                    # the rollout failed; otherwise they pile up across the
                    # (size, seed, agent) grid.
                    agent.stop()

    df = pd.concat(frames, ignore_index=True).rename(
        columns=dict(
            agent_type='Agent',
            graph_size='Graph size',
            episode_length='Episode lengths',
            episode_reward='Returns',
        )
    )
    df.to_csv(savename)
    return df

In [None]:
%%capture noise --no-stderr

savename = 'data.csv'

# Reuse previously saved results when the CSV exists; otherwise run the
# experiment (which also writes the CSV).
if os.path.exists(savename):
    df = pd.read_csv(savename, index_col=0)
else:
    df = generate(savename)
df

In [None]:
# Ray is no longer needed: the remaining cells only plot and summarise `df`.
shutdown()

In [None]:
# Global plot styling for the figures below: dark grid theme, 12x8 figure size.
sns.set(style='darkgrid', rc={'figure.figsize': (12, 8)})

In [None]:
# Returns against graph size, one line per agent; ci='sd' shades the standard
# deviation around each aggregated line.
g = sns.lineplot(data=df, x='Graph size', y='Returns', hue='Agent', ci='sd')
g.legend(title='Agent', loc='upper left')
# Kept last: the cell displays the value of its final expression.
g.set_title('Returns vs Size (random attacker)')

In [None]:
# Episode lengths against graph size, one line per agent; ci='sd' shades the
# standard deviation around each aggregated line.
g = sns.lineplot(data=df, x='Graph size', y='Episode lengths', hue='Agent', ci='sd')
g.legend(title='Agent', loc='upper left')
# Kept last: the cell displays the value of its final expression.
g.set_title('Episode lengths vs Size (random attacker)')

In [None]:
# Allow up to 32 columns to be displayed for the wide describe() table below.
pd.set_option('display.max_columns', 32)
# Summary statistics of every numeric column, grouped per agent.
df.groupby('Agent').describe()