In [1]:
from gym.spaces import Box
import numpy as np

from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.misc import SlimFC
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ppo import ppo
from ray.rllib.models.tf.tf_action_dist import Categorical

tf1, tf, tfv = try_import_tf()

In [12]:
class HieracrchyModel(TFModelV2):
    """Two-level model: a selector network chooses a pre-trained PPO sub-agent.

    A small fully connected "selector" network scores the available
    sub-agents for every observation.  One sub-agent index (an "option") is
    sampled per observation from those scores, and the logits returned by
    ``forward`` are the logits of the sampled sub-agent for that observation.

    The sub-agents are PPO policies restored from the checkpoints listed in
    ``SUB_AGENT_CHECKPOINTS``; the position in that tuple is the option index.

    Notes:
        * ``__init__`` reads the notebook-level global ``batch_size`` and
          needs the ``"CybORG"`` environment to be registered, so both must
          exist before the model is instantiated.
        * ``forward`` calls ``.numpy()`` and therefore only works when the
          tensors are eager tensors.
    """

    # Checkpoints of the pre-trained sub-agents, in option-index order:
    # index 0 is the "b_line" agent, index 1 the "meander" agent.
    SUB_AGENT_CHECKPOINTS = (
        "b_line_agent/checkpoint_000109/checkpoint-109",
        "supervisor_ppo/checkpoint_000183/checkpoint-183",
    )

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the selector network and load the frozen-architecture sub-agents.

        Args:
            obs_space: Observation space forwarded to ``TFModelV2`` and to the
                selector network.
            action_space: Action space forwarded to ``TFModelV2``.
            num_outputs: Output size forwarded to ``TFModelV2``.
            model_config: RLlib model config, also used for the selector.
            name: Model name, also used for the selector.
        """
        super(HieracrchyModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name)

        # The selector emits one logit per sub-agent, so its output size is
        # derived from the checkpoint list instead of being hard-coded.
        n_options = len(self.SUB_AGENT_CHECKPOINTS)
        self.model = FullyConnectedNetwork(
            obs_space, n_options, n_options, model_config, name
        )

        # Configuration used only to rebuild the sub-agent trainers so that
        # their checkpoints can be restored.
        ppo_config = ppo.DEFAULT_CONFIG.copy()
        ppo_config.update({"num_gpus": 0,"num_workers": 0,
                # Also, use "framework: tf2" for tfe eager execution.
                "framework": "tf2",
                "train_batch_size": batch_size,
                "horizon": 100,
                "gamma": 0.95,
                "model": {
                    "fcnet_hiddens": [512, 512],
                    "fcnet_activation": "relu",
                }})

        self.sub_agents = [
            self._load_sub_agent(checkpoint, ppo_config)
            for checkpoint in self.SUB_AGENT_CHECKPOINTS
        ]

        # Option sampled for the first observation of the most recent
        # forward pass (read by the policy's extra-action-output hook).
        self.action = 0

    @staticmethod
    def _load_sub_agent(checkpoint_path, ppo_config):
        """Restore a PPO trainer from ``checkpoint_path`` and return its policy model."""
        trainer = PPOTrainer(config=ppo_config,env="CybORG")
        trainer.restore(checkpoint_path)
        return trainer.get_policy().model

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Return, per observation, the logits of a randomly selected sub-agent.

        Args:
            input_dict: Must contain the observations under
                ``SampleBatch.CUR_OBS``.
            state: Passed through unchanged to every sub-network and returned.
            seq_lens: Passed through unchanged to every sub-network.

        Returns:
            Tuple ``(sub_logits, state)`` where row ``i`` of ``sub_logits`` is
            the output of the sub-agent sampled for observation ``i``.
        """
        flat_input = {'obs_flat': input_dict[SampleBatch.CUR_OBS]}
        option_logits = self.model.forward(flat_input, state, seq_lens)[0]

        # One option per batch row.  Previously only row 0 was sampled and
        # that single option was applied to the whole batch.
        options = tf.random.categorical(option_logits, 1, dtype=tf.int32)[:, 0]

        # Kept for backward compatibility: the option of the first row, as a
        # NumPy scalar (requires eager tensors).
        self.action = options.numpy()[0]

        # Stack the sub-agents' logits along a new axis 1, then pick for each
        # row the slice that belongs to its sampled option.
        per_agent_logits = tf.stack(
            [agent.forward(flat_input, state, seq_lens)[0]
             for agent in self.sub_agents],
            axis=1)
        sub_logits = tf.gather(per_agent_logits, options, axis=1, batch_dims=1)
        return sub_logits, state

    @override(ModelV2)
    def value_function(self):
        """Return the selector network's value estimate for the last forward pass."""
        return self.model.value_function()


In [15]:
from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy
from ray.rllib.agents.ppo.ppo_tf_policy import ppo_surrogate_loss
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.agents.ppo.ppo import PPOTrainer

def after_action(policy):
    """Collect extra per-step outputs from the policy's model.

    Returns an empty dict when the model is a ``tf.keras.Model``.  Otherwise
    returns the model's last selected option (wrapped in a one-element list)
    under ``SampleBatch.AGENT_INDEX`` and the model's value-function output
    under ``SampleBatch.VF_PREDS``.
    """
    model = policy.model
    if not isinstance(model, tf.keras.Model):
        extra_outputs = {
            SampleBatch.AGENT_INDEX: [model.action],
            SampleBatch.VF_PREDS: model.value_function(),
        }
        return extra_outputs
    return {}

def loss(self, model, dist_class, train_batch):
    """PPO surrogate loss computed on the recorded option indices.

    Overwrites ``train_batch[SampleBatch.ACTIONS]`` in place with the values
    stored under ``SampleBatch.AGENT_INDEX`` and then delegates to
    ``ppo_surrogate_loss`` with otherwise unchanged arguments.
    """
    option_indices = train_batch[SampleBatch.AGENT_INDEX]
    train_batch[SampleBatch.ACTIONS] = option_indices
    surrogate = ppo_surrogate_loss(self, model, dist_class, train_batch)
    return surrogate

def build_model(policy, obs_space, action_space, config) -> ModelV2:
    """Create the policy's model through ``ModelCatalog.get_model_v2``.

    ``policy`` and ``action_space`` are accepted but not used: the catalog is
    called with the literal ``2`` for both its second and third positional
    arguments, ``config["model"]`` as the model config, and
    ``HieracrchyModel`` as the required model interface.
    """
    make = ModelCatalog.get_model_v2
    model_settings = config["model"]
    # NOTE(review): the literal 2 presumably stands for the number of
    # sub-agents; it is passed where the catalog expects the action space as
    # well as the output size -- confirm this is intended.
    n_options = 2
    return make(
        obs_space,
        n_options,
        n_options,
        model_settings,
        name="option_critic_model",
        framework="tf",
        model_interface=HieracrchyModel,
    )

# PPO TF policy variant wired to this notebook's hooks: the custom model
# factory, the loss wrapper, and the extra per-action outputs.
HierarchyPolicy = PPOTFPolicy.with_updates(
    extra_action_out_fn=after_action,
    make_model=build_model,
    loss_fn=loss,
    name="HierarchyPPOPolicy",
)

class HieracrchyTrainer(PPOTrainer):
    """PPO trainer whose default policy class is ``HierarchyPolicy``."""

    def get_default_policy_class(self, config):
        """Return ``HierarchyPolicy`` regardless of ``config``."""
        policy_cls = HierarchyPolicy
        return policy_cls

AttributeError: type object 'HieracrchyModel' has no attribute 'build'

In [16]:
from CybORG import CybORG
from CybORG.Agents import B_lineAgent, SleepAgent, GreenAgent
from CybORG.Agents.SimpleAgents.BaseAgent import BaseAgent
from CybORG.Agents.SimpleAgents.BlueReactAgent import BlueReactRemoveAgent
from CybORG.Agents.SimpleAgents.Meander import RedMeanderAgent
from CybORG.Agents.Wrappers.EnumActionWrapper import EnumActionWrapper
from CybORG.Agents.Wrappers.FixedFlatWrapper import FixedFlatWrapper
from CybORG.Agents.Wrappers.OpenAIGymWrapper import OpenAIGymWrapper
from CybORG.Agents.Wrappers.ReduceActionSpaceWrapper import ReduceActionSpaceWrapper
from CybORG.Agents.Wrappers import ChallengeWrapper
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
from ray.rllib.agents import ppo
import inspect
from CybORG.Agents.Wrappers.rllib_wrapper import RLlibWrapper

def env_creator(env_config: dict):
    """Create the CybORG Scenario2 simulation wrapped for RLlib.

    Args:
        env_config: Environment config supplied by the caller; not used.

    Returns:
        An ``RLlibWrapper`` around a simulated CybORG instance that runs
        ``Scenario2.yaml`` with ``B_lineAgent`` as Red and ``GreenAgent`` as
        Green, exposing the "Blue" agent with ``max_steps=100``.
    """
    # Function-scope import so the function does not depend on cell order.
    import os

    # Locate the scenario file relative to the directory of the file that
    # defines CybORG.  This replaces slicing a fixed number of characters off
    # the path, which silently breaks if the file name ever changes.
    package_dir = os.path.dirname(str(inspect.getfile(CybORG)))
    path = os.path.join(package_dir, 'Shared', 'Scenarios', 'Scenario2.yaml')

    agents = {"Red": B_lineAgent, "Green": GreenAgent}
    cyborg = CybORG(scenario_file=path, environment='sim', agents=agents)
    env = RLlibWrapper(env=cyborg, agent_name="Blue", max_steps=100)
    return env

def print_results(results_dict):
    """Print a one-line summary of a training-iteration result.

    Reads ``training_iteration``, ``episode_reward_mean``,
    ``episode_reward_max`` and ``episode_reward_min`` from ``results_dict``
    (raising ``KeyError`` if one is missing) and prints them tab-separated,
    rewards rounded to one decimal place.
    """
    wanted_keys = (
        "training_iteration",
        "episode_reward_mean",
        "episode_reward_max",
        "episode_reward_min",
    )
    train_iter, r_mean, r_max, r_min = [results_dict[key] for key in wanted_keys]
    print(f"{train_iter:4d} \tr_mean: {r_mean:.1f} \tr_max: {r_max:.1f} \tr_min: {r_min: .1f}")

import subprocess
import json
import os

# Training batch size for the top-level trainer. HieracrchyModel.__init__ also
# reads this notebook global when it configures the sub-agent trainers, so it
# must be defined before the trainer (and thus the model) is constructed.
batch_size = 4000
# Set up CybORG: register the environment factory under the name "CybORG".
# The same name is used for the top-level trainer below and for the sub-agent
# trainers created inside HieracrchyModel.
register_env(name="CybORG", env_creator=env_creator)
config = ppo.DEFAULT_CONFIG.copy()

# Make the hierarchy model selectable through config["model"]["custom_model"].
ModelCatalog.register_custom_model("h_model", HieracrchyModel)

# NOTE(review): make_archive and allrewards are not used in the visible part
# of the notebook -- confirm whether a later cell needs them.
from shutil import make_archive
allrewards = []

# Override the PPO defaults for the top-level (selector) trainer.
config.update({"num_gpus": 1,"num_workers": 0,
                # "tf2" selects TensorFlow eager execution; the model's
                # forward pass calls .numpy() and depends on it.
                "framework": "tf2",
                "train_batch_size": batch_size,
                "horizon": 100,
                "gamma": 0.9,
                "model": {
                    "custom_model": "h_model",
                    "fcnet_hiddens": [512, 512],
                    "fcnet_activation": "relu",
                },

                }) 
trainer = HieracrchyTrainer(config=config, env="CybORG")

# NOTE(review): these three lists are never filled in the visible part of the
# notebook -- confirm whether a later cell uses them.
reward = []
novel_obs = []
novel_actions = []
# Run 200 training iterations, printing a one-line reward summary after each.
for i in range(200):
    results_dict = trainer.train()
    print_results(results_dict)


[2m[36m(RolloutWorker pid=2945)[0m 2022-08-04 15:57:26,904	ERROR worker.py:451 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2945, ip=172.28.0.2, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f38288c10a0>)
[2m[36m(RolloutWorker pid=2945)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 630, in __init__
[2m[36m(RolloutWorker pid=2945)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=2945)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 1788, in _build_policy_map
[2m[36m(RolloutWorker pid=2945)[0m     self.policy_map.create_policy(
[2m[36m(RolloutWorker pid=2945)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/policy_map.py", line 146, in create_policy
[2m[36m(RolloutWorker pid=2945)[0m     self[policy_id] = class_(
[2m[36m

RayActorError: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2943, ip=172.28.0.2, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f797d9190d0>)
  File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 630, in __init__
    self._build_policy_map(
  File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 1788, in _build_policy_map
    self.policy_map.create_policy(
  File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/policy_map.py", line 146, in create_policy
    self[policy_id] = class_(
  File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/eager_tf_policy.py", line 405, in __init__
    self.model = make_model(self, observation_space, action_space, config)
  File "<ipython-input-9-952e9e988812>", line 43, in build
  File "<ipython-input-12-f25df86380b2>", line 7, in __init__
  File "/usr/local/lib/python3.8/dist-packages/ray/rllib/models/tf/fcnet.py", line 56, in __init__
    shape=(int(np.product(obs_space.shape)),), name="observations"
AttributeError: 'HierarchyPPOPolicy_eager' object has no attribute 'shape'

[2m[36m(RolloutWorker pid=2952)[0m 2022-08-04 15:57:26,987	ERROR worker.py:451 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2952, ip=172.28.0.2, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f0bc6325070>)
[2m[36m(RolloutWorker pid=2952)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 630, in __init__
[2m[36m(RolloutWorker pid=2952)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=2952)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 1788, in _build_policy_map
[2m[36m(RolloutWorker pid=2952)[0m     self.policy_map.create_policy(
[2m[36m(RolloutWorker pid=2952)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/policy_map.py", line 146, in create_policy
[2m[36m(RolloutWorker pid=2952)[0m     self[policy_id] = class_(
[2m[36m

[2m[36m(RolloutWorker pid=3159)[0m 2022-08-04 15:57:27,250	ERROR worker.py:451 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=3159, ip=172.28.0.2, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f85f8b730d0>)
[2m[36m(RolloutWorker pid=3159)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 630, in __init__
[2m[36m(RolloutWorker pid=3159)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=3159)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 1788, in _build_policy_map
[2m[36m(RolloutWorker pid=3159)[0m     self.policy_map.create_policy(
[2m[36m(RolloutWorker pid=3159)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/policy_map.py", line 146, in create_policy
[2m[36m(RolloutWorker pid=3159)[0m     self[policy_id] = class_(
[2m[36m

[2m[36m(RolloutWorker pid=2949)[0m 2022-08-04 15:57:27,681	ERROR worker.py:451 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=2949, ip=172.28.0.2, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fc20c9ef0a0>)
[2m[36m(RolloutWorker pid=2949)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 630, in __init__
[2m[36m(RolloutWorker pid=2949)[0m     self._build_policy_map(
[2m[36m(RolloutWorker pid=2949)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 1788, in _build_policy_map
[2m[36m(RolloutWorker pid=2949)[0m     self.policy_map.create_policy(
[2m[36m(RolloutWorker pid=2949)[0m   File "/usr/local/lib/python3.8/dist-packages/ray/rllib/policy/policy_map.py", line 146, in create_policy
[2m[36m(RolloutWorker pid=2949)[0m     self[policy_id] = class_(
[2m[36m