In [1]:
import yaml

from torchrl.envs.utils import check_env_specs

from config import TrainingConfig, LossConfig, AgentNNConfig, BackboneConfig, HeadConfig, EnvConfig
from algorithms.simple_nn import TrainableNNAgentPolicy
from algorithms.generic_policy import MultiAgentPolicy
from algorithms.generator import AgentGenerator
from algorithms.keys_processors import CombinedExtractor

# Parse the transformer run configuration once; every config object in this
# cell is built from this same dictionary.
with open("configs/run/test_single_training_transformer.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Role-specific training and loss settings, built with the matching suffix
# argument (defender first, then attacker).
training_config_defender, loss_config_defender = (
    TrainingConfig.from_dict(config, suffix="_defender"),
    LossConfig.from_dict(config, suffix="_defender"),
)
training_config_attacker, loss_config_attacker = (
    TrainingConfig.from_dict(config, suffix="_attacker"),
    LossConfig.from_dict(config, suffix="_attacker"),
)

# Network settings: agent-level options, then backbone and head.
agent_config = AgentNNConfig.from_dict(config)
backbone_config, head_config = (
    BackboneConfig.from_dict(config, suffix="_backbone"),
    HeadConfig.from_dict(config, suffix="_head"),
)

# Environment settings; create() returns the map together with the environment.
env_config = EnvConfig.from_dict(config)
env_map, env = env_config.create("cuda")

# Validate the environment's specs before any agent is built on top of it.
check_env_specs(env)

2025-06-22 22:25:24,615 [torchrl][INFO] check_env_specs succeeded!


In [2]:
# Build the transformer-based defender and attacker and load their checkpoints.
#
# Every agent in this cell is given the same explicit device string, "cuda:0".
# Previously the attacker used the index-less "cuda" while the defender used
# "cuda:0"; the two spellings are distinct device objects for equality checks,
# so one spelling is used throughout.

# --- Defender (player_type=0) ---------------------------------------------
defender_extractor = CombinedExtractor(player_type=0, env=env, actions=backbone_config.extractors)
defender_agent_transformer = TrainableNNAgentPolicy(
    player_type=0,
    max_sequence_size=env_config.num_steps + 1,
    extractor=defender_extractor,
    action_size=env.action_size,
    env_type=env_config.env_pair,
    agent_config=agent_config,
    backbone_config=backbone_config,
    head_config=head_config,
    device="cuda:0",
    loss_config=loss_config_defender,
    training_config=training_config_defender,
    run_name="test",
)
# Call eval() first, then load the saved defender checkpoint.
defender_agent_transformer.eval()
defender_agent_transformer.load("saved_models/2025-06-16_10:19:26-full-transformer/defender/agent_0.pth")

# --- Attacker (player_type=1) ---------------------------------------------
attacker_extractor = CombinedExtractor(player_type=1, env=env, actions=backbone_config.extractors)
attacker_agent_transformer = MultiAgentPolicy(
    action_size=env.action_size,
    player_type=1,
    device="cuda:0",
    embedding_size=agent_config.embedding_size,
    run_name="test",
    # The generator receives the policy class plus the keyword arguments to
    # build it with; they mirror the defender's arguments with attacker values.
    policy_generator=AgentGenerator(
        TrainableNNAgentPolicy,
        {
            "extractor": attacker_extractor,
            "max_sequence_size": env_config.num_steps + 1,
            "action_size": env.action_size,
            "env_type": env_config.env_pair,
            "player_type": 1,
            "device": "cuda:0",
            "loss_config": loss_config_attacker,
            "training_config": training_config_attacker,
            "run_name": "test",
            "agent_config": agent_config,
            "backbone_config": backbone_config,
            "head_config": head_config,
        }
    ),
)
# The attacker is loaded from a directory rather than a single .pth file.
attacker_agent_transformer.eval()
attacker_agent_transformer.load("saved_models/2025-06-16_10:19:26-full-transformer/attacker")

In [3]:
# Load the FFN run configuration.
# NOTE: this rebinds `config` and every *_config name that the first cell
# created from the transformer YAML (only `env_config` is left untouched), so
# cells executed after this one read the FFN values through those names.
with open("configs/run/test_single_training.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

training_config_defender, loss_config_defender = (
    TrainingConfig.from_dict(config, suffix="_defender"),
    LossConfig.from_dict(config, suffix="_defender"),
)
training_config_attacker, loss_config_attacker = (
    TrainingConfig.from_dict(config, suffix="_attacker"),
    LossConfig.from_dict(config, suffix="_attacker"),
)
agent_config = AgentNNConfig.from_dict(config)
backbone_config, head_config = (
    BackboneConfig.from_dict(config, suffix="_backbone"),
    HeadConfig.from_dict(config, suffix="_head"),
)

# --- FFN defender (player_type=0) -----------------------------------------
defender_extractor_ffn = CombinedExtractor(player_type=0, env=env, actions=backbone_config.extractors)
defender_agent_ffn = TrainableNNAgentPolicy(
    player_type=0,
    extractor=defender_extractor_ffn,
    max_sequence_size=env_config.num_steps + 1,
    action_size=env.action_size,
    env_type=env_config.env_pair,
    agent_config=agent_config,
    backbone_config=backbone_config,
    head_config=head_config,
    loss_config=loss_config_defender,
    training_config=training_config_defender,
    device="cuda:0",
    run_name="test",
)
defender_agent_ffn.eval()
defender_agent_ffn.load("saved_models/2025-06-16_10:19:26-full-fnn/defender/agent_0.pth")

# --- FFN attacker (player_type=1) -----------------------------------------
attacker_extractor_ffn = CombinedExtractor(player_type=1, env=env, actions=backbone_config.extractors)

# Keyword arguments handed to AgentGenerator alongside the policy class.
attacker_policy_kwargs_ffn = {
    "extractor": attacker_extractor_ffn,
    "max_sequence_size": env_config.num_steps + 1,
    "action_size": env.action_size,
    "env_type": env_config.env_pair,
    "player_type": 1,
    "device": "cuda:0",
    "loss_config": loss_config_attacker,
    "training_config": training_config_attacker,
    "run_name": "test",
    "agent_config": agent_config,
    "backbone_config": backbone_config,
    "head_config": head_config,
}
attacker_generator_ffn = AgentGenerator(TrainableNNAgentPolicy, attacker_policy_kwargs_ffn)

attacker_agent_ffn = MultiAgentPolicy(
    policy_generator=attacker_generator_ffn,
    action_size=env.action_size,
    embedding_size=agent_config.embedding_size,
    player_type=1,
    device="cuda:0",
    run_name="test",
)
# The attacker checkpoint is addressed by directory, not by a single file.
attacker_agent_ffn.eval()
attacker_agent_ffn.load("saved_models/2025-06-16_10:19:26-full-fnn/attacker")

In [4]:
from algorithms.coevosg import CoevoSGDefenderAgent, CoevoSGAttackerAgent, CoevoSGConfig

# Both CoevoSG agents are constructed with device="cpu" and each receives its
# own freshly created CoevoSGConfig() instance.
defender_agent_coevosg = CoevoSGDefenderAgent(
    env=env,
    config=CoevoSGConfig(),
    run_name="test",
    device="cpu",
)
attacker_agent_coevosg = CoevoSGAttackerAgent(
    env=env,
    config=CoevoSGConfig(),
    run_name="test",
    device="cpu",
)

# eval() is called on both agents before either checkpoint is loaded.
for coevosg_agent in (defender_agent_coevosg, attacker_agent_coevosg):
    coevosg_agent.eval()

defender_agent_coevosg.load("saved_models/2025-06-11_10:38:51-full-coevosg-/defender/agent_0.pth")
attacker_agent_coevosg.load("saved_models/2025-06-11_10:38:51-full-coevosg-/attacker/agent_0.pth")

In [5]:
from algorithms.generic_policy import RandomAgent, GreedyOracleAgent

# Constructor arguments shared by both baseline attackers.
# `agent_config` is whichever object is currently bound to that name; in a
# top-to-bottom run that is the one built from test_single_training.yaml.
baseline_attacker_kwargs = dict(
    action_size=env.action_size,
    embedding_size=agent_config.embedding_size,
    player_type=1,
    device="cuda:0",
    run_name="test",
)

attacker_agent_random = RandomAgent(**baseline_attacker_kwargs)

# The greedy oracle additionally needs the step count and the environment map.
attacker_greedy_oracle = GreedyOracleAgent(
    total_steps=env.num_steps,
    env_map=env_map,
    **baseline_attacker_kwargs,
)

In [6]:
from utils import compare_agent_pairs

# (defender, attacker, label) triples, one per trained method.
trained_agent_pairs = [
    (defender_agent_transformer, attacker_agent_transformer, "transformer"),
    (defender_agent_ffn, attacker_agent_ffn, "ffn"),
    (defender_agent_coevosg, attacker_agent_coevosg, "coevosg"),
]

# (attacker, label) pairs for the baseline attackers that have no defender.
baseline_attackers = [
    (attacker_agent_random, "random"),
    (attacker_greedy_oracle, "greedy"),
]

# Run the comparison on the shared environment; print_results=True makes the
# helper print its per-matchup report in addition to returning `results`.
results = compare_agent_pairs(
    trained_agent_pairs,
    baseline_attackers,
    env,
    print_results=True,
)

Defender: transformer vs Attacker: transformer => Defender avg reward: 371.7089 (18.7252)
Defender: transformer vs Attacker: ffn => Defender avg reward: 375.0861 (5.5061)
Defender: transformer vs Attacker: coevosg => Defender avg reward: 346.2115
Defender: transformer vs Attacker: random => Defender avg reward: 383.2437
Defender: transformer vs Attacker: greedy => Defender avg reward: 331.6626
Defender: transformer => Avg reward: 361.5826 (21.7116)
Defender: ffn vs Attacker: transformer => Defender avg reward: 370.4366 (18.7444)
Defender: ffn vs Attacker: ffn => Defender avg reward: 381.5039 (6.3083)
Defender: ffn vs Attacker: coevosg => Defender avg reward: 322.1329
Defender: ffn vs Attacker: random => Defender avg reward: 384.2910
Defender: ffn vs Attacker: greedy => Defender avg reward: 371.1407
Defender: ffn => Avg reward: 365.9010 (25.2255)
Defender: coevosg vs Attacker: transformer => Defender avg reward: 357.4727 (9.7810)
Defender: coevosg vs Attacker: ffn => Defender avg reward

In [None]:
# Show the value returned by compare_agent_pairs as this cell's output.
results

In [None]:
from algorithms.generic_policy import CombinedPolicy

# Pair the transformer defender with the greedy-oracle attacker.
combined = CombinedPolicy(
    defender_agent_transformer,
    attacker_greedy_oracle,
)

# Evaluate the pair on the shared environment.
# NOTE(review): 1000 is presumably the number of evaluation episodes/steps --
# confirm against CombinedPolicy.evaluate.
output = combined.evaluate(
    env,
    1000,
    current_player=0,
    add_logs=False,
)

In [None]:
# Show the value returned by combined.evaluate(...) as this cell's output.
output