In [1]:
from gymnasium import spaces
import yaml
import torch

from agilerl.modules.configs import MlpNetConfig, CnnNetConfig, MultiInputNetConfig
from agilerl.networks.q_networks import QNetwork, RainbowQNetwork
from agilerl.networks.value_functions import ValueFunction
from agilerl.networks.actors import StochasticActor, DeterministicActor

from agilerl.algorithms.dqn import DQN
from agilerl.utils.utils import create_population

  from .autonotebook import tqdm as notebook_tqdm


### QNetwork

In [2]:
from tests.helper_functions import generate_dict_or_tuple_space
from agilerl.utils.evolvable_networks import is_image_space

# Observation spaces reused by later cells: a (4, 84, 84) box with bounds
# [0, 255], a 4-element float32 vector with bounds [-1, 1], and a Dict space
# that combines both under the keys 'img' and 'vec'.
img_space = spaces.Box(low=0, high=255, shape=(4, 84, 84))
vec_space = spaces.Box(low=-1, high=1, shape=(4,), dtype='float32')
dict_space = spaces.Dict({'img': img_space, 'vec': vec_space})

# Use CUDA when torch reports an available GPU, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Encoder configurations, one per kind of observation space defined above.
img_config = CnnNetConfig(
    channel_size=[16],
    kernel_size=[4],
    stride_size=[1],
)
vec_config = MlpNetConfig(
    hidden_size=[64],
)
multi_input_config = MultiInputNetConfig(
    channel_size=[8, 8, 8],
    kernel_size=[2, 2, 2],
    stride_size=[2, 2, 2],
    hidden_size=[32, 32, 32],
    vector_space_mlp=False
)

# RainbowQNetwork over the vector observation space with 4 discrete actions.
# `support` holds 51 evenly spaced values in [-10, 10]; it is allocated on
# `device` so that it sits on the same device as the network (a CPU tensor
# would not match a network placed on CUDA).
actor = RainbowQNetwork(
    observation_space=vec_space,
    action_space=spaces.Discrete(4),
    encoder_config=vec_config,
    support=torch.linspace(-10, 10, 51, device=device),
    latent_dim=64,
    device=device
)

In [3]:
# Display the network's module tree (the cell's last expression is rendered).
# NOTE(review): the saved output shows a StochasticActor, while the visible
# defining cell builds a RainbowQNetwork -- the output looks stale; re-run from
# a fresh kernel to confirm.
actor

StochasticActor(
  (encoder): EvolvableMLP(
    (model): Sequential(
      (encoder_linear_layer_1): Linear(in_features=4, out_features=64, bias=True)
      (encoder_layer_norm_1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (encoder_activation_1): ReLU()
      (encoder_linear_layer_output): Linear(in_features=64, out_features=64, bias=True)
      (encoder_activation_output): ReLU()
    )
  )
  (head_net): EvolvableDistribution(
    (_wrapped): EvolvableMLP(
      (model): Sequential(
        (actor_linear_layer_1): Linear(in_features=64, out_features=16, bias=True)
        (actor_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (actor_activation_1): ReLU()
        (actor_linear_layer_output): Linear(in_features=16, out_features=4, bias=True)
        (actor_activation_output): Softmax(dim=-1)
      )
    )
  )
)

In [4]:
# Look up the mutation method by its dotted name and call it.
# NOTE(review): "encoder.add_node" is passed to getattr as ONE attribute name,
# so this relies on the network object resolving dotted names itself (the saved
# log mentions wrapping "encoder.add_node") -- confirm against the agilerl
# version in use before rewriting it as a plain attribute chain.
getattr(actor, "encoder.add_node")()

Wrapping mutation method encoder.add_node for StochasticActor
Wrapping mutation method add_node for EvolvableMLP
Recreating network after mutation: add_node


{'hidden_layer': 0, 'numb_new_nodes': 32}

In [5]:
# Display the network again so its layer sizes can be compared with the
# printout made before the add_node call.
actor

StochasticActor(
  (encoder): EvolvableMLP(
    (model): Sequential(
      (encoder_linear_layer_1): Linear(in_features=4, out_features=96, bias=True)
      (encoder_layer_norm_1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      (encoder_activation_1): ReLU()
      (encoder_linear_layer_output): Linear(in_features=96, out_features=64, bias=True)
      (encoder_activation_output): ReLU()
    )
  )
  (head_net): EvolvableDistribution(
    (_wrapped): EvolvableMLP(
      (model): Sequential(
        (actor_linear_layer_1): Linear(in_features=64, out_features=16, bias=True)
        (actor_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (actor_activation_1): ReLU()
        (actor_linear_layer_output): Linear(in_features=16, out_features=4, bias=True)
        (actor_activation_output): Softmax(dim=-1)
      )
    )
  )
)

In [3]:
# Create a DQN population on the (4, 84, 84) observation space with 4 discrete
# actions. The leading 4 is passed positionally (presumably the population
# size -- confirm against DQN.population).
pop = DQN.population(
    4,
    observation_space=img_space,
    action_space=spaces.Discrete(4),
)

In [3]:
from agilerl.modules.cnn import EvolvableCNN
from agilerl.hpo.mutation import Mutations

In [3]:
from accelerate import Accelerator

# Load the training configuration. The path is relative, so the notebook must
# be run from a working directory that contains `configs/training/`.
# The encoding is given explicitly so parsing does not depend on the locale.
with open('configs/training/maddpg.yaml', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Candidate action spaces; only `discrete_actions` is used in this cell.
vector_actions = spaces.Box(low=-1, high=1, shape=(4,), dtype='float32')
discrete_actions = spaces.Discrete(4)

# accelerator = Accelerator()
INIT_HP = config["INIT_HP"]
MUTATION_PARAMS = config["MUTATION_PARAMS"]

# Single source of truth for the number of agents: the agent ids and the
# per-agent observation/action space lists below are all derived from it.
n_agents = 4
INIT_HP['AGENT_IDS'] = [f'agent_{i}' for i in range(n_agents)]

# Build the population; every agent gets the same Dict observation space and
# the same discrete action space.
agent_pop = create_population(
    algo=INIT_HP["ALGO"],
    observation_space=[dict_space for _ in range(n_agents)],
    action_space=[discrete_actions for _ in range(n_agents)],
    net_config={'encoder_config': multi_input_config},
    INIT_HP=INIT_HP,
    population_size=INIT_HP["POP_SIZE"],
    num_envs=INIT_HP["NUM_ENVS"],
    device=device,
    # accelerator=accelerator
)

In [4]:
# Pick the first member of the population for inspection in the next cells.
ind = agent_pop[0]

In [6]:
# Clone the selected agent; the clone is not stored, only its repr is displayed.
ind.clone()

<agilerl.algorithms.maddpg.MADDPG at 0x7f10d2bb7950>

In [5]:
# Re-select the first population member and display its optimizer wrapper.
ind = agent_pop[0]
ind.optimizer

OptimizerWrapper(
    optimizer=Adam,
    lr=0.0001,
    networks=['actor'],
    optimizer_kwargs={}
    multiagent=False
)

In [5]:
# Build a Mutations helper, then apply an architecture mutation to a clone of
# every agent in the population.
# NOTE(review): the first eight arguments are positional, so their meaning
# depends on the parameter order of the installed agilerl `Mutations.__init__`
# -- confirm the signature and consider switching to keyword arguments.
mutations = Mutations(
    "test",
    0,
    1,
    0.5,
    0,
    0,
    0,
    0.5,
    agent_ids=INIT_HP['AGENT_IDS'],
    device=device
)
# Clone first so that architecture_mutate is called on the copies rather than
# on the members of agent_pop themselves.
new_population = [agent.clone(wrap=False) for agent in agent_pop]
mutated_population = [
    mutations.architecture_mutate(agent) for agent in new_population
]

[OptimizerConfig(name='optimizer', networks=['actor'], lr='lr', optimizer_cls='Adam', optimizer_kwargs={}, multiagent=False)]
[OptimizerConfig(name='optimizer', networks=['actor'], lr='lr', optimizer_cls='Adam', optimizer_kwargs={}, multiagent=False)]
[OptimizerConfig(name='optimizer', networks=['actor'], lr='lr', optimizer_cls='Adam', optimizer_kwargs={}, multiagent=False)]
[OptimizerConfig(name='optimizer', networks=['actor'], lr='lr', optimizer_cls='Adam', optimizer_kwargs={}, multiagent=False)]


In [9]:
# Report the first agent's lr, learn_step and batch_size, one value per line.
ind = agent_pop[0]
print(ind.lr, ind.learn_step, ind.batch_size, sep="\n")

0.001
2048
128
