In [1]:
from gymnasium import spaces
import yaml
import torch

from agilerl.modules.configs import MlpNetConfig, CnnNetConfig, MultiInputNetConfig
from agilerl.networks.q_networks import QNetwork, RainbowQNetwork
from agilerl.networks.value_functions import ValueFunction
from agilerl.networks.actors import StochasticActor, DeterministicActor

from agilerl.algorithms.dqn import DQN
from agilerl.utils.utils import create_population

  from .autonotebook import tqdm as notebook_tqdm


### QNetwork

In [2]:
from tests.helper_functions import generate_dict_or_tuple_space

# --- Observation spaces ------------------------------------------------------
# An image-shaped Box of shape (4, 84, 84), a 4-element float32 vector Box, and
# a Dict space bundling the two under the keys 'img' and 'vec'.
img_space = spaces.Box(0, 255, shape=(4, 84, 84))
vec_space = spaces.Box(-1, 1, shape=(4,), dtype="float32")
dict_space = spaces.Dict({"img": img_space, "vec": vec_space})

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Encoder configurations --------------------------------------------------
# One config per observation flavour: CNN for images, MLP for vectors, and a
# multi-input config for the Dict space.
img_config = CnnNetConfig(channel_size=[16, 16], kernel_size=[4, 4], stride_size=[1, 1])
vec_config = MlpNetConfig(hidden_size=[64])
multi_input_config = MultiInputNetConfig(
    channel_size=[8] * 3,
    kernel_size=[2] * 3,
    stride_size=[2] * 3,
    hidden_size=[32] * 3,
    vector_space_mlp=False,
)

# --- Actor -------------------------------------------------------------------
# Stochastic policy over 4 discrete actions, fed by the image space through the
# CNN encoder config defined above.
# NOTE(review): a commented-out `support=torch.linspace(-10, 10, 51)` kwarg used
# to sit in this call -- presumably left over from a distributional Q-network
# experiment; confirm before restoring it.
actor = StochasticActor(
    observation_space=img_space,
    action_space=spaces.Discrete(4),
    encoder_config=img_config,
    latent_dim=64,
    device=device,
)


In [25]:
# Display the actor's module tree (encoder + head_net) so it can be compared
# with the repr shown after the `head_net.add_layer` call in the next cell.
actor

StochasticActor(
  (encoder): EvolvableCNN(
    (model): Sequential(
      (encoder_conv_layer_1): Conv2d(4, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_1): ReLU()
      (encoder_conv_layer_2): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_2): ReLU()
      (encoder_flatten): Flatten(start_dim=1, end_dim=-1)
      (encoder_linear_output): Linear(in_features=97344, out_features=64, bias=True)
      (encoder_output_activation): ReLU()
    )
  )
  (head_net): EvolvableDistribution(
    (_wrapped): EvolvableMLP(
      (model): Sequential(
        (actor_linear_layer_1): Linear(in_features=64, out_features=16, bias=True)
        (actor_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (actor_activati

In [26]:
# Look up the attribute named by the dotted string 'head_net.add_layer' on the
# actor and call it with no arguments.
# NOTE(review): presumably this adds a hidden layer to the head MLP in place;
# the truncated reprs around this cell do not show enough to confirm.
getattr(actor, 'head_net.add_layer')()

In [27]:
# Display the actor again, after the `head_net.add_layer` call above.
actor

StochasticActor(
  (encoder): EvolvableCNN(
    (model): Sequential(
      (encoder_conv_layer_1): Conv2d(4, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_1): ReLU()
      (encoder_conv_layer_2): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_2): ReLU()
      (encoder_flatten): Flatten(start_dim=1, end_dim=-1)
      (encoder_linear_output): Linear(in_features=97344, out_features=64, bias=True)
      (encoder_output_activation): ReLU()
    )
  )
  (head_net): EvolvableDistribution(
    (_wrapped): EvolvableMLP(
      (model): Sequential(
        (actor_linear_layer_1): Linear(in_features=64, out_features=16, bias=True)
        (actor_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (actor_activati

In [3]:
# Build the Q-network that this cell evaluates. `q_network` was not defined by
# any cell in the notebook, so the forward pass below raised NameError on a
# fresh kernel (Restart & Run All).
# The network is built over `dict_space` with `multi_input_config`, because the
# sample below is a dict with the same 'img' / 'vec' keys as that space.
# NOTE(review): the keyword arguments mirror the StochasticActor construction
# earlier in the notebook -- confirm them against the installed AgileRL version.
q_network = QNetwork(
    observation_space=dict_space,
    action_space=spaces.Discrete(4),
    encoder_config=multi_input_config,
    device=device,
)

# A single random observation (batch size 1) matching the shapes declared for
# `img_space` (4, 84, 84) and `vec_space` (4,), created on the same device as
# the network.
sample_dict = {
    'img': torch.rand((1, 4, 84, 84), device=device),
    'vec': torch.rand((1, 4), device=device)
}
out = q_network(sample_dict)

In [3]:
from agilerl.modules.cnn import EvolvableCNN
from agilerl.hpo.mutation import Mutations

In [4]:
from accelerate import Accelerator

# Read the PPO training configuration; the path is relative to the working
# directory the notebook is run from.
with open('configs/training/ppo.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Two candidate action spaces: a 4-element continuous Box and 4 discrete actions.
# Only `vector_actions` is passed to `create_population` below.
discrete_actions = spaces.Discrete(4)
vector_actions = spaces.Box(low=-1, high=1, shape=(4,), dtype='float32')

accelerator = Accelerator()

# Initial hyperparameters come from the config file; agent ids are added here.
INIT_HP = config["INIT_HP"]
n_agents = 4
INIT_HP['AGENT_IDS'] = [f'agent_{idx}' for idx in range(n_agents)]

# Build the population for the algorithm named in the config, using the image
# observation space and CNN encoder config defined earlier in the notebook.
agent_pop = create_population(
    algo=INIT_HP["ALGO"],
    observation_space=img_space,
    action_space=vector_actions,
    net_config={"encoder_config": img_config},
    INIT_HP=INIT_HP,
    population_size=INIT_HP["POP_SIZE"],
    num_envs=INIT_HP["NUM_ENVS"],
    device=device,
    accelerator=accelerator,
)

In [5]:
# Mutation operator. The arguments are passed positionally, in the same order
# and with the same values as before.
# NOTE(review): presumably the order is (algo, no_mutation, architecture,
# new_layer_prob, parameters, activation, rl_hp, rl_hp_selection, mutation_sd),
# i.e. only architecture mutations are enabled -- confirm against the installed
# AgileRL version.
mutations = Mutations(
    'PPO', 0, 1, 0.5, 0, 0, 0,
    ["batch_size", "lr", "learn_step"],
    0.5,
)

# Mutate unwrapped clones rather than the members of `agent_pop` themselves,
# then print the `mut` attribute recorded on each returned individual.
new_population = [member.clone(wrap=False) for member in agent_pop]
mutated_population = mutations.mutation(new_population, True)
print([individual.mut for individual in mutated_population])

Mutating actor with method head_net.remove_layer
Entering mutation context, setting last mutation head_net.remove_layer
Entering mutation context, setting last mutation remove_layer
Entering mutation context, setting last mutation remove_layer
Entering mutation context, setting last mutation add_node
head_net.remove_layer
Entering mutation context, setting last mutation head_net.remove_layer
Entering mutation context, setting last mutation remove_layer


In [6]:
# Display the actor network of the first individual returned by the mutation step.
mutated_population[0].actor

StochasticActor(
  (encoder): EvolvableCNN(
    (model): Sequential(
      (encoder_conv_layer_1): Conv2d(4, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_1): ReLU()
      (encoder_conv_layer_2): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_2): ReLU()
      (encoder_flatten): Flatten(start_dim=1, end_dim=-1)
      (encoder_linear_output): Linear(in_features=97344, out_features=32, bias=True)
      (encoder_output_activation): ReLU()
    )
  )
  (head_net): EvolvableDistribution(
    (_wrapped): EvolvableMLP(
      (model): Sequential(
        (actor_linear_layer_1): Linear(in_features=32, out_features=16, bias=True)
        (actor_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (actor_activati

In [7]:
# Display the critic (value function) of the first individual returned by the
# mutation step, for comparison with its actor.
mutated_population[0].critic

ValueFunction(
  (encoder): EvolvableCNN(
    (model): Sequential(
      (encoder_conv_layer_1): Conv2d(4, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_1): ReLU()
      (encoder_conv_layer_2): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_2): ReLU()
      (encoder_flatten): Flatten(start_dim=1, end_dim=-1)
      (encoder_linear_output): Linear(in_features=97344, out_features=32, bias=True)
      (encoder_output_activation): ReLU()
    )
  )
  (head_net): EvolvableMLP(
    (model): Sequential(
      (value_linear_layer_1): Linear(in_features=32, out_features=16, bias=True)
      (value_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
      (value_activation_1): ReLU()
      (value_linear_layer_2): Linea

In [33]:
# Call the attribute named 'head_net.add_layer' directly on the first member of
# the original population (not on a clone).
# NOTE(review): the execution count of this cell (33) is out of sequence with
# its neighbours, so the displayed outputs may not reflect a top-to-bottom run.
getattr(agent_pop[0].actor, 'head_net.add_layer')()

In [34]:
# Display the first population member's actor after the `head_net.add_layer` call above.
agent_pop[0].actor

StochasticActor(
  (encoder): EvolvableCNN(
    (model): Sequential(
      (encoder_conv_layer_1): Conv2d(4, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_1): ReLU()
      (encoder_conv_layer_2): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_2): ReLU()
      (encoder_flatten): Flatten(start_dim=1, end_dim=-1)
      (encoder_linear_output): Linear(in_features=97344, out_features=32, bias=True)
      (encoder_output_activation): ReLU()
    )
  )
  (head_net): EvolvableDistribution(
    (_wrapped): EvolvableMLP(
      (model): Sequential(
        (actor_linear_layer_1): Linear(in_features=32, out_features=16, bias=True)
        (actor_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
        (actor_activati

In [17]:
# Display the first population member's critic (value function).
agent_pop[0].critic

ValueFunction(
  (encoder): EvolvableCNN(
    (model): Sequential(
      (encoder_conv_layer_1): Conv2d(4, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_1): ReLU()
      (encoder_conv_layer_2): Conv2d(16, 16, kernel_size=(4, 4), stride=(1, 1))
      (encoder_layer_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (encoder_activation_2): ReLU()
      (encoder_flatten): Flatten(start_dim=1, end_dim=-1)
      (encoder_linear_output): Linear(in_features=97344, out_features=32, bias=True)
      (encoder_output_activation): ReLU()
    )
  )
  (head_net): EvolvableMLP(
    (model): Sequential(
      (value_linear_layer_1): Linear(in_features=32, out_features=64, bias=True)
      (value_layer_norm_1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (value_activation_1): ReLU()
      (value_linear_layer_output): 

In [12]:
# Call the attribute named 'encoder.add_layer' on the first population member's
# actor, using the same dotted-name lookup as the head_net calls.
# NOTE(review): presumably this adds a layer to the encoder in place -- confirm.
getattr(agent_pop[0].actor, "encoder.add_layer")()

In [10]:
# Display the configuration dict currently held by the actor's head network.
agent_pop[0].actor.head_net.net_config

{'hidden_size': [48],
 'activation': 'ReLU',
 'output_activation': None,
 'min_hidden_layers': 1,
 'max_hidden_layers': 3,
 'min_mlp_nodes': 64,
 'max_mlp_nodes': 500,
 'layer_norm': True,
 'init_layers': True,
 'output_vanish': True,
 'noise_std': 0.5,
 'new_gelu': False,
 'noisy': False}

In [21]:
# Look up the shape of the 'weight_mu' parameter of the first layer in
# `advantage_net.model`.
# NOTE(review): `advantage_net` and 'weight_mu' appear nowhere else in this
# notebook. The actor reprs shown elsewhere list only `encoder` and `head_net`
# submodules, and the head's net_config output reports 'noisy': False.
# Presumably this cell is left over from an earlier experiment with a different
# network type and will fail on a fresh run -- confirm, then update or remove it.
dict(agent_pop[0].actor.advantage_net.model[0].named_parameters())['weight_mu'].shape

torch.Size([64, 32])

In [11]:
# Display the `last_mutation_attr` recorded on the first population member's actor.
# NOTE(review): presumably the dotted name of the most recent mutation applied
# to this network -- confirm against the AgileRL implementation.
agent_pop[0].actor.last_mutation_attr

'head_net.add_node'