In [1]:
from gymnasium import spaces
import yaml
import torch

from agilerl.modules.configs import MlpNetConfig, CnnNetConfig, MultiInputNetConfig
from agilerl.networks.q_networks import QNetwork, RainbowQNetwork
from agilerl.networks.value_functions import ValueFunction
from agilerl.networks.actors import StochasticActor, DeterministicActor

from agilerl.algorithms.dqn import DQN
from agilerl.utils.utils import create_population

  from .autonotebook import tqdm as notebook_tqdm


### QNetwork

In [2]:
from tests.helper_functions import generate_dict_or_tuple_space
from agilerl.utils.evolvable_networks import is_image_space

# Observation spaces shared by the cells below: a 4x84x84 box, a 4-element
# vector box, and a dict space combining the two.
# NOTE(review): img_space passes no dtype, so gymnasium's Box default applies —
# confirm that default is what is wanted for values in [0, 255].
img_space = spaces.Box(low=0, high=255, shape=(4, 84, 84))
vec_space = spaces.Box(low=-1, high=1, shape=(4,), dtype='float32')
dict_space = spaces.Dict({'img': img_space, 'vec': vec_space})
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Encoder configurations. img_config is passed to create_population together
# with img_space further down; vec_config and multi_input_config are not
# referenced by any other cell visible here (presumably kept for experiments
# with vec_space / dict_space — TODO confirm or remove).
img_config = CnnNetConfig(
    channel_size=[16],
    kernel_size=[4],
    stride_size=[1],
)
vec_config = MlpNetConfig(
    hidden_size=[64],
)
multi_input_config = MultiInputNetConfig(
    channel_size=[8, 8, 8],
    kernel_size=[2, 2, 2],
    stride_size=[2, 2, 2],
    hidden_size=[32, 32, 32],
    vector_space_mlp=False
)

# Network inspected by the following cells: built on the vector observation
# space with 4 discrete actions; `support` is 51 evenly spaced values in [-10, 10].
# NOTE(review): `support` is created without a device argument while `device`
# may be CUDA — confirm RainbowQNetwork moves it to `device` itself.
actor = RainbowQNetwork(
    observation_space=vec_space,
    action_space=spaces.Discrete(4),
    support=torch.linspace(-10, 10, 51),
    latent_dim=64,
    device=device
)

In [None]:
from dataclasses import dataclass, field
from numbers import Number
from typing import Any, Dict, ItemsView, Optional

@dataclass
class RLHyperparameter:
    """Search range and mutation rule for a single RL hyperparameter.

    Attributes:
        min: Lower bound; a mutated value is never smaller than this.
        max: Upper bound; a mutated value is never larger than this.
        shrink_factor: Multiplier applied to ``value`` when shrinking.
        grow_factor: Multiplier applied to ``value`` when growing.
        dtype: Callable used to cast the mutated value (e.g. ``float`` or ``int``).
        value: Current value. It is not an ``__init__`` argument and must be
            assigned before :meth:`mutate` is called.
    """

    min: float
    max: float
    shrink_factor: float = 0.8
    grow_factor: float = 1.2
    dtype: type = float
    value: Optional[Number] = field(default=None, init=False)

    def mutate(self) -> Number:
        """Grow or shrink ``value`` at random, clamp it to ``[min, max]`` and cast it.

        With probability 0.5 the value is multiplied by ``shrink_factor`` (and
        clamped to ``min``); otherwise it is multiplied by ``grow_factor`` (and
        clamped to ``max``). The result is cast with ``dtype`` so that, for
        example, an integer hyperparameter stays an integer after mutation.

        Returns:
            The new value, which is also stored in ``self.value``.

        Raises:
            AssertionError: If ``value`` has not been set.
        """
        assert self.value is not None, "Hyperparameter value is not set"

        # Equal probability of growing or shrinking
        if torch.rand(1).item() < 0.5:
            candidate = max(self.min, self.value * self.shrink_factor)
        else:
            candidate = min(self.max, self.value * self.grow_factor)

        if self.dtype is int:
            # Round first: int() truncates, so 2399.9999 would become 2399.
            candidate = round(candidate)

        self.value = self.dtype(candidate)
        return self.value

class HyperparameterConfig:
    """Stores the RL hyperparameters that will be mutated during training. For each
    hyperparameter, we store the name of the attribute where the hyperparameter is
    stored, and the range of values that the hyperparameter can take.

    Every keyword argument is kept in ``self.config`` and is also exposed as an
    instance attribute of the same name.
    """

    def __init__(self, **kwargs: "RLHyperparameter"):
        """Register one entry per keyword argument.

        Args:
            **kwargs: Mapping of hyperparameter name to its specification.

        Raises:
            ValueError: If a name would overwrite ``config`` or an existing
                class attribute such as ``items``.
        """
        for key in kwargs:
            # Setting these as instance attributes would replace the backing
            # dict or shadow a method, leaving the object unusable.
            if key == "config" or hasattr(type(self), key):
                raise ValueError(
                    f"'{key}' is reserved and cannot be used as a hyperparameter name"
                )

        self.config = kwargs
        for key, value in kwargs.items():
            setattr(self, key, value)

    def items(self) -> ItemsView[str, "RLHyperparameter"]:
        """Return a view of the ``(name, specification)`` pairs, in insertion order."""
        return self.config.items()
    

# Quick check of the container: build a config from two entries and display its items.
# NOTE(review): the values passed here are plain (min, max) tuples, not
# RLHyperparameter instances as the `__init__` annotation indicates — confirm
# which form is intended.
HyperparameterConfig(lr=(1e-3, 1e-2), learn_step=(1000, 3000)).items()

dict_items([('lr', (0.001, 0.01)), ('learn_step', (1000, 3000))])

In [4]:
# Call the actor's `add_latent_node` method; its return value is shown as the cell output.
# NOTE(review): presumably this changes the network in place, so re-running the
# cell would apply the change again — confirm.
actor.add_latent_node()

{'numb_new_nodes': 8}

In [7]:
# Display the device reported by the actor's encoder sub-module.
actor.encoder.device

device(type='cuda')

In [7]:
# Display the module repr to inspect the network's layer structure.
actor

RainbowQNetwork(
  (encoder): EvolvableMLP(
    (model): Sequential(
      (encoder_linear_layer_1): Linear(in_features=4, out_features=16, bias=True)
      (encoder_layer_norm_1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
      (encoder_activation_1): ReLU()
      (encoder_linear_layer_output): Linear(in_features=16, out_features=64, bias=True)
      (encoder_activation_output): ReLU()
    )
  )
  (head_net): RainbowMLP(
    (model): Sequential(
      (value_linear_layer_1): NoisyLinear(in_features=64, out_features=32)
      (value_layer_norm_1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
      (value_activation_1): ReLU()
      (value_linear_layer_output): NoisyLinear(in_features=32, out_features=51)
      (value_activation_output): Identity()
    )
    (advantage_net): Sequential(
      (advantage_linear_layer_1): NoisyLinear(in_features=64, out_features=32)
      (advantage_layer_norm_1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
      (advantage_act

In [3]:
from agilerl.modules.cnn import EvolvableCNN
from agilerl.hpo.mutation import Mutations

In [4]:
from accelerate import Accelerator

# Load the training configuration. The path is relative, so the notebook must be
# run from a working directory that contains `configs/`.
with open('configs/training/dqn_rainbow.yaml', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Candidate action spaces; only `discrete_actions` is used in this cell.
vector_actions = spaces.Box(low=-1, high=1, shape=(4,), dtype='float32')
discrete_actions = spaces.Discrete(4)

# accelerator = Accelerator()
INIT_HP = config["INIT_HP"]
# Single source of truth for the number of agent ids written into INIT_HP.
n_agents = 4
INIT_HP['AGENT_IDS'] = [f'agent_{i}' for i in range(n_agents)]

# Build the population on the image observation space with the CNN encoder
# config; algorithm, population size and env count come from the loaded config.
agent_pop = create_population(
    algo=INIT_HP["ALGO"],
    observation_space=img_space,
    action_space=discrete_actions,
    net_config={'encoder_config': img_config},
    INIT_HP=INIT_HP,
    population_size=INIT_HP["POP_SIZE"],
    num_envs=INIT_HP["NUM_ENVS"],
    device=device,
    # accelerator=accelerator
)

In [5]:
# Build the mutation operator. Every setting except `device` is passed
# positionally, so its meaning depends on the `Mutations` signature, which is
# not visible in this notebook.
# NOTE(review): the first argument is 'PPO', while the population was created
# with INIT_HP["ALGO"] loaded from the dqn_rainbow config — confirm this is intended.
# NOTE(review): consider keyword arguments once the parameter names are confirmed.
mutations = Mutations(
    'PPO',
    0,
    1,
    0.5,
    0,
    0,
    0,
    ["batch_size", "lr", "learn_step"],
    0.5,
    device=device,)

# Mutate clones rather than `agent_pop` itself, so the originals remain
# available for the comparison in the following cells.
# NOTE(review): presumably `clone` returns independent copies — confirm.
new_population = [agent.clone(wrap=True) for agent in agent_pop]
mutated_population = mutations.mutation(new_population, True)
# print([ind.mut for ind in mutated_population])

In [14]:
# Pick one population index and pair the original agent with its mutated counterpart.
idx = 1

orig_ind = agent_pop[idx]
mutated_ind = mutated_population[idx]

# Display the original agent's advantage-network parameters for manual
# comparison with the mutated agent in the next cell.
# NOTE(review): this dumps the whole state_dict; a programmatic comparison of
# keys and shapes would be easier to read than two tensor dumps.
orig_ind.actor.advantage_net.state_dict()

OrderedDict([('model.advantage_linear_layer_1.weight_mu',
              tensor([[ 0.1434, -0.0175,  0.1293,  ..., -0.0698, -0.1398, -0.3118],
                      [ 0.1460, -0.0819,  0.1643,  ..., -0.1453, -0.2205, -0.0706],
                      [-0.2764,  0.2844,  0.0405,  ...,  0.2588, -0.1528, -0.0314],
                      ...,
                      [-0.1968,  0.1745, -0.2624,  ..., -0.0390,  0.0210, -0.0372],
                      [ 0.1461, -0.2313,  0.1959,  ..., -0.1141,  0.1632,  0.3119],
                      [ 0.0233,  0.1132,  0.0994,  ..., -0.2890, -0.1585, -0.0369]])),
             ('model.advantage_linear_layer_1.weight_sigma',
              tensor([[-0.2387,  0.2303,  0.0522,  ..., -0.1071,  0.3164,  0.0929],
                      [-0.0021,  0.1727,  0.0638,  ...,  0.0269, -0.0729,  0.0485],
                      [-0.0135,  0.0421, -0.0333,  ..., -0.0521,  0.0826, -0.1900],
                      ...,
                      [-0.0627, -0.1481, -0.2538,  ..., -0.1304,  0.

In [13]:
# Print the mutated actor's `last_mutation_attr` (presumably the name of the
# mutation that was last applied — confirm), then display its advantage-network
# parameters to compare against the original agent's.
print(mutated_ind.actor.last_mutation_attr)
mutated_ind.actor.advantage_net.state_dict()

head_net.add_layer


OrderedDict([('model.advantage_linear_layer_1.weight_mu',
              tensor([[ 0.1434, -0.0175,  0.1293,  ..., -0.0698, -0.1398, -0.3118],
                      [ 0.1460, -0.0819,  0.1643,  ..., -0.1453, -0.2205, -0.0706],
                      [-0.2764,  0.2844,  0.0405,  ...,  0.2588, -0.1528, -0.0314],
                      ...,
                      [-0.1968,  0.1745, -0.2624,  ..., -0.0390,  0.0210, -0.0372],
                      [ 0.1461, -0.2313,  0.1959,  ..., -0.1141,  0.1632,  0.3119],
                      [ 0.0233,  0.1132,  0.0994,  ..., -0.2890, -0.1585, -0.0369]])),
             ('model.advantage_linear_layer_1.weight_sigma',
              tensor([[-0.2387,  0.2303,  0.0522,  ..., -0.1071,  0.3164,  0.0929],
                      [-0.0021,  0.1727,  0.0638,  ...,  0.0269, -0.0729,  0.0485],
                      [-0.0135,  0.0421, -0.0333,  ..., -0.0521,  0.0826, -0.1900],
                      ...,
                      [-0.0627, -0.1481, -0.2538,  ..., -0.1304,  0.