# Evolutionary Strategies tests

In [1]:
from environments.lunarlander import LunarLanderWrapper
import torch
import torch.nn as nn
import numpy as np


In [2]:
class LayerNorm(nn.Module):
    """
    Normalize the last dimension of the input and apply a learned affine map.

    Each vector along the last axis is centred on its mean and divided by
    its standard deviation (as computed by ``Tensor.std``) plus ``eps``; the
    result is scaled by ``gamma`` and shifted by ``beta``.
    """

    def __init__(self, features, eps=1e-6):
        """
        Args:
            features: Size of the last input dimension; length of the
                ``gamma`` and ``beta`` vectors.
            eps: Constant added to the standard deviation so the division
                stays finite when the deviation is zero.
        """
        super().__init__()
        # Start as the identity affine map: scale of ones, shift of zeros.
        self.gamma = nn.Parameter(torch.ones(features))
        self.beta = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        """Return ``gamma * (x - mean) / (std + eps) + beta`` over the last axis."""
        centered = x - x.mean(-1, keepdim=True)
        # eps is added to the deviation itself, not to the variance.
        spread = x.std(-1, keepdim=True) + self.eps
        return self.gamma * centered / spread + self.beta


In [12]:
# Scratch check: list the parameters LayerNorm registers and their rank.
lnorm = LayerNorm(features=4)
for name, param in lnorm.named_parameters():
    print(name, param)
    # Rank of the parameter tensor (1 for both gamma and beta).
    print(param.dim())
    

gamma Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True)
1
beta Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)
1


In [51]:
class Parameters:
    """Hyper-parameter container for the actor network.

    Attributes set when ``init`` is true:
        device: ``torch.device`` the model should run on.
        hidden_size: Width of every hidden layer.
        actor_num_layers: Number of hidden blocks in the actor.
        activation_layer: Key into the module-level ``activations`` table.
        state_dim: Size of the observation vector.
        action_dim: Size of the action vector.
    """

    def __init__(self, conf=None, init=True):
        """
        Args:
            conf: Optional configuration. May be a dict with a
                ``'disable_cuda'`` key or any object with a ``disable_cuda``
                attribute (e.g. an ``argparse.Namespace``). CUDA is used only
                when that option is present, falsy, and CUDA is available.
            init: When false, return immediately and leave the instance
                without any attributes.
        """
        if not init:
            return

        # A missing option is treated as "CUDA disabled", so the default
        # configuration always lands on the CPU.
        if isinstance(conf, dict):
            disable_cuda = conf.get('disable_cuda', True)
        else:
            disable_cuda = getattr(conf, 'disable_cuda', True)
        use_cuda = not disable_cuda and torch.cuda.is_available()

        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.hidden_size = 6
        self.actor_num_layers = 2
        self.activation_layer = 'tanh'

        self.state_dim = 3
        self.action_dim = 3

In [15]:
# Lookup table from activation name to a ready-made activation module.
activations = dict(
    relu=nn.ReLU(),
    tanh=nn.Tanh(),
    leakyRelu=nn.LeakyReLU(),
)

In [21]:
# Scratch check: extending with a list of lists appends the inner lists
# themselves (no flattening).
lst = []
lst += [[1, 2, 3], [4, 5, 6]]
list(lst)

[[1, 2, 3], [4, 5, 6]]

In [29]:
# Scratch check: reshape a flat vector into a single column, letting -1
# infer the number of rows.
s = torch.tensor(list(range(1, 7)))
s.reshape((-1, 1))

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])

In [52]:

class Actor(torch.nn.Module):
    """Feed-forward policy network mapping a state to a tanh-bounded action.

    Layout built in ``__init__``: an input ``Linear`` + activation,
    ``actor_num_layers`` hidden blocks of ``Linear`` -> ``LayerNorm`` ->
    activation, and an output ``Linear`` + ``Tanh``.

    The flat-vector helpers (``extract_parameters``, ``inject_parameters``,
    ``count_parameters``) operate only on the 2-D weight matrices. Biases and
    the LayerNorm ``gamma``/``beta`` vectors are 1-D and are therefore left
    out of the vector.
    """

    def __init__(self, args: "Parameters", init=False):
        """
        Args:
            args: Configuration providing ``hidden_size``,
                ``actor_num_layers``, ``activation_layer``, ``state_dim``,
                ``action_dim`` and ``device``.
            init: Unused; kept so existing call sites keep working.
        """
        super().__init__()
        self.args = args
        self.h = args.hidden_size
        self.L = args.actor_num_layers
        self.activation = activations[args.activation_layer]

        # input layer:
        layers = [nn.Linear(args.state_dim, self.h), self.activation]

        # hidden layer(s):
        for _ in range(self.L):
            layers += [
                nn.Linear(self.h, self.h),
                LayerNorm(self.h),
                self.activation,
            ]

        # output layer:
        layers += [nn.Linear(self.h, args.action_dim), nn.Tanh()]
        self.net = nn.Sequential(*layers)

        # select_action() and extract_parameters() place their tensors on
        # args.device, so the weights have to live there as well.
        self.to(args.device)

    def forward(self, state: torch.Tensor):
        """Run the network on a batch of states and return the actions."""
        return self.net(state)

    def select_action(self, state):
        """Compute the action for a single state without tracking gradients.

        Args:
            state: One observation as a NumPy array, tensor or (nested)
                sequence of numbers; it is flattened into a batch of one.

        Returns:
            numpy.ndarray: The action as a flat array on the CPU.
        """
        with torch.no_grad():
            batch = torch.as_tensor(
                state, dtype=torch.float32, device=self.args.device
            ).reshape(1, -1)
            action = self.forward(batch)
        return action.cpu().numpy().flatten()

    def _evolvable_parameters(self):
        """Yield, in ``named_parameters()`` order, the tensors in the flat vector."""
        for name, param in self.named_parameters():
            # Only 2-D weight matrices take part; every 1-D tensor (biases,
            # LayerNorm gamma/beta) is skipped.
            if 'lnorm' in name or param.dim() != 2:
                continue
            yield param

    def extract_parameters(self):
        ''' Extract the parameters of the network and flatten it into a single vector.
        This is used for the genetic algorithm.

        Returns:
            torch.tensor: Flattened float32 copy of the selected parameters on
            ``args.device``; it shares no memory with the network.
        '''
        flat = torch.zeros(
            self.count_parameters(),
            dtype=torch.float32,
            device=self.args.device,
        )
        offset = 0
        for param in self._evolvable_parameters():
            size = param.numel()
            # detach() keeps the copy out of the autograd graph.
            flat[offset:offset + size] = param.detach().reshape(-1)
            offset += size
        return flat

    def inject_parameters(self, parameters):
        ''' Inject the parameters into the network. This is used for the genetic algorithm.

        Args:
            parameters (torch.tensor): Flattened parameters of the network, laid
                out as produced by ``extract_parameters``. Array-likes are
                converted with ``torch.as_tensor``.
        '''
        flat = torch.as_tensor(parameters)
        offset = 0
        # In-place writes to leaf parameters must not be recorded by autograd.
        with torch.no_grad():
            for param in self._evolvable_parameters():
                size = param.numel()
                param.copy_(flat[offset:offset + size].reshape(param.shape))
                offset += size

    def count_parameters(self):
        ''' Count the number of parameters in the network.

        Returns:
            int: Total number of scalars in the flat parameter vector.
        '''
        return sum(param.numel() for param in self._evolvable_parameters())

In [54]:
# Scratch check: build an actor from the default configuration and report
# the length of its flat parameter vector.
actor = Actor(args=Parameters(), init=True)

# To inspect the raw values instead, iterate actor.named_parameters() and
# print param.data.view(-1) for each entry.
print(actor.extract_parameters().size())

torch.Size([108])


In [None]:
class EvolutionStrategy:
    """Skeleton of an ask/tell evolution strategy over a population of actors.

    Only the population set-up is implemented; ``ask``, ``tell`` and
    ``result`` are placeholders that return ``None``.
    """

    def __init__(self, actor: "Actor", pop_size=10):
        """
        Args:
            actor: Prototype network; each population member starts as an
                independent deep copy of it.
            pop_size: Number of members in the population.
        """
        import copy

        super().__init__()
        self.pop_size = pop_size
        # Deep-copy per member: reusing the same object would make every
        # entry alias one network, so changing one would change them all.
        self.pop = [copy.deepcopy(actor) for _ in range(pop_size)]

    def ask(self):
        """Placeholder, not implemented yet; returns ``None``."""

    def tell(self, fitness_lst):
        """Placeholder, not implemented yet; ``fitness_lst`` is ignored."""

    def result(self):
        """Placeholder, not implemented yet; returns ``None``."""
        

    