In [1]:
import torch
import torch.nn as nn
import gym
from itertools import count
import numpy as np
import random
import torch.nn.functional as F

# Candidate environments; the index passed to gym.make selects the one used.
envs = ['CartPole-v1','Acrobot-v1','MountainCar-v0','Pendulum-v0','BipedalWalker-v2']
env = gym.make(envs[0]).unwrapped

# Derive the action type from the chosen environment instead of hard-coding it,
# so switching to a continuous-action entry of `envs` does not require
# remembering to flip this flag by hand.
discrete_actions = isinstance(env.action_space, gym.spaces.Discrete)
#TODO
#parallel fitness measuring


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [2]:
#get model parameters as vector
def get_params(model):
    """Flatten every parameter of ``model`` into one 1-D tensor.

    The pieces are concatenated in the order yielded by
    ``model.parameters()``.
    """
    flat_chunks = [param.view(param.numel()) for param in model.parameters()]
    return torch.cat(flat_chunks, dim=0)


# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
class Creature(nn.Module):
    """Policy network made of three stacked linear layers.

    There is no activation between the layers. The input width comes from the
    module-level ``env`` observation space; the output width depends on the
    module-level ``discrete_actions`` flag.
    """

    def __init__(self):
        super().__init__()

        n_inputs = env.observation_space.shape[0]
        # One output per discrete action, or one per continuous action dimension.
        n_outputs = env.action_space.n if discrete_actions else env.action_space.shape[0]

        # Creation order matters: helpers that flatten/restore parameters walk
        # them in registration order.
        self.layer1 = nn.Linear(n_inputs, 6)
        self.layer2 = nn.Linear(6, 6)
        self.layer3 = nn.Linear(6, n_outputs)

    def forward(self, x):
        """Apply the three linear layers in sequence and return the raw outputs."""
        return self.layer3(self.layer2(self.layer1(x)))

class Encoder(nn.Module):
    """1-D convolutional encoder mapping a flat vector to a latent code.

    Args:
        input_num: length of each input vector (must be at least 8, because
            the three stride-2 poolings divide the length by 8).
        hidden_num: size of the latent code produced by ``forward``.

    Raises:
        ValueError: if ``input_num`` is too short to survive the poolings.
    """

    def __init__(self, input_num, hidden_num):
        super(Encoder, self).__init__()

        # Each conv uses kernel 3 / padding 1 (length preserving) and each of
        # the three blocks ends with MaxPool1d(2, stride=2), which floors the
        # length in half. The original code hard-coded 160 (= 16 * (86 // 8)),
        # which only worked for input lengths 80..87.
        pooled_len = input_num // 8
        if pooled_len < 1:
            raise ValueError(
                "input_num must be >= 8 for three stride-2 poolings, got {}".format(input_num))

        self.layer1 = self._conv_block(1, 8, 4)
        self.layer2 = self._conv_block(4, 16, 8)
        self.layer3 = self._conv_block(8, 32, 16)

        # layer3 ends with 16 channels of length pooled_len.
        self.layer4 = nn.Linear(16 * pooled_len, 50)
        self.layer5 = nn.Linear(50, hidden_num)

    @staticmethod
    def _conv_block(in_channels, mid_channels, out_channels):
        """Build conv -> ReLU -> BN -> conv -> ReLU -> BN -> max-pool(2)."""
        return nn.Sequential(
            nn.Conv1d(in_channels, mid_channels, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.BatchNorm1d(mid_channels),
            nn.Conv1d(mid_channels, out_channels, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.BatchNorm1d(out_channels),
            nn.MaxPool1d(2, stride=2))

    def forward(self, x):
        """Encode ``x`` of shape (batch, input_num) into (batch, hidden_num)."""
        # Add the single input channel expected by Conv1d.
        x = x.unsqueeze(1)
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)

        # Flatten channels x length for the fully connected head.
        out = out.view(out.size(0), -1)
        out = F.relu(self.layer4(out))
        out = self.layer5(out)
        return out

class Decoder(nn.Module):
    """1-D transposed-convolution decoder mapping a latent code to a flat vector.

    Args:
        input_num: length of each reconstructed output vector.
        hidden_num: length of each latent code fed to ``forward``.
    """

    def __init__(self, input_num, hidden_num):
        super(Decoder, self).__init__()

        self.layer1 = self._deconv_block(1, 32, 16)
        self.layer2 = self._deconv_block(16, 8, 4)
        self.layer3 = self._deconv_block(4, 2, 1)

        # Every ConvTranspose1d here (kernel 9, stride 1, padding 0) lengthens
        # the sequence by 8, and there are six of them; the last one has a
        # single channel. The original code hard-coded 73 (= 25 + 48), which
        # only worked for hidden_num == 25.
        expanded_len = hidden_num + 6 * (9 - 1)
        self.layer4 = nn.Linear(expanded_len, 55)
        self.layer5 = nn.Linear(55, input_num)

    @staticmethod
    def _deconv_block(in_channels, mid_channels, out_channels):
        """Build deconv -> BN -> ReLU -> deconv -> BN -> ReLU."""
        return nn.Sequential(
            nn.ConvTranspose1d(in_channels, mid_channels, 9, stride=1, padding=0),
            nn.BatchNorm1d(mid_channels),
            nn.ReLU(True),
            nn.ConvTranspose1d(mid_channels, out_channels, 9, stride=1, padding=0),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(True))

    def forward(self, x):
        """Decode ``x`` of shape (batch, hidden_num) into (batch, input_num)."""
        # Add the single input channel expected by ConvTranspose1d.
        x = x.unsqueeze(1)
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten channels x length for the fully connected head.
        out = out.view(out.size(0), -1)

        out = F.relu(self.layer4(out))
        out = self.layer5(out)
        return out
# Autoencoder dimensions: the input is a flattened Creature parameter vector,
# the latent code has `hidden_num` entries.
hidden_num = 25
input_num = get_params(Creature()).numel()

# Shape smoke test: push a dummy batch of 100 through freshly built models.
enc_model = Encoder(input_num, hidden_num)
print(enc_model(torch.zeros(100, input_num)).shape)

dec_model = Decoder(input_num, hidden_num)
print(dec_model(torch.zeros(100, hidden_num)).shape)

torch.Size([100, 25])
torch.Size([100, 86])


In [3]:
#measure creature fitness
def measure_fitness(creature,render = False,max_steps = 1000,n_behavior_samples = 10):
    """Run one episode of the module-level ``env`` with ``creature`` as policy.

    Args:
        creature: callable mapping an observation tensor to an output tensor.
        render: when true, call ``env.render()`` before every step.
        max_steps: upper bound on the number of environment steps.
        n_behavior_samples: maximum number of (observation, output) pairs to
            keep; they are drawn at random from the episode.

    Returns:
        ``(total_reward, observations, outputs)`` where ``total_reward`` is the
        summed reward and the two tensors stack the sampled observations and
        the matching raw network outputs along dimension 0. Note the order:
        observations come first, network outputs second.
    """
    observation = env.reset()
    #creature fitness is cumulative reward in simulation
    total_reward = 0

    #sample behavior from episode for autoencoder training
    behavior_samples = []
    for _ in range(max_steps):
        if render:
            env.render()

        # Convert the observation to a float32 tensor on the active device.
        # (The previous hard-coded 'torch.cuda.FloatTensor' failed on CPU-only
        # machines even though `device` falls back to the CPU.)
        obs = torch.from_numpy(observation).float().to(device)

        output = creature(obs)
        behavior_samples.append((obs, output))

        if discrete_actions:
            # Index of the largest output is the chosen discrete action.
            action = output.max(-1)[1].item()
        else:
            action = output.detach().cpu().numpy()

        observation, reward, done, _ = env.step(action)
        total_reward += reward

        if done:
            break

    #reshape behavior samples into tensors
    behavior_samples = random.sample(behavior_samples,min(n_behavior_samples,len(behavior_samples)))
    sampled_obs, sampled_outputs = zip(*behavior_samples)
    return total_reward, torch.stack(sampled_obs,0), torch.stack(sampled_outputs,0)

#measure fitness of entire population and return scores
def measure_population_fitness(population,max_steps = 1000,n_behavior_samples = 10):
    """Evaluate every member of ``population`` with ``measure_fitness``.

    Returns:
        ``(scores, (first, second))``: ``scores`` is a NumPy array with one
        fitness value per member; ``first`` and ``second`` are lists holding,
        per member, the second and third value returned by
        ``measure_fitness``. With ``measure_fitness`` as written in this file
        those are the sampled observations and the sampled network outputs,
        in that order.
    """
    results = [
        measure_fitness(member, max_steps=max_steps, n_behavior_samples=n_behavior_samples)
        for member in population
    ]
    scores = [result[0] for result in results]
    first_samples = [result[1] for result in results]
    second_samples = [result[2] for result in results]

    return np.array(scores), (first_samples, second_samples)

In [4]:
def mutate(creature,mutation_rate=0.25,mutation_scale=0.07):
    """Return a mutated copy of ``creature``; the original is left untouched.

    Args:
        creature: the Creature whose weights are copied.
        mutation_rate: probability that any individual weight is perturbed.
        mutation_scale: standard deviation of the Gaussian noise added to a
            perturbed weight (previously hard-coded to 0.07).

    Returns:
        A new Creature on ``device`` with the perturbed weights.
    """
    new = Creature().to(device)
    new.load_state_dict(creature.state_dict())
    with torch.no_grad():
        for p in new.parameters():
            shape = tuple(p.shape)
            # Draw noise for every weight, then zero it out where the
            # per-weight coin flip says "no mutation".
            noise = np.random.normal(scale = mutation_scale,size = shape)
            mask = np.random.choice([1, 0], shape,p=[mutation_rate,1-mutation_rate])
            p.add_(torch.from_numpy(noise * mask).to(device=p.device, dtype=p.dtype))
    return new


def mate(mom,dad,apply_mutation = True,dominance = 0.5,mutation_rate=0.2):
    """Breed a child by mixing the parents' latent codes and decoding the mix.

    Both parents' flattened weights are encoded with the module-level ``enc``;
    each latent entry is taken from ``mom`` with probability ``dominance`` and
    from ``dad`` otherwise; the mixed code is decoded with ``dec`` and written
    into a fresh Creature.

    Args:
        mom, dad: parent Creatures.
        apply_mutation: when true, the child is passed through ``mutate``.
        dominance: probability that a latent entry comes from ``mom``.
        mutation_rate: per-weight mutation probability forwarded to
            ``mutate`` (it was previously accepted but ignored, so ``mutate``
            silently used its own default).

    Returns:
        The child Creature.
    """
    child = Creature()

    # Breeding never back-propagates, so skip building autograd graphs.
    with torch.no_grad():
        mom_code = enc(get_params(mom).unsqueeze(0)).squeeze(0)
        dad_code = enc(get_params(dad).unsqueeze(0)).squeeze(0)

        from_mom = np.random.choice([True, False], mom_code.numel(),p=[dominance,1-dominance])
        mixed = np.where(from_mom,
                         mom_code.detach().cpu().numpy(),
                         dad_code.detach().cpu().numpy())

        # float32 on whichever device is active (the previous hard-coded
        # "torch.cuda.FloatTensor" failed on CPU-only machines).
        mixed = torch.from_numpy(mixed).float().to(device).unsqueeze(0)
        decoded = dec(mixed)

    child = set_params(child,decoded.squeeze())

    if apply_mutation:
        child = mutate(child, mutation_rate)
    return child

def get_pick_probabilities(p_fitness):
    """Turn raw fitness scores into selection probabilities.

    Scores are shifted so the worst one is 0, square-rooted to flatten large
    gaps, and normalised to sum to 1. The worst member therefore always gets
    probability 0 unless every score is identical, in which case a uniform
    distribution is returned (the unguarded division used to yield NaNs).

    Args:
        p_fitness: 1-D array-like of fitness scores (non-empty).

    Returns:
        NumPy float array of the same length that sums to 1.
    """
    fitness = np.asarray(p_fitness, dtype=float)
    # The mean shift is mathematically redundant given the min shift below;
    # it is kept so results stay numerically identical to earlier runs.
    normed = fitness - np.mean(fitness)
    normed -= np.min(normed)
    normed = np.power(normed, 0.5)
    total = np.sum(normed)
    if total == 0:
        # All scores equal: nothing to prefer, so pick uniformly.
        return np.full(normed.shape, 1.0 / normed.size)
    return normed / total

def evolve(population,pf_fitness,mutate):
    """Breed a new population of the same size from ``population``.

    Parents are drawn with probabilities from ``get_pick_probabilities``;
    consecutive draws are mated, with the fitter of the two passed to ``mate``
    as the first parent.

    Args:
        population: list of Creatures.
        pf_fitness: array-like of fitness scores, one per population member.
        mutate: boolean forwarded to ``mate`` as ``apply_mutation`` for the
            selected pairs (this parameter shadows the ``mutate`` function
            inside this body only).

    Returns:
        List of child Creatures, ``len(population)`` long, moved to ``device``.
    """
    # Use the argument, not a same-named global left over from the caller.
    fitness = np.asarray(pf_fitness, dtype=float)
    # Shift so every score is >= 1, keeping the ratios below well defined.
    fitness_positive = fitness - np.min(fitness) + 1
    pick_probabilities = get_pick_probabilities(pf_fitness)

    choice = np.random.choice(pick_probabilities.size,len(population), p = pick_probabilities)

    new_population = []

    for p in range(len(population)-1):
        first_idx, second_idx = choice[p], choice[p+1]
        first_choice = population[first_idx]
        second_choice = population[second_idx]
        # Compare the fitness of the parents actually selected (previously the
        # scores at loop positions p and p+1 were used, which belong to
        # unrelated population members).
        first_fit = fitness_positive[first_idx]
        second_fit = fitness_positive[second_idx]

        #more successful(healthier?) creature has greater genetic dominance
        # NOTE(review): with this formula the fitter parent's share shrinks as
        # the fitness gap grows, which looks inverted relative to the comment
        # above - confirm the intended formula before changing it.
        if first_fit >= second_fit:
            dominance = (second_fit/first_fit)*0.7
            child = mate(first_choice,second_choice, mutate,dominance).to(device)
        else:
            dominance = (first_fit/second_fit)*0.7
            child = mate(second_choice,first_choice, mutate,dominance).to(device)

        new_population.append(child)

    # The final child always comes from the first and last members of the
    # incoming population, using mate()'s default settings.
    child = mate(population[0],population[len(population)-1]).to(device)
    new_population.append(child)

    return new_population



In [5]:

#turn vector into model parameters
def set_params(model,data):
    """Overwrite ``model``'s parameters with consecutive slices of ``data``.

    Slices are taken in ``model.parameters()`` order and reshaped to each
    parameter's shape; the parameters end up as views into ``data``. Returns
    the same ``model`` object.
    """
    offset = 0
    for param in model.parameters():
        count = param.numel()
        param.data = data[offset:offset + count].view(param.shape)
        offset += count
    return model

def run_params(model,model_data,input):
    """Evaluate a stack of linear layers whose weights come from ``model_data``.

    ``model`` is only used for the shapes and order of its parameters.
    Parameters at even positions are treated as weight matrices (the running
    value is multiplied by their transpose); those at odd positions are added
    as biases.
    """
    out = input
    offset = 0
    for position, param in enumerate(model.parameters()):
        count = param.numel()
        chunk = model_data[offset:offset + count].view(param.size())
        offset += count

        is_weight = position % 2 == 0
        if is_weight:
            out = torch.matmul(out, chunk.transpose(0,1))
        else:
            # In-place add, as in the original `out += ...`.
            out = out.add_(chunk)

    return out



#initialise autoencoder

lr = 1e-3

# Build both halves of the autoencoder on the active device first ...
enc = Encoder(input_num, hidden_num).to(device)
dec = Decoder(input_num, hidden_num).to(device)

# ... then give each half its own Adam optimizer.
enc_optimizer = torch.optim.Adam(enc.parameters(), lr=lr)
dec_optimizer = torch.optim.Adam(dec.parameters(), lr=lr)

def train_autoencoder(population_, p_fitness, behavior_samples,n_behavior_samples,batch_size = 5):
    """Run one pass of autoencoder training on fitness-weighted samples.

    Members of ``population_`` are resampled (with replacement) according to
    ``get_pick_probabilities(p_fitness)``; their flattened weights are fed
    through the module-level ``enc``/``dec`` in batches and both optimizers
    take one step per batch on the MSE reconstruction loss.

    Args:
        population_: list of Creatures.
        p_fitness: fitness scores, one per member of ``population_``.
        behavior_samples: currently unused. A behavioural loss used to be
            computed from it but never contributed to the optimised loss;
            the argument is kept so existing calls keep working.
        n_behavior_samples: currently unused (see ``behavior_samples``).
        batch_size: number of parameter vectors per optimisation step; it is
            capped at the population size so small populations still train.

    Returns:
        The loss tensor of the last batch, or ``None`` if no batch was run.
    """
    pick_probabilities = get_pick_probabilities(p_fitness)
    print(np.sort(pick_probabilities))

    # Size the resample from the population itself rather than a global.
    n_members = len(population_)
    choice = np.random.choice(pick_probabilities.size,n_members, p = pick_probabilities)
    # Plain list indexing: np.array() on a list of modules may try to expand
    # modules that behave like sequences.
    population = [population_[j] for j in choice]

    batch_size = min(batch_size, n_members)
    loss = None
    for i in range(n_members//batch_size):
        start = i*batch_size

        enc_optimizer.zero_grad()
        dec_optimizer.zero_grad()

        batch = [get_params(member).unsqueeze(0)
                 for member in population[start:start + batch_size]]

        # Detach: the creatures' weights are training data here, so gradients
        # must not flow back into (and accumulate on) the population.
        data = torch.cat(batch, dim=0).to(device).detach()
        dec_out = dec(enc(data))

        loss = nn.MSELoss()(dec_out, data)

        loss.backward()
        enc_optimizer.step()
        dec_optimizer.step()
    return loss

#train_autoencoder(population,batch_size = 5,n_epochs = 15) 

In [None]:
#randomly initialise starting population
population_size = 20
population = [Creature().to(device) for _ in range(population_size)]

print("starting training")
n_generations = 100
batch_size = 20
# Number of (observation, output) samples kept per creature and the cap on
# autoencoder passes per generation.
n_behavior_samples = 50
n_epochs = 4000

for i in range(n_generations):

    p_fitness, behavior_samples = measure_population_fitness(population, max_steps = 500,
                                                             n_behavior_samples=n_behavior_samples)

    # Train until the loss drops below 1 (after a minimum of 6 passes) or the
    # pass budget runs out.
    for e in range(n_epochs):
        loss = train_autoencoder(population, p_fitness, behavior_samples,n_behavior_samples, batch_size = batch_size)
        if loss < 1 and e > 5:
            break
        print("Epoch : {}  Loss : {}".format(e,loss))

    # Fresh optimizers every generation (discards the Adam moment estimates).
    enc_optimizer = torch.optim.Adam(enc.parameters(), lr=lr)
    dec_optimizer = torch.optim.Adam(dec.parameters(), lr=lr)

    # Remember the best creature BEFORE evolve() replaces the population;
    # p_fitness indexes the old population, so looking it up afterwards
    # rendered an unrelated child.
    best_creature = population[np.argmax(p_fitness)]

    population = evolve(population,p_fitness,False)

    measure_fitness(best_creature,render = True)
    print("Generation {}  fitness : {}".format(i+1,np.max(p_fitness)))

starting training
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 0  Loss : 121.60360717773438
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 1  Loss : 122.37117004394531
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 2  Loss : 121.27346801757812
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.0685

Epoch : 33  Loss : 91.66342163085938
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 34  Loss : 95.24954986572266
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 35  Loss : 90.61337280273438
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 36  Loss : 94.49488067626953
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.0559479

Epoch : 68  Loss : 59.955928802490234
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 69  Loss : 61.51817321777344
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 70  Loss : 53.541954040527344
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 71  Loss : 53.87379837036133
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594

Epoch : 104  Loss : 35.77686309814453
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 105  Loss : 29.976903915405273
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 106  Loss : 28.193281173706055
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 107  Loss : 38.3285026550293
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05

Epoch : 137  Loss : 13.460651397705078
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 138  Loss : 14.494893074035645
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 139  Loss : 15.396072387695312
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 140  Loss : 14.429115295410156
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0

Epoch : 170  Loss : 8.32304573059082
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 171  Loss : 7.352869510650635
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 172  Loss : 7.167409896850586
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 173  Loss : 6.388106822967529
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.0559

Epoch : 206  Loss : 2.7482595443725586
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 207  Loss : 2.34830379486084
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 208  Loss : 3.193728446960449
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 209  Loss : 3.2864861488342285
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05

Epoch : 238  Loss : 1.200026512145996
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 239  Loss : 1.5525354146957397
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Epoch : 240  Loss : 1.233745813369751
[0.         0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119 0.03956119
 0.05594797 0.05594797 0.05594797 0.05594797 0.05594797 0.06852199
 0.10466908 0.11189595]
Generation 1  fitness : 16.0
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.05811412 0.05811412
 0.05811412 0.05811412 0.05811412 0.05811412 0.08218578

Epoch : 30  Loss : 3.6619575023651123
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.05811412 0.05811412
 0.05811412 0.05811412 0.05811412 0.05811412 0.08218578 0.10065661
 0.18377299 0.28469989]
Epoch : 31  Loss : 2.147502899169922
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.05811412 0.05811412
 0.05811412 0.05811412 0.05811412 0.05811412 0.08218578 0.10065661
 0.18377299 0.28469989]
Epoch : 32  Loss : 2.5491340160369873
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.05811412 0.05811412
 0.05811412 0.05811412 0.05811412 0.05811412 0.08218578 0.10065661
 0.18377299 0.28469989]
Epoch : 33  Loss : 1.803558111190796
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.05811412 0.05811412
 0.05811412 0.05811412 0.05811412 0.05811412 0.08218578 0.10065661
 0.183

Epoch : 14  Loss : 6.466629981994629
[0.         0.         0.         0.         0.02911582 0.02911582
 0.02911582 0.02911582 0.02911582 0.02911582 0.02911582 0.04117599
 0.04117599 0.04117599 0.04117599 0.04117599 0.05043009 0.08235199
 0.17225154 0.28527565]
Epoch : 15  Loss : 5.173461437225342
[0.         0.         0.         0.         0.02911582 0.02911582
 0.02911582 0.02911582 0.02911582 0.02911582 0.02911582 0.04117599
 0.04117599 0.04117599 0.04117599 0.04117599 0.05043009 0.08235199
 0.17225154 0.28527565]
Epoch : 16  Loss : 4.3166351318359375
[0.         0.         0.         0.         0.02911582 0.02911582
 0.02911582 0.02911582 0.02911582 0.02911582 0.02911582 0.04117599
 0.04117599 0.04117599 0.04117599 0.04117599 0.05043009 0.08235199
 0.17225154 0.28527565]
Epoch : 17  Loss : 4.416666030883789
[0.         0.         0.         0.         0.02911582 0.02911582
 0.02911582 0.02911582 0.02911582 0.02911582 0.02911582 0.04117599
 0.04117599 0.04117599 0.04117599 0.041175

Epoch : 9  Loss : 3.683825731277466
[0.         0.03228372 0.03228372 0.03228372 0.03228372 0.03228372
 0.03228372 0.03228372 0.04565608 0.04565608 0.04565608 0.04565608
 0.04565608 0.05591704 0.05591704 0.06456744 0.07907864 0.09131215
 0.09685116 0.10209009]
Epoch : 10  Loss : 3.6890194416046143
[0.         0.03228372 0.03228372 0.03228372 0.03228372 0.03228372
 0.03228372 0.03228372 0.04565608 0.04565608 0.04565608 0.04565608
 0.04565608 0.05591704 0.05591704 0.06456744 0.07907864 0.09131215
 0.09685116 0.10209009]
Epoch : 11  Loss : 2.3966104984283447
[0.         0.03228372 0.03228372 0.03228372 0.03228372 0.03228372
 0.03228372 0.03228372 0.04565608 0.04565608 0.04565608 0.04565608
 0.04565608 0.05591704 0.05591704 0.06456744 0.07907864 0.09131215
 0.09685116 0.10209009]
Epoch : 12  Loss : 2.648798942565918
[0.         0.03228372 0.03228372 0.03228372 0.03228372 0.03228372
 0.03228372 0.03228372 0.04565608 0.04565608 0.04565608 0.04565608
 0.04565608 0.05591704 0.05591704 0.064567

Epoch : 41  Loss : 1.063585638999939
[0.         0.03228372 0.03228372 0.03228372 0.03228372 0.03228372
 0.03228372 0.03228372 0.04565608 0.04565608 0.04565608 0.04565608
 0.04565608 0.05591704 0.05591704 0.06456744 0.07907864 0.09131215
 0.09685116 0.10209009]
Epoch : 42  Loss : 1.181473731994629
[0.         0.03228372 0.03228372 0.03228372 0.03228372 0.03228372
 0.03228372 0.03228372 0.04565608 0.04565608 0.04565608 0.04565608
 0.04565608 0.05591704 0.05591704 0.06456744 0.07907864 0.09131215
 0.09685116 0.10209009]
Generation 4  fitness : 18.0
[0.         0.         0.         0.02842774 0.02842774 0.02842774
 0.02842774 0.04020289 0.04020289 0.04020289 0.04020289 0.04923829
 0.04923829 0.05685548 0.06356636 0.06963345 0.07521273 0.11721057
 0.12060868 0.12391364]
Epoch : 0  Loss : 6.609247207641602
[0.         0.         0.         0.02842774 0.02842774 0.02842774
 0.02842774 0.04020289 0.04020289 0.04020289 0.04020289 0.04923829
 0.04923829 0.05685548 0.06356636 0.06963345 0.07521

Generation 6  fitness : 34.0
[0.         0.01505672 0.01844064 0.02129341 0.02380676 0.02816854
 0.03011343 0.03194012 0.04123452 0.04258682 0.04389749 0.04517015
 0.04640792 0.05428776 0.05532191 0.06388024 0.08038087 0.08450561
 0.11064382 0.16286329]
Epoch : 0  Loss : 0.24632209539413452
[0.         0.01505672 0.01844064 0.02129341 0.02380676 0.02816854
 0.03011343 0.03194012 0.04123452 0.04258682 0.04389749 0.04517015
 0.04640792 0.05428776 0.05532191 0.06388024 0.08038087 0.08450561
 0.11064382 0.16286329]
Epoch : 1  Loss : 4.648220062255859
[0.         0.01505672 0.01844064 0.02129341 0.02380676 0.02816854
 0.03011343 0.03194012 0.04123452 0.04258682 0.04389749 0.04517015
 0.04640792 0.05428776 0.05532191 0.06388024 0.08038087 0.08450561
 0.11064382 0.16286329]
Epoch : 2  Loss : 0.7969523072242737
[0.         0.01505672 0.01844064 0.02129341 0.02380676 0.02816854
 0.03011343 0.03194012 0.04123452 0.04258682 0.04389749 0.04517015
 0.04640792 0.05428776 0.05532191 0.06388024 0.0803

Generation 11  fitness : 20.0
[0.         0.         0.         0.03458113 0.03458113 0.03458113
 0.03458113 0.03458113 0.04890511 0.04890511 0.04890511 0.05989628
 0.05989628 0.05989628 0.06916227 0.06916227 0.06916227 0.06916227
 0.07732577 0.14671533]
Epoch : 0  Loss : 0.18134771287441254
[0.         0.         0.         0.03458113 0.03458113 0.03458113
 0.03458113 0.03458113 0.04890511 0.04890511 0.04890511 0.05989628
 0.05989628 0.05989628 0.06916227 0.06916227 0.06916227 0.06916227
 0.07732577 0.14671533]
Epoch : 1  Loss : 1.8390421867370605
[0.         0.         0.         0.03458113 0.03458113 0.03458113
 0.03458113 0.03458113 0.04890511 0.04890511 0.04890511 0.05989628
 0.05989628 0.05989628 0.06916227 0.06916227 0.06916227 0.06916227
 0.07732577 0.14671533]
Epoch : 2  Loss : 0.2791651785373688
[0.         0.         0.         0.03458113 0.03458113 0.03458113
 0.03458113 0.03458113 0.04890511 0.04890511 0.04890511 0.05989628
 0.05989628 0.05989628 0.06916227 0.06916227 0.06

Epoch : 5  Loss : 0.4552465081214905
[0.         0.         0.03273112 0.03273112 0.03273112 0.04628879
 0.04628879 0.04628879 0.04628879 0.04628879 0.05669196 0.05669196
 0.05669196 0.05669196 0.06546224 0.06546224 0.06546224 0.06546224
 0.07318901 0.10855684]
Generation 16  fitness : 22.0
[0.         0.03125642 0.03125642 0.03125642 0.04420325 0.04420325
 0.04420325 0.04420325 0.0541377  0.0541377  0.0541377  0.0541377
 0.0541377  0.0541377  0.06251283 0.06251283 0.06989147 0.06989147
 0.06989147 0.06989147]
Epoch : 0  Loss : 0.0008036635699681938
[0.         0.03125642 0.03125642 0.03125642 0.04420325 0.04420325
 0.04420325 0.04420325 0.0541377  0.0541377  0.0541377  0.0541377
 0.0541377  0.0541377  0.06251283 0.06251283 0.06989147 0.06989147
 0.06989147 0.06989147]
Epoch : 1  Loss : 1.9596394300460815
[0.         0.03125642 0.03125642 0.03125642 0.04420325 0.04420325
 0.04420325 0.04420325 0.0541377  0.0541377  0.0541377  0.0541377
 0.0541377  0.0541377  0.06251283 0.06251283 0.069

Epoch : 2  Loss : 0.5332669019699097
[0.         0.         0.03677089 0.03677089 0.03677089 0.03677089
 0.0520019  0.0520019  0.0520019  0.0520019  0.0520019  0.06368905
 0.06368905 0.06368905 0.06368905 0.06368905 0.06368905 0.06368905
 0.07354179 0.07354179]
Epoch : 3  Loss : 0.36603501439094543
[0.         0.         0.03677089 0.03677089 0.03677089 0.03677089
 0.0520019  0.0520019  0.0520019  0.0520019  0.0520019  0.06368905
 0.06368905 0.06368905 0.06368905 0.06368905 0.06368905 0.06368905
 0.07354179 0.07354179]
Epoch : 4  Loss : 0.3726639747619629
[0.         0.         0.03677089 0.03677089 0.03677089 0.03677089
 0.0520019  0.0520019  0.0520019  0.0520019  0.0520019  0.06368905
 0.06368905 0.06368905 0.06368905 0.06368905 0.06368905 0.06368905
 0.07354179 0.07354179]
Epoch : 5  Loss : 0.28970223665237427
[0.         0.         0.03677089 0.03677089 0.03677089 0.03677089
 0.0520019  0.0520019  0.0520019  0.0520019  0.0520019  0.06368905
 0.06368905 0.06368905 0.06368905 0.06368

Generation 25  fitness : 13.0
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.07621396 0.07621396
 0.07621396 0.07621396 0.10778282 0.10778282 0.10778282 0.10778282
 0.13200645 0.13200645]
Epoch : 0  Loss : 3.924767588614486e-05
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.07621396 0.07621396
 0.07621396 0.07621396 0.10778282 0.10778282 0.10778282 0.10778282
 0.13200645 0.13200645]
Epoch : 1  Loss : 0.8962211608886719
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.07621396 0.07621396
 0.07621396 0.07621396 0.10778282 0.10778282 0.10778282 0.10778282
 0.13200645 0.13200645]
Epoch : 2  Loss : 0.07968057692050934
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.07621396 0.07621396
 0.07621396 0.07621396 0.10778282 0.10778282 0.10778282 0.10778282
 0.13200645

Generation 30  fitness : 11.0
[0.         0.         0.         0.05025253 0.05025253 0.05025253
 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253
 0.05025253 0.07106781 0.07106781 0.07106781 0.07106781 0.07106781
 0.07106781 0.07106781]
Epoch : 0  Loss : 0.3135145306587219
[0.         0.         0.         0.05025253 0.05025253 0.05025253
 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253
 0.05025253 0.07106781 0.07106781 0.07106781 0.07106781 0.07106781
 0.07106781 0.07106781]
Epoch : 1  Loss : 0.6391530632972717
[0.         0.         0.         0.05025253 0.05025253 0.05025253
 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253
 0.05025253 0.07106781 0.07106781 0.07106781 0.07106781 0.07106781
 0.07106781 0.07106781]
Epoch : 2  Loss : 0.22887635231018066
[0.         0.         0.         0.05025253 0.05025253 0.05025253
 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253 0.05025253
 0.05025253 0.07106781 0.07106781 0.07106781 0.07

Epoch : 3  Loss : 0.14932872354984283
[0.         0.         0.         0.         0.04451691 0.04451691
 0.04451691 0.04451691 0.04451691 0.06295642 0.06295642 0.06295642
 0.06295642 0.06295642 0.07710555 0.07710555 0.07710555 0.07710555
 0.07710555 0.07710555]
Epoch : 4  Loss : 0.4881782829761505
[0.         0.         0.         0.         0.04451691 0.04451691
 0.04451691 0.04451691 0.04451691 0.06295642 0.06295642 0.06295642
 0.06295642 0.06295642 0.07710555 0.07710555 0.07710555 0.07710555
 0.07710555 0.07710555]
Epoch : 5  Loss : 0.38306620717048645
[0.         0.         0.         0.         0.04451691 0.04451691
 0.04451691 0.04451691 0.04451691 0.06295642 0.06295642 0.06295642
 0.06295642 0.06295642 0.07710555 0.07710555 0.07710555 0.07710555
 0.07710555 0.07710555]
Generation 35  fitness : 11.0
[0.         0.0424514  0.0424514  0.0424514  0.0424514  0.0424514
 0.0424514  0.0424514  0.0424514  0.06003535 0.06003535 0.06003535
 0.06003535 0.06003535 0.06003535 0.06003535 0.06

Epoch : 2  Loss : 0.09447141736745834
[0.         0.03883118 0.03883118 0.03883118 0.03883118 0.03883118
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.06725757
 0.06725757 0.06725757]
Epoch : 3  Loss : 0.18340174853801727
[0.         0.03883118 0.03883118 0.03883118 0.03883118 0.03883118
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.06725757
 0.06725757 0.06725757]
Epoch : 4  Loss : 0.43556299805641174
[0.         0.03883118 0.03883118 0.03883118 0.03883118 0.03883118
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.06725757
 0.06725757 0.06725757]
Epoch : 5  Loss : 0.32486146688461304
[0.         0.03883118 0.03883118 0.03883118 0.03883118 0.03883118
 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558 0.05491558
 0.05491558 0.05491558 0.05491558 0.054

In [None]:
# Small throw-away population for inspecting the behaviour samples.
population_size = 3
population = [Creature().to(device) for _ in range(population_size)]

p_fitness, behavior_samples = measure_population_fitness(
    population, max_steps=500, n_behavior_samples=2)


In [None]:
# Show both halves of the behaviour-sample tuple, one per line, then build a
# fresh untrained Creature.
print(behavior_samples[0], behavior_samples[1], sep="\n")
model = Creature().to(device)

In [None]:
# Run the first creature on the first entry of the first behaviour-sample list.
first_creature = population[0]
first_samples = behavior_samples[0][0]
print(first_creature(first_samples))

In [None]:
# Shape check: a (1, 4) row times a (4, 5) matrix gives a (1, 5) result.
out = np.zeros((1, 4))
w = np.zeros((4, 5))

print(out @ w)