In [1]:
import torch
import torch.nn as nn
import gym
from itertools import count
import numpy as np
import random
import torch.nn.functional as F

envs = ['CartPole-v1','Acrobot-v1','MountainCar-v0','Pendulum-v0']
env = gym.make(envs[0]).unwrapped
env2 = gym.make(envs[0]).unwrapped

discrete_actions = True

#TODO
#parralel fitness measuring


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [2]:


device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class Creature(nn.Module):
    def __init__(self, hidden_num = 4):
        super(Creature, self).__init__()
    
        self.layer1 = nn.Linear(env.observation_space.shape[0], hidden_num)
        self.layer2 = nn.Linear(hidden_num, hidden_num)
        
        if discrete_actions:
            self.layer3 = nn.Linear(hidden_num, env.action_space.n)
        else:
            self.layer3 = nn.Linear(hidden_num, env.action_space.shape[0])
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out



class Encoder(nn.Module):
    def __init__(self, input_num, hidden_num):
        super(Encoder, self).__init__()
    
        self.layer1 = nn.Linear(input_num, 25)
        self.layer2 = nn.Linear(25, 15)
        self.layer3 = nn.Linear(15, hidden_num)
        
    def forward(self, x):
        out = F.relu(self.layer1(x))
        out = F.relu(self.layer2(out))
        out = self.layer3(out)
        return out

class Decoder(nn.Module):
    def __init__(self, input_num, hidden_num):
        super(Decoder, self).__init__()
    
        self.layer1 = nn.Linear(hidden_num, input_num)
        #self.layer2 = nn.Linear(15, 25)
        #self.layer3 = nn.Linear(25, input_num)
        
    def forward(self, x):
        #out = F.relu(self.layer1(x))
        #out = F.relu(self.layer2(out))
        out = self.layer1(x)
        return out


In [3]:
#get model parameters as vector
def get_params(model):
    params = []
    for p in model.parameters():
        view = p.view(p.numel())
        params.append(view)
    params = torch.cat(params, dim=0)
    return params

#turn vector into model parameters
def set_params(model,data):
    idx = 0
    for p in model.parameters():
        view = data[idx:idx+p.numel()].view(p.shape)
        p.data = view
        idx+=p.numel()
    return model



#initialise autoencoder
input_num = get_params(Creature()).numel()
hidden_num = 10

lr = 0.001
enc = Encoder(input_num,hidden_num).to(device)  
enc_optimizer = torch.optim.Adam(enc.parameters(), lr=lr)

dec = Decoder(input_num,hidden_num).to(device)  
dec_optimizer = torch.optim.Adam(dec.parameters(), lr=lr)

#def pick_by_fitness(population,p_fitness):
    

def train_autoencoder(population,p_fitness,batch_size = 5,n_epochs = 5):
    for e in range(n_epochs):
        for i in range(len(population)//batch_size):
            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()
            
            batch = []
            for b in range(batch_size):
                batch.append(get_params(population[(i*batch_size)+b]).unsqueeze(0))
                
            data = torch.cat(batch, dim=0).to(device)
            
            enc_out = enc(data)
            dec_out = dec(enc_out)
            
            loss =  nn.MSELoss()(dec_out, data)
            
            loss.backward()
            enc_optimizer.step()
            dec_optimizer.step()
            #print(get_params(enc))
            
            print(loss/batch_size)
                       
#train_autoencoder(population,batch_size = 5,n_epochs = 15) 

In [4]:
def measure_fitness(creature,render = False,max_steps = 1000):
    observation = env.reset()
    #creature fitness is cumulative reward in simulation
    total_reward = 0
    for i in range(max_steps):
        if render:
            
            env.render()
            
        #convert observation into tensor
        obs = torch.from_numpy(observation).to(device).type('torch.cuda.FloatTensor')
        
        #get action
        if discrete_actions:
            action = creature(obs).max(-1)[1].item()
        else:
            action = creature(obs).detach().cpu().numpy()
        observation, reward, done, _ = env.step(action)
        
        total_reward += reward
        
        if done:
            break
    return total_reward

#measure fitness of entire population and return scores
def measure_population_fitness(population,max_steps = 1000):
    scores = []
    for idx,p in enumerate(population):
       #print("measuring fitness : {}".format(idx))
        fitness = measure_fitness(p,max_steps = max_steps)
        scores.append(fitness)
    return np.array(scores)

In [5]:
def mutate(creature,mutation_rate=0.1):
    new = Creature().to(device)
    new.load_state_dict(creature.state_dict()) 
    for p in new.parameters():

        mutation = np.random.normal(scale = 0.07,size = p.data.shape)
        mutation *= np.random.choice([1, 0], p.data.shape,p=[mutation_rate,1-mutation_rate])
        mutation = torch.from_numpy(mutation).type('torch.FloatTensor').to(device)
        p.data += mutation
    return new


def mate(mom,dad,apply_mutation = True,dominance = 0.5,mutation_rate=0.2):
    child = Creature()
    
    enc_m1 = enc(get_params(mom))
    enc_m2 = enc(get_params(dad))
    

    r = np.random.choice([True, False], enc_m1.numel(),p=[dominance,1-dominance])
    mixed = np.zeros([enc_m1.numel()])
    mixed[r] = enc_m1.cpu().detach().numpy()[r]
    mixed[np.invert(r)] =  enc_m2.cpu().detach().numpy()[np.invert(r)]
    
    if apply_mutation:
        mutation = np.random.normal(scale = 0.07,size = mixed.shape)
        mutation *= np.random.choice([1, 0], mixed.shape,p=[mutation_rate,1-mutation_rate])
        mixed += mutation
    
    mixed = torch.from_numpy(mixed).to(device).type("torch.cuda.FloatTensor")
    
    decoded = dec(mixed)
    
    child = set_params(child,decoded)

    return child

#mom = Creature().to(device)
#dad = Creature().to(device)
#mate(mom,dad)

def evolve(population,pf_fitness,mutate = True):

    normed = p_fitness- np.mean(p_fitness)
    normed -= np.min(normed)
    normed = np.power(normed, 0.5)
    pick_probabilities = normed/np.sum(normed)
    
    
    choice = np.random.choice(pick_probabilities.size,population_size, p = pick_probabilities)
    
    #print(np.sort(choice))
    new_population = []
    
    for p in range(len(population)-1):
        child = mate(population[p],population[p+1], mutate).to(device)
        new_population.append(child)
    child = mate(population[0],population[len(population)-1]).to(device) 
    new_population.append(child)
    
    return new_population



In [6]:
#randomly inititialise starting population
population_size = 20
population = []
for p in range(population_size):
    population.append(Creature().to(device))
    
    
print("starting training")
n_generations = 100
batch_size = 5

for i in range(n_generations):
    #if i < 10:
    #    train_autoencoder(population,batch_size = batch_size,n_epochs = 100//(i+1))
    #else:
    p_fitness = measure_population_fitness(population,max_steps = 500)

    train_autoencoder(population,p_fitness,batch_size = batch_size,n_epochs = 15)
        
    population = evolve(population,p_fitness,True)
    if i % 1 == 0:
        fitness = measure_fitness(population[np.argmax(p_fitness)],render = True)
    print("Generation {}  fitness : {}".format(i+1,np.max(p_fitness)))
    
#population, p_fitness = evolve(population)
#measure_fitness(population[np.argmax(p_fitness)],render = True)

starting training
tensor(6.5368, device='cuda:0', grad_fn=<DivBackward0>)
tensor(6.1829, device='cuda:0', grad_fn=<DivBackward0>)
tensor(6.3407, device='cuda:0', grad_fn=<DivBackward0>)
tensor(6.0742, device='cuda:0', grad_fn=<DivBackward0>)
tensor(6.2185, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.8499, device='cuda:0', grad_fn=<DivBackward0>)
tensor(6.0586, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.8197, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.9715, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.5764, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.8144, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.5989, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.7650, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.3420, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.6011, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.4097, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.5884, device='cuda:0', grad_fn=<DivBackward0>)
tensor(5.1416, device='cuda:0'

tensor(0.0130, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0312, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0150, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0129, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0312, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0150, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0128, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0313, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0149, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0127, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0313, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0148, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0126, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0111, device='cuda:0', grad_fn=<DivBack

Generation 5  fitness : 11.0
tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0055, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0189, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0116, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0055, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0186, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0117, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0056, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0182, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0117, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0179, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0118, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0057, devi

tensor(0.0099, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0207, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0083, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0098, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0206, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0098, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0205, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0098, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0204, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0097, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0203, device='cuda:0', grad_fn=<DivBack

tensor(0.0065, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0080, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0252, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0145, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0063, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0080, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0248, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0144, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0062, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0081, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0243, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0143, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0081, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0237, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0143, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0081, device='cuda:0', grad_fn=<DivBack

Generation 13  fitness : 10.0
tensor(0.0114, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0210, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0394, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0107, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0113, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0209, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0390, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0106, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0114, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0207, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0385, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0104, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0116, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0205, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0379, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0103, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0118, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.0204, dev

KeyboardInterrupt: 