In [1]:
import torch
import torch.nn as nn
import gym
from itertools import count
import numpy as np
import random
import torch.nn.functional as F

envs = ['CartPole-v1','Acrobot-v1','MountainCar-v0','Pendulum-v0','BipedalWalker-v2']
env = gym.make(envs[0]).unwrapped

discrete_actions = True
#TODO
#parralel fitness measuring
from helper import *

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [2]:



device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class Creature(nn.Module):
    def __init__(self):
        super(Creature, self).__init__()
    
        self.layer1 = nn.Linear(env.observation_space.shape[0], 6)
        self.layer2 = nn.Linear(6, 6)
        
        if discrete_actions:
            self.layer3 = nn.Linear(6, env.action_space.n)
        else:
            self.layer3 = nn.Linear(6, env.action_space.shape[0])
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

class Generator(nn.Module):
    def __init__(self,output_num):
        super(Generator, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 16, 5, stride=1, padding=0),  
            nn.BatchNorm1d(16),
            nn.ReLU(True),
            nn.Conv1d(16, 8, 5, stride=2, padding=0),  
            nn.BatchNorm1d(8),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2))
        
        self.layer2 = nn.Sequential(
            nn.Conv1d(8, 32, 5, stride=1,padding=0),  
            nn.BatchNorm1d(32),
            nn.ReLU(True),
            nn.Conv1d(32, 16, 5, stride=2,padding=0),  
            nn.BatchNorm1d(16),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2))
                
        self.layer3 =  nn.Linear(16*8, output_num)
    def forward(self, out):
        out = out.unsqueeze(1)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = out.view(out.size(0),out.size(1)*out.size(2))
        
        out = self.layer3(out)
        
        return out

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 16, 5, stride=1, padding=0),  
            nn.BatchNorm1d(16),
            nn.ReLU(True),
            nn.Conv1d(16, 8, 5, stride=1, padding=0),  
            nn.BatchNorm1d(8),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2))
        
        self.layer2 = nn.Sequential(
            nn.Conv1d(8, 32, 5, stride=1,padding=0),  
            nn.BatchNorm1d(32),
            nn.ReLU(True),
            nn.Conv1d(32, 16, 5, stride=1,padding=0),  
            nn.BatchNorm1d(16),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2))
                
        self.layer3 =  nn.Linear(16*15, 1)
    def forward(self, out):
        out = out.unsqueeze(1)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = out.view(out.size(0),out.size(1)*out.size(2))
        
        out = F.sigmoid(self.layer3(out))
        return out
    

In [3]:



def mate(m,d,gen,apply_mutation = True,dominance = 0.5,mutation_rate=0.2):
    child = Creature()
    mom = get_params(m)
    dad = get_params(d)
    if apply_mutation:
        mom = mutate(mom,mutation_rate=0.2)
        dad = mutate(dad,mutation_rate=0.2)
    child = set_params(child,gen(torch.cat([dad,mom]).unsqueeze(0)).squeeze(0))
    #if apply_mutation:
     #   child = mutate(child,mutation_rate=0.2)
    return child

def evolve(population,gen,pf_fitness,mutate):
    p_fitness_positive = p_fitness - np.min(p_fitness) + 1
    pick_probabilities = get_pick_probabilities(pf_fitness)
    
    
    choice = np.random.choice(pick_probabilities.size,population_size, p = pick_probabilities)
    print(choice)
    #print(np.sort(choice))
    new_population = []
    
    for p in range(len(population)-1):
        first_choice = population[choice[p]]
        second_choice = population[choice[p+1]]
        #more succesful(healthier?) creature has greater genetic dominance
        
        if p_fitness_positive[p] >= p_fitness_positive[p+1]:
            dominance = (p_fitness_positive[p+1]/p_fitness_positive[p])*0.7
            child = mate(first_choice,second_choice,gen, mutate,dominance).to(device)
        else:
            dominance = (p_fitness_positive[p]/p_fitness_positive[p+1])*0.7
            child = mate(second_choice,first_choice,gen, mutate,dominance).to(device)
            
        new_population.append(child)
    child = mate(population[0],population[len(population)-1],gen, mutate).to(device) 
    new_population.append(child)
    
    return new_population



In [4]:

gen = Generator(86).to(device)
#gen(torch.zeros([10,86*2])).shape

dis = Discriminator().to(device)
#dis(torch.zeros([10,86])).shape

lr = 0.00001
gen_optimizer = torch.optim.Adam(gen.parameters(), lr=lr)
dis_optimizer = torch.optim.Adam(dis.parameters(), lr=lr)

def train_gan(population,batch_size = 20,n_epochs = 1000):
    for e in range(n_epochs):
        np.random.shuffle(population)
        for i in range(len(population)//batch_size):

            gen_optimizer.zero_grad()
            dis_optimizer.zero_grad()
            
            real_batch = []
            for b in range(batch_size):
                real_batch.append(get_params(population[(i*batch_size)+b]).unsqueeze(0))
            real_batch = torch.cat(real_batch, dim=0).to(device)

            if i % 5 == 0:
                #train discriminator on population
                dis_out = dis(real_batch)
                dis_error_real = nn.BCELoss()(dis_out,torch.ones(batch_size).to(device))
                dis_error_real.backward()
                #print("Discriminator loss real : {}".format(dis_error_real))

            #train discriminator on generator output
            mom = []
            dad = []
            child = []
            for b in range(batch_size):
                m = get_params(random.choice(population))
                d = get_params(random.choice(population))
                c = gen(torch.cat([m,d]).unsqueeze(0)).squeeze(0)

                mom.append(m)
                dad.append(d)
                child.append(c)
            mom = torch.stack(mom).to(device)
            dad = torch.stack(dad).to(device)
            child = torch.stack(child).to(device)
            dis_out = dis(child)
            
            
            if i % 2 == 0:
                dis_error_fake = nn.BCELoss()(dis_out,torch.zeros(batch_size).to(device)) 
                dis_error_fake.backward(retain_graph=True)
                #print("Discriminator loss generated : {}".format(dis_error_fake))
                
            
            #train generator
            mom_loss = nn.MSELoss()(child,mom)
            dad_loss = nn.MSELoss()(child,dad)
            if mom_loss > dad_loss:
                child_error = (mom_loss/dad_loss)-1
            else:
                child_error = (dad_loss/mom_loss)-1
            
            gen_error = nn.BCELoss()(dis_out,torch.ones(batch_size).to(device)) + (child_error)
            gen_error.backward()

            dis_optimizer.step()
            gen_optimizer.step()
            
        
            #print("Generator loss : {}".format(gen_error))
            #print("Child error : {}".format(child_error))
            #print("")

In [None]:
#randomly inititialise starting population
population_size = 20
population = []
for p in range(population_size):
    population.append(Creature().to(device))
    
    
print("starting training")
n_generations = 100
batch_size = 5
    
for i in range(n_generations):
    train_gan(population,n_epochs = 200)
    
    n_behavior_samples = 50
    p_fitness, behavior_samples = measure_population_fitness(population,env,device,discrete_actions, max_steps = 500, 
                                                             n_behavior_samples=n_behavior_samples)
    

    
    population = evolve(population,gen,p_fitness,False)
    
    if i % 1 == 0:
        fitness = measure_fitness(population[np.argmax(p_fitness)],env,device,discrete_actions,render = True)
    print("Generation {}  fitness : {}".format(i+1,np.max(p_fitness)))
    


starting training


  "Please ensure they have the same size.".format(target.size(), input.size()))


Generation 1  fitness : 14.0
Generation 2  fitness : 233.0
Generation 3  fitness : 11.0
