In [1]:
import torch
import torch.nn as nn
import gym
from itertools import count
import numpy as np
import random
import torch.nn.functional as F

# Candidate Gym environment ids; only the entry selected by index below is created.
envs = ['CartPole-v1','Acrobot-v1','MountainCar-v0','Pendulum-v0','BipedalWalker-v2']
# NOTE(review): `.unwrapped` presumably bypasses gym's wrappers (e.g. episode time limits) — confirm.
env = gym.make(envs[0]).unwrapped

# Read by Creature to pick between `action_space.n` and `action_space.shape[0]`
# for its output width, and forwarded to the fitness-measuring helpers.
discrete_actions = True
# TODO: parallel fitness measuring
# Star import: get_params, set_params, get_pick_probabilities, measure_fitness and
# measure_population_fitness used later are presumably defined in helper — confirm.
# Being a star import placed after the assignments above, it can rebind those names.
from helper import *

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [2]:
# Use the first CUDA device when torch reports CUDA as available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class Creature(nn.Module):
    """Small policy network made of three stacked linear layers.

    No activation is applied between the layers. The input width is
    ``env.observation_space.shape[0]``; the output width is
    ``env.action_space.n`` when the notebook-level ``discrete_actions`` flag is
    true and ``env.action_space.shape[0]`` otherwise.
    """

    def __init__(self):
        super().__init__()

        hidden = 6
        n_inputs = env.observation_space.shape[0]
        n_outputs = env.action_space.n if discrete_actions else env.action_space.shape[0]

        # Registration order (layer1, layer2, layer3) determines the order in
        # which .parameters() and .state_dict() expose the weights.
        self.layer1 = nn.Linear(n_inputs, hidden)
        self.layer2 = nn.Linear(hidden, hidden)
        self.layer3 = nn.Linear(hidden, n_outputs)

    def forward(self, x):
        """Pass ``x`` through layer1, layer2 and layer3 in turn and return the result."""
        return self.layer3(self.layer2(self.layer1(x)))

class Generator(nn.Module):
    """1-D convolutional network that maps a flat vector to ``output_num`` values.

    The input is a ``(batch, input_num)`` tensor; ``forward`` adds the single
    channel dimension itself. Three convolutional stages are followed by one
    linear layer.

    Args:
        output_num: Number of values produced per sample.
        input_num: Length of each input vector. Defaults to
            ``2 * output_num + 1``, which is what the notebook feeds in (two
            parameter vectors of length ``output_num`` plus one dominance
            value). For ``output_num=86`` this gives the 173-long input and the
            ``16 * 14`` linear width that used to be hard-coded.

    Raises:
        ValueError: If ``input_num`` is too short to survive the three stages.
    """

    def __init__(self,output_num,input_num=None):
        super(Generator, self).__init__()
        if input_num is None:
            input_num = 2 * output_num + 1

        flat_len = self._pooled_length(input_num)
        if flat_len < 1:
            raise ValueError(
                "input_num={} is too short for three conv/pool stages".format(input_num))

        # Construction order is kept (layer1..layer4) so parameter order and
        # state_dict keys match the previous definition.
        self.layer1 = self._stage(1, 16, 8)
        self.layer2 = self._stage(8, 32, 16)
        self.layer3 = self._stage(16, 32, 16)

        # The last stage emits 16 channels of length flat_len.
        self.layer4 = nn.Linear(16 * flat_len, output_num)

    @staticmethod
    def _stage(in_channels, mid_channels, out_channels):
        """Build one stage: two (conv k=5 -> batch norm -> ReLU) pairs, then max-pool 2/2."""
        return nn.Sequential(
            nn.Conv1d(in_channels, mid_channels, 5, stride=1, padding=0),
            nn.BatchNorm1d(mid_channels),
            nn.ReLU(True),
            nn.Conv1d(mid_channels, out_channels, 5, stride=1, padding=0),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(True),
            nn.MaxPool1d(2, stride=2))

    @staticmethod
    def _pooled_length(length):
        """Return the sequence length left after the three stages."""
        for _ in range(3):
            # Two unpadded kernel-5 convolutions remove 8 samples; the
            # stride-2 pool then halves the length (rounding down).
            length = (length - 8) // 2
        return length

    def forward(self, out):
        """Run a ``(batch, input_num)`` tensor through the network; returns ``(batch, output_num)``."""
        out = out.unsqueeze(1)  # add the channel dimension expected by Conv1d

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)

        # Flatten channels x length into one feature vector per sample.
        out = out.view(out.size(0), out.size(1) * out.size(2))

        return self.layer4(out)

class Discriminator(nn.Module):
    """1-D convolutional network that scores a flat vector with a value in (0, 1).

    The input is a ``(batch, input_num)`` tensor; ``forward`` adds the single
    channel dimension itself. Three convolutional stages are followed by two
    linear layers and a sigmoid.

    Args:
        input_num: Length of each input vector. Defaults to 86, which yields
            the ``16 * 59`` linear width that used to be hard-coded.

    Raises:
        ValueError: If ``input_num`` is too short to survive the three stages.
    """

    def __init__(self, input_num=86):
        super(Discriminator, self).__init__()

        # Each stage removes 9 samples: two unpadded kernel-5 convolutions
        # (4 each) and a size-2, stride-1 max-pool (1).
        flat_len = input_num - 3 * 9
        if flat_len < 1:
            raise ValueError(
                "input_num={} is too short for three conv/pool stages".format(input_num))

        # Construction order is kept (layer1..layer5) so parameter order and
        # state_dict keys match the previous definition.
        self.layer1 = self._stage(1, 16, 8)
        self.layer2 = self._stage(8, 32, 16)
        self.layer3 = self._stage(16, 32, 16)

        # The last stage emits 16 channels of length flat_len.
        self.layer4 = nn.Linear(16 * flat_len, 128)
        self.layer5 = nn.Sequential(
            nn.Linear(128, 1),
            nn.Sigmoid())

    @staticmethod
    def _stage(in_channels, mid_channels, out_channels):
        """Build one stage: two (conv k=5 -> batch norm -> LeakyReLU 0.2) pairs, then max-pool 2/1."""
        return nn.Sequential(
            nn.Conv1d(in_channels, mid_channels, 5, stride=1, padding=0),
            nn.BatchNorm1d(mid_channels),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(mid_channels, out_channels, 5, stride=1, padding=0),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool1d(2, stride=1))

    def forward(self, out):
        """Run a ``(batch, input_num)`` tensor through the network; returns ``(batch, 1)``."""
        out = out.unsqueeze(1)  # add the channel dimension expected by Conv1d

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)

        # Flatten channels x length into one feature vector per sample.
        out = out.view(out.size(0), out.size(1) * out.size(2))

        out = self.layer4(out)
        return self.layer5(out)
#gen = Generator(86).to(device)

#gen(torch.zeros([10,86*2]).to(device)).shape

#dis = Discriminator().to(device)
#dis(torch.zeros([10,86]).to(device)).shape    

In [3]:
def mate(m,d,gen,apply_mutation = True,dominance = 0.5,mutation_rate=0.2):
    """Create a child Creature whose parameters are produced by ``gen``.

    The generator input is the concatenation ``[params(d), params(m), dominance]``
    with a leading batch dimension of 1; its output is written into a fresh
    ``Creature`` through ``set_params``.

    Args:
        m: First parent ("mom"); passed to ``get_params``.
        d: Second parent ("dad"); passed to ``get_params``.
        gen: Callable mapping the ``(1, 2*P + 1)`` input to a ``(1, P)`` output.
        apply_mutation: Accepted for interface compatibility; not used.
        dominance: Scalar appended as the last generator input.
        mutation_rate: Accepted for interface compatibility; not used.

    Returns:
        Whatever ``set_params`` returns for the new child.
    """
    # Build the dominance value as a shape-(1,) float32 tensor directly on the
    # working device. Casting through the "torch.cuda.FloatTensor" type string
    # fails on machines without CUDA even though `device` falls back to CPU.
    dom = torch.tensor([float(dominance)], dtype=torch.float32, device=device)

    child = Creature()
    mom = get_params(m)
    dad = get_params(d)

    generated = gen(torch.cat([dad, mom, dom]).unsqueeze(0)).squeeze(0)
    return set_params(child, generated)

def mutate(creature,mutation_rate=0.2):
    """Return a copy of ``creature`` with sparse Gaussian noise added to its weights.

    A new ``Creature`` is created on ``device`` and loaded with the state of
    ``creature``; the argument itself is left untouched. For every parameter
    tensor, noise with standard deviation 0.07 is drawn, each element is kept
    with probability ``mutation_rate`` (zeroed otherwise), and the result is
    added in place to the copy.
    """
    clone = Creature().to(device)
    clone.load_state_dict(creature.state_dict())

    keep_or_drop = [mutation_rate, 1-mutation_rate]
    for param in clone.parameters():
        shape = param.data.shape
        # Draw order matters for reproducibility: noise first, then the mask.
        noise = np.random.normal(scale = 0.07, size = shape)
        mask = np.random.choice([1, 0], shape, p=keep_or_drop)
        delta = torch.from_numpy(noise * mask).type('torch.FloatTensor').to(device)
        param.data += delta

    return clone

def evolve(population,gen,pf_fitness,mutate):
    """Breed a new population of the same size from fitness-weighted parents.

    Parent indices are sampled with the probabilities returned by
    ``get_pick_probabilities(pf_fitness)``. Consecutive sampled indices are
    paired and each pair produces one child through ``mate``; a final child is
    bred from the first and last members of ``population`` so the size is
    preserved.

    Args:
        population: Sequence of creatures.
        gen: Generator forwarded to ``mate``.
        pf_fitness: Per-creature fitness values, aligned with ``population``.
        mutate: Forwarded to ``mate`` as its ``apply_mutation`` argument.

    Returns:
        List of ``len(population)`` children, each moved to ``device``.
    """
    if len(population) == 0:
        return []

    # Shift fitness so every value is >= 1; this keeps the ratios below
    # well-defined. Uses the argument rather than a notebook global.
    fitness = np.asarray(pf_fitness, dtype=float)
    fitness_positive = fitness - np.min(fitness) + 1
    pick_probabilities = get_pick_probabilities(pf_fitness)

    n_creatures = len(population)
    choice = np.random.choice(pick_probabilities.size, n_creatures, p = pick_probabilities)
    new_population = []

    for p in range(n_creatures-1):
        first_idx, second_idx = choice[p], choice[p+1]
        first_choice = population[first_idx]
        second_choice = population[second_idx]

        # more successful (healthier?) creature has greater genetic dominance.
        # Fitness is looked up for the creatures actually selected, not for
        # the loop position.
        f1 = fitness_positive[first_idx]
        f2 = fitness_positive[second_idx]
        if f1 >= f2:
            # np.clip returns the clipped value; it must be assigned to take effect.
            dominance = float(np.clip((f2/f1) * 0.5, 0.3, 0.7))
            child = mate(second_choice,first_choice,gen, mutate,dominance).to(device)
        else:
            dominance = float(np.clip((f1/f2) * 0.5, 0.3, 0.7))
            child = mate(first_choice,second_choice,gen, mutate,dominance).to(device)

        new_population.append(child)

    # One extra child (default dominance) keeps the population size constant.
    child = mate(population[0],population[len(population)-1],gen, mutate).to(device)
    new_population.append(child)

    return new_population



In [4]:



def train_gan(population,p_fitness,old_max,batch_size = 20,n_epochs = 1000):
    """Train the notebook-level ``gen`` / ``dis`` pair on the current population.

    Uses the globals ``gen``, ``dis``, ``gen_optimizer``, ``dis_optimizer`` and
    ``device``. For every epoch and every full batch of ``batch_size`` creatures:

    * every 4th epoch the discriminator is trained on the creatures' parameter
      vectors, with label 1 for creatures whose fitness is at or above the
      value found at index ``int(size * 0.75)`` of the sorted fitness array and
      label 0 for the rest;
    * every 2nd epoch the discriminator is trained to label generated children 0;
    * every epoch the generator is trained to make the discriminator output 1
      for its children, plus 0.1 times a term built from the dominance-weighted
      squared distances between each child and its two parents.

    Args:
        population: Sequence of creatures; each is passed to ``get_params``.
        p_fitness: Per-creature fitness values, aligned with ``population``.
        old_max: Accepted for interface compatibility; not used.
        batch_size: Creatures per batch; a trailing partial batch is skipped.
        n_epochs: Number of passes over the population.
    """
    p_fitness = np.asarray(p_fitness)
    min_fit = np.sort(p_fitness)[int(p_fitness.size*0.75)]
    ranking = (p_fitness>=min_fit)*1
    print(ranking)
    # Build the labels on the working device without naming a CUDA-only tensor
    # type, so the function also runs when `device` is the CPU.
    ranking = torch.as_tensor(ranking, dtype=torch.float32, device=device)

    bce = nn.BCELoss()
    n_batches = len(population)//batch_size

    for e in range(n_epochs):
        for i in range(n_batches):
            start = i*batch_size
            stop = start + batch_size

            # ---- discriminator update -------------------------------------
            dis_optimizer.zero_grad()
            dis_has_loss = False

            if e % 4 == 0:
                # train discriminator on population
                real_batch = torch.stack(
                    [get_params(creature) for creature in population[start:stop]]).to(device)
                dis_out_real = dis(real_batch).squeeze(-1)
                dis_error_real = bce(dis_out_real, ranking[start:stop])
                dis_error_real.backward()
                dis_has_loss = True

            # Generate one child per slot from two randomly chosen parents and
            # a dominance value drawn uniformly from [0.25, 0.75).
            dominance = torch.from_numpy((np.random.rand(batch_size)*0.5) + 0.25)
            dominance = dominance.to(device=device, dtype=torch.float32).unsqueeze(-1)
            mom = []
            dad = []
            child = []
            for b in range(batch_size):
                m = get_params(random.choice(population))
                d = get_params(random.choice(population))
                c_data = torch.cat([m,d,dominance[b]]).unsqueeze(0)
                c = gen(c_data).squeeze(0)

                mom.append(m)
                dad.append(d)
                child.append(c)

            mom = torch.stack(mom).to(device)
            dad = torch.stack(dad).to(device)
            child = torch.stack(child).to(device)

            if e % 2 == 0:
                # train discriminator on generator output. The children are
                # detached so this loss cannot push gradients into the
                # generator (which would work against the generator's own loss).
                dis_out_fake = dis(child.detach()).squeeze(-1)
                dis_error_fake = bce(dis_out_fake, torch.zeros_like(dis_out_fake))
                dis_error_fake.backward()
                dis_has_loss = True

            # Step the discriminator only when it actually had a loss, and
            # before the generator pass, so gradients produced by the generator
            # loss are never applied to the discriminator.
            if dis_has_loss:
                dis_optimizer.step()

            # ---- generator update -----------------------------------------
            gen_optimizer.zero_grad()

            mom_loss = torch.mean(torch.pow(torch.sub(child,mom),2) * (dominance))
            dad_loss = torch.mean(torch.pow(torch.sub(child,dad),2) * (1-dominance))

            # Penalise imbalance between the two weighted distances
            # (larger / smaller - 1), then add their sum.
            if mom_loss > dad_loss:
                child_error = torch.div(mom_loss,dad_loss)-1
            else:
                child_error = torch.div(dad_loss,mom_loss)-1
            child_error = child_error + (mom_loss + dad_loss)

            dis_out = dis(child).squeeze(-1)
            gen_error = bce(dis_out, torch.ones_like(dis_out)) + (child_error*0.1)
            gen_error.backward()
            gen_optimizer.step()
    

In [None]:
# Randomly initialise the starting population: population_size freshly
# constructed Creature networks on the working device.
population_size = 50
population = []
for p in range(population_size):
    population.append(Creature().to(device))
    
# 86 = (4*6+6) + (6*6+6) + (6*2+2), i.e. the flattened parameter count of a
# Creature with 4 inputs and 2 outputs — presumably CartPole's sizes; confirm
# before switching to another entry of `envs`.
gen = Generator(86).to(device)
#gen(torch.zeros([10,86*2])).shape

dis = Discriminator().to(device)
#dis(torch.zeros([10,86])).shape

lr = 0.0001
# NOTE(review): these two optimizers are replaced at the top of every
# generation below, so these instances are never stepped.
gen_optimizer = torch.optim.Adam(gen.parameters(), lr=lr)
dis_optimizer = torch.optim.Adam(dis.parameters(), lr=lr)

print("starting training")
n_generations = 100
# With batch_size equal to population_size, train_gan sees exactly one batch per epoch.
batch_size = 50
for i in range(n_generations):
    #gen = Generator(86).to(device)
    #dis = Discriminator().to(device)
    # train_gan reads gen_optimizer / dis_optimizer as globals. Rebinding them
    # here starts each generation with brand-new optimizer objects while the
    # gen / dis networks themselves are kept.
    gen_optimizer = torch.optim.Adam(gen.parameters(), lr=lr)
    dis_optimizer = torch.optim.Adam(dis.parameters(), lr=lr)
    
    
    
    n_behavior_samples = 50
    # behavior_samples is received but not read anywhere in this cell.
    p_fitness, behavior_samples = measure_population_fitness(population,env,device,discrete_actions,
                                                             max_steps = 500, 
                                                             n_behavior_samples=n_behavior_samples)
    print(p_fitness)
    if i == 0:
        old_max = np.max(p_fitness)
    #print(p_fitness)
    #print((p_fitness>np.mean(p_fitness))*1)
    
    # `i % 1 == 0` is always true; the modulus is a knob for training less often.
    if i % 1 == 0:
        train_gan(population,p_fitness,old_max,batch_size = batch_size,n_epochs = 50)
        print("train")
    old_max = np.max(p_fitness)
    
    
    # Always true as well: replay the best creature of this generation with
    # rendering on. The returned fitness is not read afterwards.
    if i % 1 == 0:
        fitness = measure_fitness(population[np.argmax(p_fitness)],env,device,discrete_actions,render = True)
    # Replace the population with children produced through the generator.
    population = evolve(population,gen,p_fitness,True)
    print("Generation {}  fitness : {}".format(i+1,np.max(p_fitness)))
    print("#################################")


starting training
[  9.   8.  17.  10.  10.   9.  21.  13.  83.   9.  10.   8.   9.  10.
   9.   8.  47.   9.   8.  10.  10.   9.  36.   8.  10.   8.  10.  10.
  13.   9.  27.  10.   8.   9.  35.   9.  10.  10.  31. 201.  11.  10.
  10.  10.   9.   9.  16.  10.  10.  10.]
[0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0
 0 1 1 1 0 0 0 0 0 1 0 0 0]


In [None]:
# Scratch check: build the integers 5..14, then keep only the values that are at least 7.
yeet = np.arange(5, 15)
print(yeet)
yeet = yeet[yeet >= 7]
print(yeet)