In [1]:
import torch
import torch.nn as nn
import gym
import numpy as np
import random
import torch.nn.functional as F
from ops import *
from models import *
%matplotlib inline
import matplotlib.pyplot as plt
import random

# Candidate Gym environments; the index used below selects the one this notebook trains on.
envs = ['CartPole-v1','Acrobot-v1','MountainCar-v0','Pendulum-v0','BipedalWalker-v2','LunarLander-v2']
# envs[-2] is 'BipedalWalker-v2'.
env = gym.make(envs[-2]).unwrapped

# Derive the action type from the environment instead of hard-coding it, so
# switching the index above to a discrete-action environment keeps the flag
# (and the branch below) consistent.
discrete_actions = isinstance(env.action_space, gym.spaces.Discrete)

# Number of outputs a Creature needs: one per discrete action, or one per
# dimension of a continuous action vector.
if discrete_actions:
    creature_out_size = env.action_space.n
else:
    creature_out_size = env.action_space.shape[0]
    
import gc


WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.
WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.


In [2]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Length of one creature's parameter vector as returned by get_params().
# The throwaway Creature is built once; both sizes are derived from it.
output_num = len(get_params(Creature(env.observation_space.shape[0],creature_out_size)))
# Twice the parameter-vector length; handed to Generator as its input size.
input_num = output_num*2

# Shape of the external memory matrix: num_mems rows of mem_length values each.
mem_length = 10
num_mems = 5

class Discriminator(nn.Module):
    """1-D convolutional network that maps a parameter vector to a single score.

    The input batch of shape (batch, input_length) is treated as a
    one-channel sequence, passed through three convolutional stages and then
    through three linear layers down to one output value per sample.

    Args:
        input_length: length of each input vector. Defaults to the
            module-level ``output_num``.
        memory_length: width of the penultimate linear layer. Defaults to the
            module-level ``mem_length``.

    Raises:
        ValueError: if ``input_length`` is too short to survive the
            convolution/pooling stack.
    """

    # Every stage holds two Conv1d(kernel 5, no padding) layers (-4 each) and
    # one MaxPool1d(2, stride=1) (-1); there are three such stages.
    _LENGTH_SHRINK = 3 * (4 + 4 + 1)

    def __init__(self, input_length=None, memory_length=None):
        super(Discriminator, self).__init__()
        if input_length is None:
            input_length = output_num
        if memory_length is None:
            memory_length = mem_length

        # Sequence length left after the three stages; replaces the previous
        # hard-coded 977, which was only valid for inputs of length 1004.
        conv_length = input_length - self._LENGTH_SHRINK
        if conv_length < 1:
            raise ValueError(
                "input_length must be greater than {}, got {}".format(
                    self._LENGTH_SHRINK, input_length))

        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 16, 5, stride=1, padding=0),
            nn.BatchNorm1d(16),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(16, 8, 5, stride=1, padding=0),
            nn.BatchNorm1d(8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool1d(2, stride=1))

        self.layer2 = nn.Sequential(
            nn.Conv1d(8, 32, 5, stride=1,padding=0),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(32, 16, 5, stride=1,padding=0),
            nn.BatchNorm1d(16),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool1d(2, stride=1))

        self.layer3 = nn.Sequential(
            nn.Conv1d(16, 32, 5, stride=1,padding=0),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(32, 16, 5, stride=1,padding=0),
            nn.BatchNorm1d(16),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool1d(2, stride=1))

        self.layer4 = nn.Linear(16*conv_length, 128)
        self.layer5 = nn.Linear(128, memory_length)
        # Kept as a Sequential so parameter names (layer6.0.*) stay unchanged.
        self.layer6 = nn.Sequential(
            nn.Linear(memory_length, 1))

    def forward(self, out,r):
        """Score a batch of vectors.

        Args:
            out: tensor of shape (batch, input_length).
            r: memory read-out. Accepted for interface compatibility but not
                used in the computation at present.

        Returns:
            Tensor of shape (batch, 1) holding one unbounded score per sample.
        """
        # Add the channel dimension Conv1d expects: (batch, 1, input_length).
        out = out.unsqueeze(1)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten channels and positions into one feature vector per sample.
        out = out.view(out.size(0),out.size(1)*out.size(2))

        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        return out

# Module-level value forwarded positionally to mate() and as `a=` to
# gen_children(); its meaning is defined by those helpers (not visible in this notebook).
all_a = 0

In [3]:
def evolve(population,out_size,use_gen,p_fitness,mutation_rate,mutation_scale):
    """Breed ``out_size`` children from ``population``.

    Parents are drawn with the probabilities returned by
    ``get_pick_probabilities(p_fitness)``. Each child is produced by ``mate``
    from a first parent and a second parent that, when enough alternatives
    were drawn, is a different creature from the first.

    Args:
        population: sequence of creatures, indexable by the drawn indices.
        out_size: number of children to produce.
        use_gen: forwarded unchanged to ``mate``.
        p_fitness: fitness values used to build the pick probabilities.
        mutation_rate: per-creature mutation rates, indexed like ``population``.
        mutation_scale: forwarded unchanged to ``mate``.

    Returns:
        List of ``out_size`` children, each moved to ``device``.
    """
    #Choose creatures based on fitness; the extra draw is the fallback mate
    #for the last child.
    pick_probabilities = get_pick_probabilities(p_fitness)
    choice = np.random.choice(pick_probabilities.size,out_size+1, p = pick_probabilities)

    #mate and mutate creatures
    new_population = []
    for p in range(out_size):
        first_index = choice[p]

        #no incest: candidate mates are the draws that picked another creature
        candidates = choice[choice != first_index]
        if candidates.size > len(population)/2:
            second_index = candidates[random.randint(0,candidates.size-1)]
        else:
            #too few alternatives drawn: fall back to the next draw
            second_index = choice[p+1]

        #Use the mutation rate of the parent actually chosen. Previously the
        #second rate always came from choice[p+1], even when a different mate
        #had been picked above.
        child = mate(env,creature_out_size,all_a,device,
                     population[first_index],population[second_index],
                     mutation_rate[first_index],mutation_rate[second_index],
                     use_gen,mutation_scale).to(device)

        new_population.append(child)

    return new_population

In [4]:

def train_gan(population,p_fitness,batch_size = 20,n_epochs = 100):
    """Train the discriminator, read head and generator on the population.

    Uses the module-level ``gen``, ``dis``, ``read_head``, ``memory`` and
    their optimizers. For every batch the discriminator is regressed onto the
    fitness of real creatures and pushed to score generated children low,
    then the generator is updated to raise the score of its children.

    Args:
        population: list of creatures; only full batches are used.
        p_fitness: NumPy array of per-creature fitness targets.
        batch_size: number of creatures per batch.
        n_epochs: number of passes over the population.

    Returns:
        Tuple ``(gen_loss_all, dis_loss_all, rec_loss_all)`` of lists holding
        one Python float per epoch (the loss of that epoch's last batch).

    Raises:
        ValueError: if the population is smaller than one batch while
            ``n_epochs`` is positive.
    """
    n_batches = len(population)//batch_size
    if n_epochs > 0 and n_batches == 0:
        # Without a batch no loss is ever computed, so there is nothing to log.
        raise ValueError("population ({}) is smaller than batch_size ({})".format(
            len(population), batch_size))

    p_fitness = torch.from_numpy(p_fitness).type("torch.FloatTensor").to(device)
    mse = nn.MSELoss()

    gen_loss_all = []
    dis_loss_all = []
    rec_loss_all = []
    for e in range(n_epochs):
        for i in range(n_batches):
            start = i*batch_size
            stop = start+batch_size

            gen_optimizer.zero_grad()
            dis_optimizer.zero_grad()
            read_optimizer.zero_grad()

            #turn population into vectors
            real_batch = torch.cat(
                [get_params(creature).unsqueeze(0) for creature in population[start:stop]],
                dim=0).to(device)

            real_read = read_head(real_batch)
            real_read = read(memory.unsqueeze(0),real_read,device).squeeze(0)

            #train discriminator on population
            dis_out_r = dis(real_batch,real_read.squeeze(1)).squeeze(-1)
            rank = p_fitness[start:stop]
            dis_error_real = mse(dis_out_r,rank) * 5
            dis_error_real.backward()

            #generate children from population
            child = gen_children(population,device,gen,batch_size,a = all_a)
            fake_read = read_head(child)
            fake_read = read(memory.unsqueeze(0),fake_read,device).squeeze(0)
            dis_out_f = dis(child,fake_read.squeeze(1)).squeeze(-1)

            #train discriminator on generator output
            dis_error_fake = torch.mean(dis_out_f)
            # retain_graph: the same graph is reused for the generator pass below.
            dis_error_fake.backward(retain_graph=True)
            dis_optimizer.step()
            read_optimizer.step()

            #train generator
            # The backward pass above also left gradients on the generator.
            # The generator loss is the exact negation, so without clearing
            # them the two contributions would cancel.
            gen_optimizer.zero_grad()
            gen_error = -torch.mean(dis_out_f)
            gen_error.backward()
            gen_optimizer.step()

        #keep losses to draw graph; .item() stores plain floats so no autograd
        #graph is kept alive and matplotlib can plot the values directly.
        gen_loss_all.append(gen_error.item())
        dis_loss_all.append(dis_error_fake.item())
        rec_loss_all.append(dis_error_real.item())
    return gen_loss_all, dis_loss_all, rec_loss_all

def train_write(population,p_fitness,batch_size,n_epochs = 25):
    """Train the write head so the discriminator's scores match the fitness.

    Uses the module-level ``memory``, ``write_head``, ``read_head``, ``dis``
    and ``write_optimizer``. In each epoch the write head proposes an update
    for a per-creature copy of the memory, the discriminator scores each
    creature together with its read-out from that copy, and the scores are
    regressed onto the fitness values.

    Args:
        population: sequence of creatures; the first ``batch_size`` are used.
        p_fitness: NumPy array of fitness targets; the first ``batch_size``
            entries are used.
        batch_size: number of creatures to train on.
        n_epochs: number of optimisation steps.

    Returns:
        List with one Python float per epoch: the write loss of that step.
    """
    # One copy of the memory per creature in the batch.
    temp_mem = memory.repeat(batch_size,1,1)
    pf = torch.from_numpy(p_fitness).type("torch.FloatTensor").to(device)
    # Targets must line up with the creatures actually used below.
    pf = pf[:batch_size]
    mse = nn.MSELoss()

    total_write_loss = []
    # The loop variable used to be `e`, which the write-head unpacking below overwrote.
    for epoch in range(n_epochs):
        write_optimizer.zero_grad()

        #turn population into vectors
        real_batch = torch.cat(
            [get_params(population[b]).unsqueeze(0) for b in range(batch_size)],
            dim=0).to(device)

        w,e,a = write_head(real_batch,memory.contiguous().view(memory.numel()))

        mem = write(temp_mem,w,e,a,device)
        dis_out = []
        for idx,m in enumerate(mem):
            # Score each creature against its own updated memory copy.
            real_read = read_head(real_batch[idx]).unsqueeze(0)
            real_read = read(m.unsqueeze(0),real_read,device).squeeze(0)
            d = dis(real_batch[idx].unsqueeze(0),real_read).squeeze(-1)
            dis_out.append(d)
        dis_out = torch.stack(dis_out).squeeze(1)

        write_loss = mse(dis_out,pf)
        write_loss.backward()
        write_optimizer.step()

        # Store a plain float: keeps no autograd graph alive and is plottable.
        total_write_loss.append(write_loss.item())

    return total_write_loss

def write_to_memory(memory,batch_size):
    """Apply the write head's update for each creature to ``memory`` in turn.

    Reads the first ``batch_size`` creatures from the module-level
    ``population``. The write head is evaluated once on the whole batch and
    the resulting per-creature writes are then applied one after another.

    Args:
        memory: current memory tensor.
        batch_size: number of creatures whose writes are applied.

    Returns:
        The updated memory, detached from the autograd graph.
    """
    # Stack the parameter vectors of the creatures into one batch.
    param_rows = [get_params(population[idx]).unsqueeze(0) for idx in range(batch_size)]
    real_batch = torch.cat(param_rows, dim=0).to(device)

    flat_memory = memory.contiguous().view(memory.numel())
    w,e,a = write_head(real_batch,flat_memory)

    # Add a leading batch dimension of one and fold in one write per creature.
    updated = memory.unsqueeze(0)
    for idx in range(batch_size):
        updated = write(updated,
                        w[idx].unsqueeze(0),
                        e[idx].unsqueeze(0),
                        a[idx].unsqueeze(0),
                        device)
    return updated.squeeze(0).detach()

In [None]:
#randomly initialise starting population
population_size = 2
max_population = 20

batch_size = population_size
out_size = population_size
population = []

for p in range(population_size):
    population.append(Creature(env.observation_space.shape[0],creature_out_size).to(device))

gen = Generator(input_num,output_num,device).to(device)
dis = Discriminator().to(device)

read_head = ReadHead(output_num,num_mems).to(device)
write_head = WriteHead(output_num,num_mems,mem_length).to(device)
read_optimizer = torch.optim.Adam(read_head.parameters(), lr=0.001,betas=(0.9,0.999))
write_optimizer = torch.optim.Adam(write_head.parameters(), lr=0.001,betas=(0.9,0.999))


lr = 0.0001
#small constant that keeps the divisions below away from zero
epsilon = 0.000001
print("starting training")
print(len(get_params(Creature(env.observation_space.shape[0],creature_out_size))))
n_generations = 300000

#external memory starts out filled with ones
memory = torch.ones([num_mems,mem_length],requires_grad=False).to(device)

for i in range(n_generations):
    #reset learning rate decay after every generation
    gen_optimizer = torch.optim.Adam(gen.parameters(), lr=lr,betas=(0.9,0.999))
    dis_optimizer = torch.optim.Adam(dis.parameters(), lr=lr,betas=(0.9,0.999))

    #calculate population fitness
    p_fitness_ = measure_population_fitness(population,env,device,discrete_actions,min_reward=-100,
                                                             max_steps = 1000)


    print("population fitness : {}".format(p_fitness_))
    print("mean fit : {}".format(np.mean(p_fitness_)))

    #normalise population fitness
    centered = (p_fitness_-np.min(p_fitness_))**2
    p_fitness = ((centered - np.mean(centered))/np.sqrt(np.var(centered)+epsilon))

    write_loss = train_write(population,p_fitness,batch_size = batch_size,n_epochs = 50)
    memory = write_to_memory(memory,batch_size)

    #Train GAN
    gen_loss, dis_loss,rec_loss = train_gan(population,p_fitness,
              batch_size =batch_size,n_epochs = 50)

    #Every ten generations show progress
    if i %10 == 0 and i != 0:
        fitness = measure_fitness(population[np.argmax(p_fitness)],env,device,discrete_actions,min_reward=-100,
                                  render = True,max_steps = 500)

    #Scale of normal distribution used for mutation
    mutation_scale = 0.4

    #Calculate rate at which weights are mutated based on relative fitness
    centered = (-p_fitness_-np.min(-p_fitness_))
    #epsilon guards against a zero median (e.g. every creature has the same
    #fitness), which would otherwise produce NaN/inf rates that np.clip does
    #not repair.
    mutation_rate = (centered/(np.median(centered)+epsilon))*0.2
    mutation_rate = np.clip(mutation_rate,0,1)
    print("Mutation rate : {}".format(mutation_rate))

    #progressively grow population at start
    if out_size < max_population:
        out_size+=2
        batch_size = out_size

    #mate and mutate population
    population = evolve(population,out_size,gen,p_fitness,
                                        mutation_rate,mutation_scale)

    #float() accepts both plain numbers and single-element tensors, so the
    #curves plot regardless of how the losses were recorded.
    plt.plot([float(v) for v in gen_loss],label='gen')
    plt.plot([float(v) for v in dis_loss],label='dis_fake')
    plt.plot([float(v) for v in rec_loss],label='dis_real')
    plt.legend()
    plt.show()
    plt.plot([float(v) for v in write_loss],label='write')
    plt.legend()
    plt.show()
    print(memory)
    print("Generation {}  fitness : {}".format(i+1,np.max(p_fitness)))
    print("#################################")


In [None]:
# Re-create the environment before evaluation. The stray ",input_size" prefix
# that made this line a syntax error has been removed.
env = gym.make(envs[-2]).unwrapped

# Raw (un-normalised) fitness of every creature in the current population.
p_fitness = measure_population_fitness(population,env,device,discrete_actions,min_reward=-100,
                                                             max_steps = 200)

# Render a long rollout of the creature with the highest fitness.
fitness = measure_fitness(population[np.argmax(p_fitness)],env,device,discrete_actions,min_reward=-100,
                                  render = True,max_steps = 5000000)

In [None]:
# Scratch check of Tensor.repeat: tile one random row of five values ten times.
X = torch.randn(5).reshape(1, -1)
print(X)
# Ten copies along dim 0, one along dim 1 -> shape (10, 5).
X = X.repeat(10, 1)
print(X.shape)