In [1]:
# genetic_openai.py 
# This is a genetic algorithm that tries to optimize the weights 
# of a simple neural network. The network chooses the action to  
# take based on the environment state. 
# A population of randomly initialized agents is created and
# tested. Agents with higher fitness scores are more likely to be
# selected as parents of the next generation. The next generation
# consists of crossovers of parent pairs - here called mutated - and
# the top parents; random weight changes - here called randomized -
# are then applied to all of them.

# Imports needed
import gym
import numpy as np

[2017-02-12 02:09:48,035] You have 'numpy' version 1.8.2 installed, but 'gym' requires at least 1.10.4. HINT: upgrade via 'pip install -U numpy'.


In [2]:
# Initializing the starting population
def init(input_size, output_size, layer_count, hidden_size, population_size):
    """Create the starting population of randomly weighted networks.

    Each network is a list of layer_count+1 weight matrices with values
    drawn uniformly from [-1, 1). The first matrix has input_size rows,
    the last one has output_size columns, and hidden_size[k] gives the
    width of hidden layer k.

    Returns a list with population_size networks.
    """
    def _weight_shape(layer):
        # Rows: width of the layer feeding in; columns: width of the layer fed.
        rows = input_size if layer == 0 else hidden_size[layer-1]
        cols = hidden_size[layer] if layer < layer_count else output_size
        return (rows, cols)

    return [
        [2*np.random.random(_weight_shape(layer))-1
         for layer in range(layer_count+1)]
        for _agent in range(population_size)
    ]


In [3]:
# Run the provided network on the current state (inp).
# If box is False, return the index of the strongest output (an int);
# otherwise return the output layer as a numpy float array.
def get_action(net,inp,box=False):
    """Feed inp through the network and return the resulting action.

    net is a list of weight matrices applied in order with np.dot.
    Every layer output goes through a sigmoid, except the last layer
    when box is True, which stays linear.

    Returns np.argmax of the final output when box is False, and the
    final output array itself when box is True.
    """
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    activation = inp
    last_layer = len(net)-1
    for idx, weights in enumerate(net):
        # Output of the previous layer (or the input) times the weights
        activation = np.dot(activation, weights)

        # Only a box (continuous) action keeps its final layer linear
        linear_output = box and idx >= last_layer
        if not linear_output:
            activation = sigmoid(activation)

    if box:
        return activation # numpy array
    return np.argmax(activation) # int index

# Compute the fitness of a provided network based on the environment.
# Stop after length frames
def train(net,length,env,box, render = False):
    """Run one episode with the given network and return its fitness.

    The environment is reset, then stepped with the actions chosen by
    get_action until it reports done or length steps have been taken.
    At least one step is always taken. The fitness is the sum of all
    rewards received. If render is True, env.render() is called before
    every step.
    """
    state = env.reset()
    total_reward = 0
    steps = 0
    while True:
        if render:
            env.render()
        # Feed the nn with the env state to get the action
        move = get_action(net,state,box)
        state, reward, finished, _info = env.step(move)
        total_reward += reward
        steps += 1

        if steps >= length or finished:
            return total_reward


In [4]:
# Clones a list of numpy arrays
def clone_net(net):
    """Return a new list holding a .copy() of every array in net."""
    duplicate = []
    for weights in net:
        duplicate.append(weights.copy())
    return duplicate

# Choose the parents of the next generation (nets are sorted based on fitness)
def choose_parents(nets, population_size = -1):
    """Pick parent pairs for the next generation.

    nets must be ordered with the fittest network first. For every pair
    two indices are drawn as floor(u**2 * len(nets)) with u uniform in
    [0, 1), which favours the front of the list. Both chosen networks
    are cloned, so the returned pairs do not share arrays with nets.

    population_size is the number of pairs to return; -1 means one pair
    per network in nets. An empty nets list gives an empty result.
    """
    pool_size = len(nets)
    if pool_size == 0:
        return []

    # Amount of parent pairs to choose
    pair_count = pool_size if population_size == -1 else population_size

    def _biased_index():
        # Squaring the uniform sample shifts the mass towards index 0
        sample = np.power(np.random.uniform(), 2)
        return int(np.floor(sample*pool_size))

    pairs = []
    for _pair in range(pair_count):
        first_idx = _biased_index()
        second_idx = _biased_index()
        pairs.append((clone_net(nets[first_idx]), clone_net(nets[second_idx])))
    return pairs


In [5]:
# Crosses the parent pairs over with each other to get offspring
def mutate_neurons(parents):
    """Cross every parent pair over into one child network.

    parents is a list of (net0, net1) pairs. The child starts as a clone
    of net0; then, for every neuron (column) of every layer, the
    incoming weights are taken as a whole from net0 or net1 with equal
    probability. Copying whole columns keeps each neuron's weights
    together instead of mixing them weight by weight.

    Returns the list of children, one per pair.
    """
    def _crossover(first, second):
        offspring = clone_net(first)

        for depth in range(len(first)):
            neuron_count = first[depth].shape[1]

            for neuron in range(neuron_count):
                # Choose the donor of this neuron randomly
                donor = first if np.random.uniform() >= 0.5 else second
                # Copy all incoming weights of the neuron at once
                offspring[depth][:, neuron] = donor[depth][:, neuron]
        return offspring

    children = []
    for pair in parents:
        children.append(_crossover(pair[0], pair[1]))
    return children


In [6]:
# Randomizes the provided agent networks
def randomize(nets,percentage,max_change_rate):
    """Randomly perturb the weights of the provided networks in place.

    nets            -- list of networks, each a list of float weight arrays
    percentage      -- probability (0..1) that a single weight is changed
    max_change_rate -- upper bound of the absolute change of a weight

    Every selected weight receives a change r*|r|*max_change_rate with r
    uniform in [-1, 1), so small changes are more likely than large
    ones and both signs are equally likely. Returns None.
    """
    def _randomize(net,percentage,max_change_rate):
        for w in net:
            # Which weights to change. This has to be an independent
            # draw: reusing the same samples for the selection and for
            # the magnitude would restrict every change to the samples
            # that are <= percentage, i.e. to large negative changes only.
            selected = np.random.random(w.shape) <= percentage

            delta = 2*np.random.random(w.shape)-1 # Between -1 and 1
            delta *= np.abs(delta) # Signed square: small changes more likely
            delta *= max_change_rate # Scale into the max change rate
            delta[np.logical_not(selected)] = 0 # Leave the other weights alone
            w += delta # In place, so the caller's arrays are updated

    for net in nets:
        _randomize(net,percentage,max_change_rate)
    

In [7]:
# Parameters
env_name = "CartPole-v0"
env = gym.make(env_name)

# The first observation is only used to measure the network input size
observation = env.reset()
obs_dim = len(observation) # Network input size

# Check if the environment is taking action indices
# or float activations. action_len = Network output size
if isinstance(env.action_space, gym.spaces.discrete.Discrete):
    action_len = env.action_space.n
    box = False
else:
    action_len = env.action_space.shape[0]
    box = True


random_percentage = 0.2 # Fraction of weights to change
max_change_rate = 0.01 # Maximum random weight change rate
train_length = 500 # Frames after which each run is cut off
population_size = 100 # Amount of agents per generation

# Fraction of parent crossovers in the next generation
parents_portion = 0.8 

# Single-argument print(...) gives the same text on Python 2 and 3
print("Random-Percentage: %s" % random_percentage)
print("Maximum-Change-Rate: %s" % max_change_rate)
print("Max-Length-Of-Each-Run: %s" % train_length)
print("Population-Size: %s" % population_size)
print("Child-Percentage: %s" % parents_portion)


[2017-02-12 02:09:48,481] Making new env: CartPole-v0


Random-Percentage: 0.2
Maximum-Change-Rate: 0.01
Max-Length-Of-Each-Run: 500
Population-Size: 100
Child-Percentage: 0.8


In [8]:
# Let's get to the action
epochs = 20 # Runs 20 generations
hidden_size = [] # Hidden layer sizes (no hidden layers for CartPole-v0)

# Split of every new generation into crossover children and surviving
# parents. The survivor count is derived by subtraction because
# int(population_size*(1-parents_portion)) truncates 19.999999999999996
# to 19 for 100 and 0.8, which would shrink the population to 99.
crossover_count = int(round(population_size*parents_portion))
survivor_count = population_size - crossover_count

# Initializing the population
nets = init(
    obs_dim,
    action_len,
    len(hidden_size),
    hidden_size,
    population_size)

# Training loop
for generation in range(epochs):
    # Calculate fitness values for each agent
    fits = np.zeros((len(nets)),dtype=np.float32)
    for i in range(len(nets)):
        fits[i]  = train(nets[i],train_length,env,box,render=False)
        
    # Sort parents by fitness - the fittest at the top
    nets_inds = fits.argsort()[::-1]
    sorted_nets = [nets[i] for i in nets_inds]
    
    # Choose parent pairs of the next generation
    parents = choose_parents(sorted_nets, crossover_count)
    
    # Mutate the parents together to get the offspring
    mutated_childs = mutate_neurons(parents)
    
    # Keep the top parents. They are cloned because randomize() changes
    # the weights in place; without the clone the networks in `nets`
    # would be altered before the best agent is replayed below.
    parent_childs = [clone_net(net) for net in sorted_nets[0:survivor_count]]
    
    # Combine the children
    childs = mutated_childs
    childs.extend(parent_childs)
    
    # Randomize the weights of the children networks
    randomize(childs, random_percentage, max_change_rate)
    
    # Get the best fitness score and replay the agent that achieved it
    # (set render=True to watch it)
    best_fit = np.argmax(fits)
    print("Population: %s Best-Fittness: %s" % (generation, fits[best_fit]))
    train(nets[best_fit],train_length,env,box,render=False)
    
    # Update the generation (all children are fresh copies already)
    nets = childs
    

Population: 0 Best-Fittness: 200.0
Population: 1 Best-Fittness: 200.0
Population: 2 Best-Fittness: 200.0
Population: 3 Best-Fittness: 200.0
Population: 4 Best-Fittness: 200.0
Population: 5 Best-Fittness: 200.0
Population: 6 Best-Fittness: 200.0
Population: 7 Best-Fittness: 200.0
Population: 8 Best-Fittness: 200.0
Population: 9 Best-Fittness: 200.0
Population: 10 Best-Fittness: 200.0
Population: 11 Best-Fittness: 200.0
Population: 12 Best-Fittness: 200.0
Population: 13 Best-Fittness: 200.0
Population: 14 Best-Fittness: 200.0
Population: 15 Best-Fittness: 200.0
Population: 16 Best-Fittness: 200.0
Population: 17 Best-Fittness: 200.0
Population: 18 Best-Fittness: 200.0
Population: 19 Best-Fittness: 200.0
