In [20]:
'''
Created on Wednesday November 11, 2019

@author: Blake Richey

Implementation of NeuroEvolution Algorithm:
  Develop a neural network and implement a genetic algorithm that finds optimum
  weights, as an alternative to backpropagation
'''

import gym, operator
import os, datetime, random
import numpy             as np
import tensorflow        as tf
import matplotlib.pyplot as plt
from   time                          import time
from   tensorflow.keras.optimizers   import Adam
from   collections                   import deque
from   tensorflow.keras              import backend
from   sklearn.model_selection       import train_test_split
from   tensorflow.keras.applications import ResNet50
from   tensorflow.python.client      import device_lib
from   tensorflow.keras.models       import Sequential, Model
from   tensorflow.keras.callbacks    import TensorBoard, ModelCheckpoint
from   tensorflow.keras.layers       import Dense, Dropout, Conv2D, MaxPooling2D, \
    Activation, Flatten, BatchNormalization, LSTM


class NNEvo:

  def __init__(self, 
    tour=3, 
    cxrt=.2,
    mxrt=.01, 
    layers=1, 
    env=None,
    elitist=3,
    sharpness=1, 
    cxtype='avg',
    population=10, 
    transfer=False,
    generations=10, 
    selection='tour',
    fitness_goal=200,
    validation_size=0,
    activation='linear', 
    nodes_per_layer=[4]):

    '''
      config = {
        'tour': 3, 
        'cxrt': .2,
        'mxrt': .01,
        'layers': 1, 
        'env': None, 
        'elitist': 3,
        'sharpness': 1,
        'cxtype': 'avg',
        'population': 10, 
        'generations': 10, 
        'transfer': False,
        'selection': 'tour',
        'fitness_goal': 200,
        'validation_size': 0,
        'activation': 'linear', 
        'nodes_per_layer': [4], 
      }
    '''

    self.default_nodes   = 20
    self.env             = env
    self.mxrt            = mxrt        #chance of a single weight being mutated
    self.cxrt            = cxrt        #chance of parent being selected (crossover rate)
    self.best_fit        = None        #(model, fitness) with best fitness
    self.tour            = tour        #tournament sample size when using tour selection policy
    self.cxtype          = cxtype      #cross over type (gene splicing or avging)
    self.goal_met        = False       #holds model number that meets fitness goal
    self.num_layers      = layers      #qty of hidden layers
    self.elitist         = elitist     #n best models transitioned into nxt gen
    self.transfer        = transfer
    self.sharpness       = sharpness   #epochs to run when evaluating fitness
    self.selection_type  = selection   #selection type (cxrt/tour)
    self.activation      = activation  #activation type for output layer
    self.pop_size        = population  #number of neural nets in population
    self.generations     = generations 
    self.fitness_goal    = fitness_goal #goal for fitness (episode score) to reach
    self.validation_size = validation_size #number of episodes to run to validate a models success in reaching a fitness goal
    self.nodes_per_layer = nodes_per_layer #list of qty of nodes in each hidden layer
    self.num_features    = self.env.observation_space.shape[0]
    
    outputs = 1
    if hasattr(env.action_space, 'n'):
      outputs = self.env.action_space.n
    self.num_outputs     = outputs

    self.models = [] #list of individuals 
    self.pop    = [] #population (2d-list of weights)
    self.weight_shapes   = None
    self.weights_lengths = None
    self.plots = [] #points for matplotlib
    self.episodes = 0
    

  #--- Initialize Population --------------------------------------------------+
  def create_nn(self):
    '''
      Create individual of population: a dense network with `num_features`
      inputs, `num_layers` relu hidden layers and `num_outputs` outputs.

      The first call also records the layout needed to turn a flat gene
      string back into weight arrays (`weight_shapes`, `weights_lengths`)
      and prints the run settings.
    '''

    model = Sequential()
    model.add(Dense(self.num_features, input_shape = (self.num_features,)))

    for layer in range(self.num_layers):
      #missing or None entries fall back to the default layer width
      try:
        nodes = self.nodes_per_layer[layer]
      except IndexError:
        nodes = None

      if nodes is None:
        nodes = self.default_nodes

      model.add(Dense(units = nodes, activation = 'relu'))

    #output layer
    model.add(Dense(units = self.num_outputs, activation = self.activation))
    #`learning_rate` is the supported keyword; `lr` is deprecated in tf.keras
    model.compile(optimizer = Adam(learning_rate=0.001), loss = 'mse', metrics=['accuracy'])

    #create deserialize dependencies
    if self.weight_shapes is None:
      model.summary()
      self.weight_shapes = []
      self.weights_lengths = []

      #weights_lengths holds the cumulative end offset of every weight array
      offset = 0
      for x in model.get_weights():
        self.weight_shapes.append(x.shape)
        offset += x.size
        self.weights_lengths.append(offset)

      #mxrt == 1 requests a rate derived from the gene count; `==` rather than
      #`is`, because identity of int literals is an implementation detail
      if self.mxrt == 1:
        self.mxrt = 1/( self.weights_lengths[-1] * 1.8 )
      print('Weight Lengths:', self.weights_lengths)
      print('Mutation Rate:', self.mxrt)
      print('Crossover Type:', self.cxtype)
      print('Selection Type:', self.selection_type)
      print('Sharpness:', self.sharpness)
    return model
  
  def create_transfer_cnn(self, ref_model=None, y_dim=192, x_dim=192, rgb=3, fcn_weights=None):
    '''creates resnet model. will load deserialized weights by passing in weights'''

    if not ref_model:
      model = ResNet50(weights='imagenet', include_top=False, input_shape=(y_dim, x_dim, rgb))
      for layer in model.layers:
        layer.trainable = False
      
      pretrained_weights = model.get_weights()

      flattened = Flatten()(model.output)
      #Add FCN
      for layer in range(self.num_layers):

        try:
            nodes=self.nodes_per_layer[layer]
        except IndexError:
            nodes = None

        if nodes is None:
            nodes = self.default_nodes

        if layer == 0:
          add_layer = Dense(units = nodes, activation = 'relu')(flattened)
        else:
          add_layer = Dense(units = nodes, activation = 'relu')(add_layer)
      
      output = Dense(units = self.num_outputs, activation = self.activation)(add_layer)

      model = Model(model.inputs, output)
      model.compile(Adam(lr=1e-3), 'categorical_crossentropy', metrics=['acc'])
    else:
      model = ref_model

      if fcn_weights:
        weights = model.get_weights()[-len(self.weight_shapes):]
        print('Deserialized weights length:', len(weights))
        for i, matrix in enumerate(weights):
          print('Original', matrix)
          matrix[:] = fcn_weights[i]
          print('Result', matrix)
    
    #create deserialize dependencies
    if self.weight_shapes is None:
      model.summary()
      self.weight_shapes = []
      self.weights_lengths = []

      weights = model.get_weights()
      self.full_weights_length = len(weights)
      self.pretrained_weights_length = len(pretrained_weights)
      for i in range(len(pretrained_weights), len(weights)):
        self.weight_shapes.append(weights[i].shape)

        #generate indicies of weights to recreate weight structure from gene string
        length = len(weights[i].reshape(1, -1)[0].tolist())
        if not self.weights_lengths:
          self.weights_lengths.append(length)
        else:
          self.weights_lengths.append(self.weights_lengths[len(self.weights_lengths)-1]+length)
      if self.mxrt is 1:
        self.mxrt = 1/( self.weights_lengths[-1] * 1.8 )
      print('Weight Shapes:', self.weight_shapes)
      print('Weight Lengths:', self.weights_lengths)
      print('Mutation Rate:', self.mxrt)
      print('Crossover Type:', self.cxtype)
      print('Selection Type:', self.selection_type)
      print('Sharpness:', self.sharpness)
    
    return model
  
  def create_population(self):
    for _ in range(self.pop_size):
      if self.transfer:
        model = self.create_transfer_cnn()
      else:
        model = self.create_nn()
      self.models.append(model)
      self.pop.append(self.serialize(model))
  #----------------------------------------------------------------------------+

  #--- Fitness Calculation ----------------------------------------------------+

  def quality(self, model):
    '''
      fitness function. Returns quality of model
      Runs 1 episode of environment
    '''
    total_rewards = []
    for epoch in range(self.sharpness):
      self.episodes += 1
      done = False
      rewards = []
      envstate = self.env.reset()
      while not done:
        action = self.predict(model, envstate)
        envstate, reward, done, info = self.env.step(action)
        rewards.append(reward)
      
      total_rewards.append(sum(rewards))
    
    return sum(total_rewards)/len(total_rewards)
  
  #----------------------------------------------------------------------------+
  
  #--- Breed Population -------------------------------------------------------+
  def selection(self):
    '''
      generate mating pool, tournament && elistist selection policy
    '''
    selection = []

    ranked = [] #ranked models, best to worst
    for i, model in enumerate(self.models):
      fitness = self.quality(model)
      ranked.append((i, fitness))
      if self.fitness_goal is not None and fitness >= self.fitness_goal:
        if self.validation_size:
          valid = self.validate(self.models[i])
        else:
          valid = True
        
        if valid:
          self.goal_met = self.models[i] #save model that met goal
          self.best_fit = (i, fitness)
          break

    if not self.goal_met:  #if goal met prepare to terminate
      ranked = sorted(ranked, key=operator.itemgetter(1), reverse=True)
      print('Ranked:', ranked)
      self.best_fit = ranked[0]

      for i in range(self.elitist):
        selection.append(ranked[i])

      if self.selection_type == 'tour':
        while len(selection) < self.pop_size:
          tourny = random.sample(ranked, self.tour)
          selection.append(max(tourny, key=lambda x:x[1]))

      elif self.selection_type == 'cxrt':
        while len(selection) < self.pop_size:
          for model in ranked:
            if random.random() < self.cxrt:
              selection.append(model)
            

    self.plots.append(self.best_fit)
    return selection

  def crossover(self, parents):
    children = [] #gene strings

    #keep elites
    for i in range(self.elitist):
      index = parents[i][0]
      children.append(self.pop[index])

    parents = random.sample(parents, len(parents)) #randomize breeding pool

    #breed rest
    i = 0 #parent number, genes to get
    while len(children) < self.pop_size:
      parent1 = parents[i]
      parent2 = parents[len(parents)-i-1]

      parent1_genes = self.pop[parent1[0]]
      parent2_genes = self.pop[parent2[0]]
      if self.cxtype == 'splice':
        if self.num_layers > 1:
          genes = []
          for index, len_ in enumerate(self.weights_lengths): #splice each layer
            if index == 0:
              range_ = (0, len_)
            else:
              range_ = (self.weights_lengths[index-1], len_)

            #splice genes
            start = range_[0]
            end = range_[1]
            geneA = random.randrange(start, end)
            geneB = random.randrange(geneA, end+1)
            geneA -= start
            geneB -= start

            genes.append(splice_list(parent1_genes[start:end], parent2_genes[start:end], geneA, geneB))
          child = flatten(genes)
        else:
          geneA = random.randrange(0, len(parent1_genes))
          geneB = random.randrange(geneA, len(parent1_genes)+1)

          child = splice_list(parent1_genes, parent2_genes, geneA, geneB)
      else:
        child = ((np.array(parent1_genes) + np.array(parent2_genes)) / 2).tolist()
      
      children.append(child)
      i+=1
    
    return children
  
  def mutate(self, population):
    for ind, individual in enumerate(population):
      for i, gene in enumerate(individual):
        mxrt = self.mxrt
#        if self.pop_size > 10:
#          if ind == len(population) - 1: #Randomly initialize last child
#            mxrt = 1
        if random.random() < mxrt:
          individual[i] = random.uniform(-1, 1)
    
    return population
  #----------------------------------------------------------------------------+
  
  #--- Train/Evaluate ---------------------------------------------------------+

  def train(self, filename=None):
    self.create_population()
    print('Population created', len(self.pop))

    if filename:
      self.models[0].load_weights(filename)
      self.pop[0] = self.serialize(self.models[0])
      print('Model loaded from', filename)

    for i in range(self.generations):
      print('\nGeneration:', i+1, '/', self.generations)
      parents = self.selection()
      if not self.goal_met:
        print('Goal not met. Parents selected.')
        print('Best fit:', self.best_fit)
        children = self.crossover(parents)
        print('Breeding done.')
        new_pop = self.mutate(children)
        print('Mutations done.')
        
        print('New pop:', len(new_pop))
        self.pop = new_pop
        for i, individual in enumerate(new_pop):
          self.models[i].set_weights(self.deserialize(individual))
      else:
        print(f'Goal met! Episodes: {self.episodes}')
        self.goal_met.save_weights('best_model.h5')
        print('Best results saved to best_model.h5')
        break
    
    if not self.goal_met:
      if self.best_fit:
        self.models[self.best_fit[0]].save_weights('best_model.h5')
        print('Best results saved to best_model.h5')


  def evaluate(self, filename=None, epochs=0):
    if self.goal_met or filename:
      #load model
      if filename:
        model = self.create_nn()
        model.load_weights(filename)
        print(f'Weights loaded from {filename}')
      else:
        model = self.goal_met

      epoch = 0
      total_rewards = []
      #display results
      while (True, epoch<epochs)[epochs>0]:
        done = False
        rewards = []
        envstate = self.env.reset()
        while not done:
          action = self.predict(model, envstate)
          envstate, reward, done, info = self.env.step(action)
          if not epochs:
            self.env.render()
          rewards.append(reward)

        print('Reward:', sum(rewards))
        total_rewards.append(sum(rewards))
        rewards = []
        epoch+=1
      print('Epochs:', epoch, 'Average reward:', sum(total_rewards)/len(total_rewards))
  #----------------------------------------------------------------------------+

  #--- Validate Fitness -------------------------------------------------------+
  def validate(self, model):
    print('Validating Model...', end='')
    
    total_rewards = []
    n_epochs = self.validation_size
    #test results
    for epoch in range(n_epochs):
      done = False
      rewards = []
      envstate = self.env.reset()
      while not done:
        action = self.predict(model, envstate)
        envstate, reward, done, info = self.env.step(action)
        rewards.append(reward)

      total_rewards.append(sum(rewards))
    print(sum(total_rewards)/len(total_rewards))
    return sum(total_rewards)/len(total_rewards) >= self.fitness_goal
  #----------------------------------------------------------------------------+

  #--- Graph Functions --------------------------------------------------------+

  def show_plot(self):
    '''Plot the best fitness recorded in `self.plots` against its position in the list.'''
    best_fitness = [entry[1] for entry in self.plots] #best fitness
    generation = list(range(len(self.plots)))         #generation

    plt.plot(generation, best_fitness, label='Best fitness')
    plt.legend(loc=4)
    plt.show()
    
  #----------------------------------------------------------------------------+

  #--- Helper Functions -------------------------------------------------------+

  def predict(self, model, envstate):
    ''' decide best action for model. '''
    qvals = model.predict(self.adj_envstate(envstate))[0] 
    if self.num_outputs == 1:
      action = qvals #continuous action space
    else:
      action = np.argmax(qvals) #discrete action space
    
    return action

  def adj_envstate(self, envstate):
    return envstate.reshape(1, -1)

  def serialize(self, model):
    '''
      serializes model's weights into a gene string
    '''
    
    if self.transfer:
        weights = model.get_weights()[-len(self.weight_shapes):]
    else:
        weights = model.get_weights()
    flattened = []
    for arr in weights:
      flattened+=arr.reshape(1, -1)[0].tolist()
    
    return flattened

  def deserialize(self, genes):
    '''
      deserializes gene string into weights
    '''
    shapes = self.weight_shapes
    lengths = self.weights_lengths
    
    weights = []
    for i, val in enumerate(lengths):
      if i == 0:
        begin = 0
      else:
        begin = lengths[i-1]
      print(begin, val, len(genes[begin:val]))
      weights.append(np.array(genes[begin:val]).reshape(shapes[i]))
    
    return weights

def splice_list(list1, list2, index1, index2):
  '''
    Combine list1 and list2: positions `index1`..`index2` (inclusive) come
    from list1, everything before and after (up to len(list1)) from list2.
  '''
  head = list2[:index1]
  middle = list1[index1:index2+1]
  tail = list2[index2+1:len(list1)]

  return head + middle + tail

def flatten(L):
  'flatten 2d list into a new 1d list'
  return [item for row in L for item in row]
#------------------------------------------------------------------------------+


# environment the agents are built for
env = gym.make('CartPole-v0')
print('Environment created')

# hyper-parameters, passed to NNEvo as keyword arguments
config = dict(
  tour=3,
  cxrt=.2,
  mxrt=.01,
  layers=4,
  env=env,
  elitist=3,
  sharpness=1,
  cxtype='avg',
  population=1,
  generations=10,
  transfer=True,
  selection='tour',
  fitness_goal=200,
  validation_size=0,
  activation='softmax',
  nodes_per_layer=[2, 64, 32, 3],
)

# build the agent and its initial population
agents = NNEvo(**config)
agents.create_population()
print('Population created')


# start = time()
# train()
# end = time()
# print('Time training:', end-start)
# evaluate()

Environment created
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 192, 192, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 198, 198, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 96, 96, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 96, 96, 64)   256         conv1[0][0]                      
________________________________________________________________________

Weight Shapes: [(73728, 2), (2,), (2, 64), (64,), (64, 32), (32,), (32, 3), (3,), (3, 2), (2,)]
Weight Lengths: [147456, 147458, 147586, 147650, 149698, 149730, 149826, 149829, 149835, 149837]
Mutation Rate: 0.01
Crossover Type: avg
Selection Type: tour
Sharpness: 1
Population created


In [21]:
# Debugging cell: mutate the first individual and check whether the mutated
# genes make it back into the model. Statement order matters here.

# raise the mutation rate so changes are visible in the last few genes
agents.mxrt = .1
print('Serialized', agents.serialize(agents.models[0])[-10:])
# `prior` is the same list object as agents.pop[0] (no copy is made)
prior = agents.pop[0]
print('Prior', prior[-10:])
# mutate assigns into the lists it is given, so agents.pop[0] itself changes
new_pop = agents.mutate([agents.pop[0]])
# because of the aliasing above this compares the list with itself
print('New pop', prior == new_pop[0], new_pop[0][-10:])
print('Deserialized weights', (agents.deserialize(new_pop[0]))[2:])
# push the mutated genes into model 0, then re-serialize: the values printed
# last should match the 'New pop' line if the weights were actually applied
agents.create_transfer_cnn(ref_model=agents.models[0], fcn_weights=agents.deserialize(new_pop[0]))
print(agents.serialize(agents.models[0])[-10:])

Serialized [0.0, 0.0, 0.8034635782241821, -0.5858814716339111, 1.0408613681793213, 0.7532509565353394, 0.7962251901626587, 0.4386863708496094, 0.0, 0.0]
Prior [0.0, 0.0, 0.8034635782241821, -0.5858814716339111, 1.0408613681793213, 0.7532509565353394, 0.7962251901626587, 0.4386863708496094, 0.0, 0.0]
New pop True [0.0, 0.0, -0.9264164773663355, -0.5858814716339111, 1.0408613681793213, 0.7532509565353394, 0.7962251901626587, 0.4386863708496094, 0.0, 0.0]
0 147456 147456
147456 147458 2
147458 147586 128
147586 147650 64
147650 149698 2048
149698 149730 32
149730 149826 96
149826 149829 3
149829 149835 6
149835 149837 2
Deserialized weights [array([[-0.16468155,  0.15559876, -0.15382959,  0.12855414,  0.2315985 ,
         0.14620185, -0.0895633 ,  0.22191668, -0.7845502 ,  0.04234806,
         0.0857614 , -0.06249149, -0.25560695,  0.2332449 ,  0.1819177 ,
         0.02610868, -0.17330413,  0.07122463,  0.6927256 , -0.12565024,
        -0.15498292, -0.08325811,  0.18999344,  0.0995146 , -

In [7]:
# show the recorded gene layout: array shapes, then cumulative end offsets
for layout in (agents.weight_shapes, agents.weights_lengths):
  print(layout)

[(73728, 2), (2,), (2, 64), (64,), (64, 32), (32,), (32, 3), (3,), (3, 2), (2,)]
[147456, 147458, 147586, 147650, 149698, 149730, 149826, 149829, 149835, 149837]


In [None]:
  def deserialize(self, genes):
    '''
      deserializes gene string into weights

      Cuts `genes` at the cumulative offsets in `self.weights_lengths` and
      reshapes each piece to the matching entry of `self.weight_shapes`.
    '''
    ends = self.weights_lengths
    #start offset of piece k is the end offset of piece k-1 (0 for the first)
    starts = [0] + list(ends[:-1])

    return [
      np.array(genes[starts[k]:ends[k]]).reshape(self.weight_shapes[k])
      for k in range(len(ends))
    ]