In [1]:
from tensorflow.keras.models import Model, clone_model
from tensorflow.keras.layers import Input, Dense, Conv2D, LeakyReLU, Flatten, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist, cifar10
from tensorflow.keras.initializers import RandomNormal
from copy import deepcopy
import numpy as np
import random
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from numpy.random import choice
import copy

In [2]:
# Load the MNIST dataset (cifar10 is imported above but is not the dataset used here).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# One-hot encode the integer class labels.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Normalize the image data by dividing by 255.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Keep references to the test set as it is BEFORE the channel axis is added
# (presumably for plotting later -- not used in the visible cells).
x_test_display, y_test_display = x_test, y_test

# Append a trailing axis of length 1 so the arrays carry an explicit channel dimension.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

In [3]:
# Shuffle images and labels together, then take the first 5000 pairs as the
# small working set that the evolutionary search is evaluated on.
x_train, y_train = shuffle(x_train, y_train)
x_small, y_small = x_train[:5000], y_train[:5000]

# Re-shuffle the full training set after the subset has been taken.
x_train, y_train = shuffle(x_train, y_train)

In [4]:
class conv_net:
    """Thin wrapper around a Keras CNN classifier.

    The network is a stack of Conv2D -> BatchNormalization -> LeakyReLU blocks
    (one block per entry of ``conv_layer_filters``), followed by Flatten and a
    10-unit softmax Dense layer. The built Keras model is stored in ``self.model``.
    """

    def __init__(self,
                 input_shape,
                 conv_layer_filters,
                 conv_layer_kernel_size,
                 conv_layer_strides,
                 initializer_mean,
                 intializer_stdev):
        """Store the architecture settings and build the model right away.

        input_shape            -- shape of one input sample, passed to ``Input``
        conv_layer_filters     -- per-block filter counts; its length sets the number of blocks
        conv_layer_kernel_size -- per-block kernel sizes (indexed like the filters)
        conv_layer_strides     -- per-block strides (indexed like the filters)
        initializer_mean       -- mean of the RandomNormal kernel initializer
        intializer_stdev       -- stddev of the RandomNormal kernel initializer
                                  (the spelling is part of the keyword interface)
        """
        self.input_shape = input_shape
        self.initializer_mean = initializer_mean
        self.intializer_stdev = intializer_stdev
        self.conv_layer_filters = conv_layer_filters
        self.conv_layer_kernel_size = conv_layer_kernel_size
        self.conv_layer_strides = conv_layer_strides
        self._build()

    def _kernel_init(self):
        """Return a fresh RandomNormal initializer using the configured mean/stddev."""
        return RandomNormal(mean=self.initializer_mean, stddev=self.intializer_stdev)

    def _build(self):
        """Assemble the functional Keras model and store it in ``self.model``."""
        inputs = Input(shape=self.input_shape)
        tensor = inputs

        # One conv block per configured filter count; kernel size and stride
        # are looked up at the same position.
        for idx, n_filters in enumerate(self.conv_layer_filters):
            conv = Conv2D(filters=n_filters,
                          kernel_size=self.conv_layer_kernel_size[idx],
                          strides=self.conv_layer_strides[idx],
                          padding='same',
                          kernel_initializer=self._kernel_init())
            tensor = conv(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = LeakyReLU()(tensor)

        flat = Flatten()(tensor)
        classifier = Dense(10,
                           activation='softmax',
                           kernel_initializer=self._kernel_init())
        self.model = Model(inputs, classifier(flat))

    def compile(self):
        """Compile the model with categorical cross-entropy, Adam and an accuracy metric."""
        self.model.compile(optimizer=Adam(),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

    def train(self, x_train, y_train, epochs=1):
        """Fit the model for ``epochs`` epochs with batch size 32 and shuffling on."""
        fit_options = dict(epochs=epochs, batch_size=32, shuffle=True)
        self.model.fit(x_train, y_train, **fit_options)

    def evaluate(self, x_test, y_test):
        """Return whatever ``self.model.evaluate`` returns for the given data."""
        scores = self.model.evaluate(x_test, y_test)
        return scores

In [8]:
class darwin_net:
    """Evolve the weights of a population of neural nets with a genetic algorithm.

    Each "genome" is a wrapper object created by ``neural_net``. It must expose
    ``compile()``, ``evaluate(x, y)`` and a ``model`` attribute whose ``layers``
    support ``get_weights``/``set_weights`` and have a ``name``. No gradient
    training happens here: in every generation the genomes are scored with
    ``evaluate``, the best ones are kept, and the freed slots are refilled with
    crossed-over, mutated copies of the survivors.

    Note: constructing an instance builds the population AND immediately runs
    the whole evolution loop, because ``run()`` is called from ``__init__``.
    """

    def __init__(self, neural_net,
                 num_genomes_per_gen,
                 num_generations,
                 x_train, y_train,
                 keep_n_best,
                 mutation_prob=0.2,
                 crossover_prob=0.5):
        """Build the initial population and run the evolution.

        neural_net          -- class/factory called with the keyword arguments used in
                               ``build_neural_net``
        num_genomes_per_gen -- number of genomes created for the first generation
        num_generations     -- ``run()`` executes ``num_generations - 1`` evolution steps
        x_train, y_train    -- data every genome is evaluated on
        keep_n_best         -- int: that many best genomes survive each generation;
                               float in (0, 1): that fraction of the population survives
        mutation_prob       -- per-layer probability that a child layer is mutated
        crossover_prob      -- probability that a pair of children actually swaps layers
                               (previously this was read but never set)
        """
        self.neural_net = neural_net
        self.num_genomes_per_gen = num_genomes_per_gen
        self.num_generations = num_generations

        self.x_train = x_train
        self.y_train = y_train
        self.generation = 1
        self.mutation_prob = mutation_prob
        self.crossover_prob = crossover_prob
        # Resolve int/float keep_n_best to a concrete count up front so that
        # mutate() can always use it as an index.
        self.keep_n_best = keep_n_best
        self.keep_n_best = self._resolve_keep_count()
        self.genomes = self._build()
        self.run()

    def build_neural_net(self, initializer_mean, initializer_stdev):
        """Create and compile one genome with the fixed 3-block conv architecture."""
        # "intializer_stdev" (sic) is the keyword name conv_net expects.
        cnet = self.neural_net(input_shape=self.x_train.shape[1:],
                               conv_layer_filters=[16, 32, 32],
                               conv_layer_kernel_size=[3, 3, 3],
                               conv_layer_strides=[1, 1, 1],
                               initializer_mean=initializer_mean,
                               intializer_stdev=initializer_stdev)
        cnet.compile()
        return cnet

    def _build(self):
        """Create ``num_genomes_per_gen`` genomes with randomly initialized weights.

        The initializer mean/stddev start at 0/1 and are both increased by 0.01
        for every genome after the first.
        """
        genomes = []
        initializer_mean = 0.
        initializer_stdev = 1.
        for i in range(self.num_genomes_per_gen):
            if i > 0:
                initializer_mean += 0.01
                initializer_stdev += 0.01
            genomes.append(self.build_neural_net(initializer_mean, initializer_stdev))
        return genomes

    def evaluate_generation(self):
        """Score every genome; return one ``evaluate(...) + [genome_index]`` list per genome."""
        self.x_train, self.y_train = shuffle(self.x_train, self.y_train)
        results = []
        for i, genome in enumerate(self.genomes):
            # The genome's index is appended so it can be found again after sorting.
            results.append(genome.evaluate(self.x_train, self.y_train) + [i])
        return results

    def _resolve_keep_count(self):
        """Translate ``self.keep_n_best`` (int count or float fraction) into a count >= 1."""
        keep = self.keep_n_best
        if isinstance(keep, int):
            # Never keep the whole population: leave at least one slot for children.
            if keep >= self.num_genomes_per_gen:
                keep = self.num_genomes_per_gen - 1
        else:
            if keep > 0.9999:
                keep = 0.9
            keep = int(self.num_genomes_per_gen * keep)
        # Keeping nothing would leave no parents to sample from.
        return max(1, keep)

    def keep_best_genomes(self, results):
        """Return the ``keep_n_best`` results with the highest value at index 1 (accuracy)."""
        results = sorted(results, key=lambda x: x[1], reverse=True)
        self.keep_n_best = self._resolve_keep_count()
        return results[:self.keep_n_best]

    def _make_model_copy(self, model, model_copy=None):
        """Copy the weights of ``model`` layer by layer into ``model_copy``.

        When ``model_copy`` is not given, a new genome is built to receive the weights.
        """
        if model_copy is None:
            model_copy = self.build_neural_net(initializer_mean=0., initializer_stdev=1.)
        for target_layer, source_layer in zip(model_copy.model.layers, model.model.layers):
            target_layer.set_weights(source_layer.get_weights())
        return model_copy

    def _get_random_slice_point(self, model):
        """Pick a random layer index whose layer name contains 'conv' or 'dense'.

        Index 0 is never chosen; other layer kinds are rejected and re-drawn.
        """
        while True:
            random_slice_point_layer = random.randint(1, len(model.model.layers) - 1)
            layer_name = model.model.layers[random_slice_point_layer].name
            if 'conv' in layer_name or 'dense' in layer_name:
                return random_slice_point_layer

    def _do_crossover(self, model1, model2, slice_point):
        """With probability ``crossover_prob``, swap all layers from ``slice_point`` onwards."""
        if random.random() < self.crossover_prob:
            for i in range(slice_point, len(model1.model.layers)):
                layer1 = model1.model.layers[i]
                layer2 = model2.model.layers[i]
                # Read both weight lists before writing either, so the swap is clean.
                weights1, weights2 = layer1.get_weights(), layer2.get_weights()
                layer1.set_weights(weights2)
                layer2.set_weights(weights1)
        return model1, model2

    def crossover(self, parents, model_copy=None):
        """Append two children of the genomes at indices ``parents`` to the population.

        ``model_copy`` may hold up to two existing genomes to overwrite instead
        of building new models; any missing ones are built from scratch.
        """
        spare = list(model_copy) if model_copy else []
        model1 = self.genomes[parents[0]]
        model2 = self.genomes[parents[1]]

        # The parents themselves stay untouched; only the copies are modified.
        model1_copy = self._make_model_copy(model1, spare[0] if len(spare) > 0 else None)
        model2_copy = self._make_model_copy(model2, spare[1] if len(spare) > 1 else None)

        slice_point = self._get_random_slice_point(model1)
        model1_copy, model2_copy = self._do_crossover(model1_copy, model2_copy, slice_point)

        self.genomes.append(model1_copy)
        self.genomes.append(model2_copy)

    def mutate(self, dont_mutate_parents=True):
        """Randomly perturb conv/dense layers of the genomes.

        With ``dont_mutate_parents`` the first ``keep_n_best`` genomes (the
        survivors placed at the front by ``create_next_generation``) are skipped.
        """
        start_index = self.keep_n_best if dont_mutate_parents else 0
        for i in range(start_index, len(self.genomes)):
            for layer in self.genomes[i].model.layers:
                if random.random() < self.mutation_prob:
                    if 'conv' in layer.name or 'dense' in layer.name:
                        weights = layer.get_weights()
                        # Kernel first, then bias if the layer has one. Each tensor gets
                        # w += w * s + o, with a single scalar s in [-1.5, 1.5) and a
                        # single scalar o in [-0.5, 0.5) shared by the whole tensor.
                        for k in range(min(2, len(weights))):
                            weights[k] += weights[k] * (random.random() - 0.5) * 3 + (random.random() - 0.5)
                        layer.set_weights(weights)

    def create_next_generation(self, results):
        """Replace the population with the survivors plus crossed-over, mutated children.

        Children are produced in pairs, so when the number of free slots is odd
        the new population ends up one genome short of ``num_genomes_per_gen``.
        """
        self.generation += 1
        results = self.keep_best_genomes(results)

        # Parents are sampled proportionally to the value at index 1 (accuracy).
        probs = [r[1] for r in results]
        probs_sum = sum(probs)
        # If every survivor scored 0, fall back to uniform sampling (p=None)
        # instead of dividing by zero.
        prob_dist = [p / probs_sum for p in probs] if probs_sum > 0 else None

        # The last entry of each result is the genome index added in evaluate_generation.
        selected_genomes = [self.genomes[r[-1]] for r in results]
        rejected_genomes = [genome for genome in self.genomes if genome not in selected_genomes]
        self.genomes = selected_genomes

        for _ in range((self.num_genomes_per_gen - len(results)) // 2):
            draw2 = choice(len(results), size=2, replace=True, p=prob_dist)
            # Recycle rejected genomes as copy targets instead of building new models
            # (saves time); if fewer than two are left, crossover() builds the rest.
            recycled = [rejected_genomes.pop() for _ in range(min(2, len(rejected_genomes)))]
            self.crossover(draw2, model_copy=recycled)
        self.mutate(dont_mutate_parents=True)

    def run(self):
        """Run ``num_generations - 1`` evaluate -> select -> breed steps."""
        for i in range(self.num_generations - 1):
            print('Executing generation : ', (i + 1))
            results = self.evaluate_generation()
            self.create_next_generation(results)
            

In [9]:
# Evolve a population of 10 conv_net genomes on the small subset, keeping the
# 3 best genomes of every generation. Constructing darwin_net runs the whole
# evolution loop, so this cell is the long-running one.
dnet = darwin_net(
    neural_net=conv_net,
    num_genomes_per_gen=10,
    num_generations=100,
    x_train=x_small,
    y_train=y_small,
    keep_n_best=3,
    mutation_prob=0.5,
    crossover_prob=0.5,
)

Executing generation :  1
Executing generation :  2
Executing generation :  3
Executing generation :  4
Executing generation :  5
Executing generation :  6
Executing generation :  7
Executing generation :  8
Executing generation :  9
Executing generation :  10
Executing generation :  11
Executing generation :  12
Executing generation :  13
Executing generation :  14
Executing generation :  15
Executing generation :  16
Executing generation :  17
Executing generation :  18
Executing generation :  19


Executing generation :  20
Executing generation :  21
Executing generation :  22
Executing generation :  23
Executing generation :  24
Executing generation :  25
Executing generation :  26
Executing generation :  27
Executing generation :  28
Executing generation :  29
Executing generation :  30
Executing generation :  31
Executing generation :  32
Executing generation :  33
Executing generation :  34
Executing generation :  35
Executing generation :  36
Executing generation :  37
Executing generation :  38


Executing generation :  39
Executing generation :  40
Executing generation :  41
Executing generation :  42
Executing generation :  43
Executing generation :  44
Executing generation :  45
Executing generation :  46
Executing generation :  47
Executing generation :  48
Executing generation :  49
Executing generation :  50
Executing generation :  51
Executing generation :  52
Executing generation :  53
Executing generation :  54
Executing generation :  55
Executing generation :  56
Executing generation :  57


Executing generation :  58
Executing generation :  59
Executing generation :  60
Executing generation :  61
