# Configuration of Convolutional Neural Network Hyperparameters Using Genetic

#### Import libraries and packages

In [1]:
import random
from random import Random
import tensorflow as tf
from keras import layers
from keras import models
from keras.callbacks import EarlyStopping
from keras import optimizers
from keras.utils.np_utils import to_categorical


#### prepare data

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

train_images = x_train.astype('float32')/255
test_images = x_test.astype('float32')/255

train_labels=to_categorical(y_train)
test_labels=to_categorical(y_test)

val_images=train_images[:10000]
partial_images=train_images[10000:]

val_labels=train_labels[:10000]
partial_labels=train_labels[10000:]

print(f'''
Train:       X: {partial_images.shape}
             y: {partial_labels.shape}

Validation:  X: {val_images.shape}
             y: {val_labels.shape}

Test:        X: {test_images.shape}
             y: {test_labels.shape}
''')

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz

Train:       X: (40000, 32, 32, 3)
             y: (40000, 10)

Validation:  X: (10000, 32, 32, 3)
             y: (10000, 10)

Test:        X: (10000, 32, 32, 3)
             y: (10000, 10)



#### Definitions

In [3]:
POSSIBLE_LAYERS_N = [3,4,5,6,7,8,9,10]
POSSIBLE_FILTERS = [16, 32, 64, 128, 256]
POSSIBLE_KERNELS = [3,5,7]
N_LAYERS_MUTATE_PROBA = 0.2  # probability of changing number of layers of a chromosome


class CNNChromo:
    def __init__(self, random: 'Random', chromo: list = None):
        if chromo is None:
            n = random.choice(POSSIBLE_LAYERS_N)
            self.chromo = [n]
            for i in range(n):
                self.chromo.extend([
                    random.choice(POSSIBLE_FILTERS),  # L_i
                    random.choice(POSSIBLE_KERNELS),  # K_i
                ])
        else:
            self.chromo = chromo.copy()

        self.accuracy = None
        self.fitness = None

    def copy(self):
        copy_cnn_chromo = CNNChromo(Random(), chromo=self.chromo.copy())
        copy_cnn_chromo.fitness = self.fitness
        copy_cnn_chromo.accuracy = self.accuracy
        return copy_cnn_chromo

    def crossover_sequential(self, other_chromo: 'CNNChromo', random: 'Random'):
        n1 = self.chromo[0]
        chromo1 = self.chromo.copy()

        n2 = other_chromo.chromo[0]
        chromo2 = other_chromo.chromo.copy()

        pivot = random.randint(1, min(n1, n2) * 2 - 1)
        ofsp1 = chromo2[:pivot + 1]
        ofsp1.extend(chromo1[pivot + 1:])

        ofsp2 = chromo1[:pivot + 1]
        ofsp2.extend(chromo2[pivot + 1:])

        ofsp1[0] = len(ofsp1) // 2
        ofsp2[0] = len(ofsp2) // 2

        return CNNChromo(random, ofsp1), CNNChromo(random, ofsp2)

    def crossover_binary(self, other_chromo: 'CNNChromo', random: 'Random'):
        n1 = self.chromo[0]
        ofsp1 = self.chromo.copy()

        n2 = other_chromo.chromo[0]
        ofsp2 = other_chromo.chromo.copy()

        binary_list_len = max(n1, n2) * 2
        binary_list = [random.randint(0, 1) for _ in range(binary_list_len + 1)]

        for index in range(1, binary_list_len):
            if index < len(ofsp1) and index < len(ofsp2):
                if binary_list[index] == 1:
                    ofsp1[index] = other_chromo.chromo[index]
                    ofsp2[index] = self.chromo[index]

        return CNNChromo(random, ofsp1), CNNChromo(random, ofsp2)

    def mutate(self, random: 'Random'):
        mutated_chromo = self.chromo.copy()

        if random.random() < N_LAYERS_MUTATE_PROBA:  # change number of layers
            rand_n = random.choice(POSSIBLE_LAYERS_N)
            if rand_n < mutated_chromo[0]:  # decrease layers number
                mutated_chromo = mutated_chromo[: 1 + rand_n * 2]
                mutated_chromo[0] = rand_n
            else:  # increase layers number
                for i in range(rand_n - mutated_chromo[0]):
                    mutated_chromo.extend([
                        random.choice(POSSIBLE_FILTERS),  # L_i
                        random.choice(POSSIBLE_KERNELS),  # K_i
                    ])
                mutated_chromo[0] = rand_n

        else:  # change a single parameter
            rand_index = random.randint(1, mutated_chromo[0] * 2)
            if rand_index % 2 == 0:  # its index of a kernel_size
                mutated_chromo[rand_index] = random.choice(POSSIBLE_KERNELS)
            else:  # its index of a filters_number
                mutated_chromo[rand_index] = random.choice(POSSIBLE_FILTERS)

        return CNNChromo(random, mutated_chromo)


class GeneticCNN_finder:
    def __init__(self, max_gen, cross_prob, mutation_prob, max_population,
                 survive_percent, random_state, accuracy_function):
        self.max_gen = max_gen
        self.cross_prob = cross_prob
        self.mutation_prob = mutation_prob
        self.max_population = max_population
        self.survive_percent = survive_percent
        self.random = Random(random_state)
        self.accuracy_function = accuracy_function

    def selection(self, population):  # Roulette wheel selection method
        population_fitness = [p.fitness for p in population]
        total = sum(population_fitness)
        percentage = [round((x / total) * 100) for x in population_fitness]
        selection_wheel = []
        for pop_index, num in enumerate(percentage):
            selection_wheel.extend([pop_index] * num)
        parent1_ind = self.random.choice(selection_wheel)
        parent2_ind = self.random.choice(selection_wheel)
        return population[parent1_ind], population[parent2_ind]

    def run(self):
        generation = 0
        population = [CNNChromo(self.random) for _ in range(self.max_population)]
        elit = None

        while generation < self.max_gen:
            # ---------- obtain validation accuracy
            for p in population:
                p.accuracy = self.accuracy_function(p.chromo)

            # ---------- calculate each individual fitness
            if generation < self.max_gen / 2:
                sum_accuracies = sum([p.accuracy for p in population])
                for p in population:
                    p.fitness = p.accuracy / sum_accuracies
            else:
                population.sort(key=lambda p: p.accuracy, reverse=True)
                n = len(population)
                sum_ranks = (n * (n + 1)) / 2
                for i in range(n):
                    p = population[i]
                    r = i + 1
                    p.fitness = (n + 1 - r) / sum_ranks

            # ---------- get individual with best fitness
            elit = population[0]
            for p in population:
                if p.fitness > elit.fitness:
                    elit = p

            children_list = []
            next_population = []

            # ---------- crossover
            for _ in range(self.max_population):
                if self.random.random() <= self.cross_prob:
                    parent1, parent2 = self.selection(population)
                    if (generation / self.max_gen) < self.random.random():
                        children_list.extend(parent1.crossover_sequential(parent2, self.random))
                    else:
                        children_list.extend(parent1.crossover_binary(parent2, self.random))

            # ---------- mutation
            if generation < self.max_gen / 2:
                next_population = children_list
            else:
                survivors = population[:round(len(population) * self.survive_percent)]
                next_population.extend(survivors)
                next_population.extend(children_list)
                next_population.append(elit)

            for i in range(len(next_population)):
                if self.random.random() < self.mutation_prob:
                    next_population[i] = next_population[i].mutate(self.random)

            # ---------- fit size of next_population
            diff_n = len(next_population) - self.max_population
            if diff_n < 0:
                for _ in range(-diff_n):
                    next_population.append(CNNChromo(self.random))
            elif diff_n > 0:
                for _ in range(diff_n):
                    rnd_index = self.random.randint(0, len(next_population) - 1)
                    next_population.pop(rnd_index)

            # ---------- ensure existence of elit
            if elit not in next_population:
                rnd_index = self.random.randint(0, len(next_population) - 1)
                next_population[rnd_index] = elit

            population = next_population
            generation += 1

        return elit


#### Run Genetic

In [4]:
from datetime import datetime

map = {}

def get_CNN(chromo):
  model = models.Sequential()

  if chromo[0] == 3:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(256, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  elif chromo[0] == 4:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(256, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  elif chromo[0] == 5:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[9], chromo[10], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  elif chromo[0] == 6:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[9], chromo[10], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[10], chromo[11], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  elif chromo[0] == 7:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[9], chromo[10], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[11], chromo[12], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[13], chromo[14], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))
  elif chromo[0] == 8:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[9], chromo[10], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[11], chromo[12], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[13], chromo[14], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[15], chromo[16], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  elif chromo[0] == 9:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[9], chromo[10], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[11], chromo[12], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[13], chromo[14], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[15], chromo[16], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[17], chromo[18], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  elif chromo[0] == 10:
    model.add(layers.Conv2D(chromo[1], chromo[2], input_shape=(32,32,3), activation = "relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[3], chromo[4], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[5], chromo[6], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[7], chromo[8], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[9], chromo[10], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[11], chromo[12], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[13], chromo[14], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(2,2))
    model.add(layers.Conv2D(chromo[15], chromo[16], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[17], chromo[18], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(chromo[19], chromo[20], activation = "relu", padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation="softmax"))

  return model


def accuracy_tester(chromo):
    start_time = datetime.now()
    print(f'evaluating chromo: {chromo} ')

    if tuple(chromo) not in map :

      model = get_CNN(chromo)

      es = EarlyStopping(monitor="val_accuracy", patience = 5)
      op = optimizers.Adam(0.01)
      model.compile(loss = "categorical_crossentropy", optimizer = op, metrics = ["accuracy"])
      model.fit(partial_images, partial_labels,
                validation_data=(val_images, val_labels),
                epochs=12, callbacks = [es], verbose=0)

      loss, accuracy = model.evaluate(test_images, test_labels)

      time_elapsed = datetime.now() - start_time
      print(f'===> time-taken: [{time_elapsed}]     test-accuracy: [{accuracy*100:.2f}%] \n')
      map[tuple(chromo)] = accuracy
    return map[tuple(chromo)]



gcf = GeneticCNN_finder(
    max_gen=5,

    cross_prob=0.2,
    mutation_prob=0.3,
    max_population=6,
    survive_percent=0.4,

    random_state=0,
    accuracy_function=accuracy_tester
)

elit = gcf.run()

evaluating chromo: [9, 128, 3, 64, 7, 128, 5, 64, 5, 64, 7, 32, 7, 32, 5, 32, 3, 256, 5] 
===> time-taken: [0:05:31.779749]     test-accuracy: [74.50%] 

evaluating chromo: [5, 64, 3, 16, 7, 64, 5, 256, 3, 64, 5] 
===> time-taken: [0:02:26.928101]     test-accuracy: [72.69%] 

evaluating chromo: [8, 256, 7, 32, 7, 128, 5, 256, 5, 16, 7, 16, 3, 128, 7, 16, 7] 
===> time-taken: [0:04:01.737524]     test-accuracy: [70.67%] 

evaluating chromo: [10, 64, 3, 64, 7, 16, 3, 256, 3, 32, 3, 256, 5, 16, 3, 64, 7, 128, 3, 64, 7] 
===> time-taken: [0:03:31.808522]     test-accuracy: [75.10%] 

evaluating chromo: [7, 16, 7, 64, 7, 32, 7, 256, 7, 64, 5, 16, 7, 128, 5] 
===> time-taken: [0:03:27.664434]     test-accuracy: [74.42%] 

evaluating chromo: [6, 64, 3, 32, 3, 16, 7, 64, 5, 16, 3, 32, 3] 
===> time-taken: [0:01:33.895876]     test-accuracy: [74.02%] 

evaluating chromo: [10, 64, 3, 64, 7, 16, 3, 256, 3, 32, 3, 256, 5, 16, 3, 64, 7, 128, 3, 64, 7] 
evaluating chromo: [4, 128, 7, 64, 3, 32, 3, 

In [5]:

final_model = get_CNN(elit.chromo)
final_model.summary()

Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_142 (Conv2D)         (None, 30, 30, 256)       7168      
                                                                 
 batch_normalization_142 (Ba  (None, 30, 30, 256)      1024      
 tchNormalization)                                               
                                                                 
 conv2d_143 (Conv2D)         (None, 30, 30, 64)        802880    
                                                                 
 batch_normalization_143 (Ba  (None, 30, 30, 64)       256       
 tchNormalization)                                               
                                                                 
 max_pooling2d_69 (MaxPoolin  (None, 15, 15, 64)       0         
 g2D)                                                            
                                                     

In [6]:
print(f'''FINAL RESULT:
    chromosome  : {elit.chromo}
    accuracy    : {elit.accuracy*100:.2f}%
 \n\n''')

FINAL RESULT:
    chromosome  : [6, 256, 3, 64, 7, 256, 5, 16, 5, 256, 5, 16, 3]
    accuracy    : 80.34%
 


