**ENAS ALGORITHM**

**Required libraries**

In [2]:
import numpy as np
import copy
import sys
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from keras.utils.layer_utils import count_params
from random import randint, choice, choices, random, sample
from math import floor

**Preparing data**

In [3]:
# Dataset configuration: ten clothing classes, single-channel 28x28 images
num_classes = 10
input_shape = (28, 28, 1)

# Fashion-MNIST arrives already split into a train and a test partition
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Keep every second training sample (images and labels alike) to shorten training
x_train, y_train = x_train[::2], y_train[::2]

# Rescale pixels to the [0, 1] range and append the channel axis -> (28, 28, 1)
x_train = np.expand_dims(x_train.astype("float32") / 255, -1)
x_test = np.expand_dims(x_test.astype("float32") / 255, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (30000, 28, 28, 1)
30000 train samples
10000 test samples


**Global parameters**

In [10]:
# --- Search space ---
# Candidate filter counts a convolution layer can take (ConvLayer.feature_size)
FEAT_SIZES = (2, 4, 6, 10, 14, 22, 30, 46, 62)
MAX_KERPOOL_SIZE = 5 # Maximum side length of a convolution kernel and of a pooling window
# MAX_STRIDE_SIZE = 2
MAX_BLOCK_SIZE = 3 # Maximum number of conv layers in one VGG block
MAX_NETWORK_SIZE = 3 # Maximum number of blocks in a network
# Activation functions a convolution layer can use
ACTIVATIONS = ['relu', 'sigmoid', 'tanh']

# --- Bookkeeping ---
BEST_LIMIT = 5 # How many best individuals should be saved

# --- Mutation probabilities ---
MUT_PROB = 0.5 # Basic mutation probability (used by the layer-level mutate methods)
LAY_PROB = 0.4 # Chance to add or delete a layer in the block
BLOCK_PROB = 0.3 # Chance to add or delete a block in the network

# --- Training of every candidate network ---
# Batch size passed to model.fit (the name is spelled "BACH" everywhere it is used)
BACH_SIZE = 128
# Number of training epochs per candidate
EPOCHS = 5

**Convolution layer**

In [5]:
class ConvLayer():
    """Gene describing one convolution layer and the activation that follows it.

    Attributes:
        feature_size: number of filters, always an entry of FEAT_SIZES.
        kernel_size_x, kernel_size_y: kernel side lengths in
            [1, MAX_KERPOOL_SIZE]; the 1x1 kernel is never allowed.
        activation: name of the activation, an entry of ACTIVATIONS.
    """

    def __init__(self):
        """Draw a random layer, redrawing the kernel until it is not 1x1."""
        self.feature_size = choice(FEAT_SIZES)
        self.kernel_size_x = randint(1, MAX_KERPOOL_SIZE)
        self.kernel_size_y = randint(1, MAX_KERPOOL_SIZE)
        self.activation = choice(ACTIVATIONS)
        # To prevent 1x1 kernel size
        while self.kernel_size_x == 1 and self.kernel_size_y == 1:
            self.kernel_size_x = randint(1, MAX_KERPOOL_SIZE)
            self.kernel_size_y = randint(1, MAX_KERPOOL_SIZE)

    @staticmethod
    def _step(value, low, high):
        """Move value by one inside [low, high]; at a bound, step back inwards."""
        if low >= high:
            # Degenerate range: there is nowhere to move to.
            return value
        if value <= low:
            return value + 1
        if value >= high:
            return value - 1
        return value + choice([-1, 1])

    def layer(self, model):
        """Append this convolution ('same' padding) and its activation to model."""
        model.add(layers.Conv2D(self.feature_size,
                                kernel_size=(self.kernel_size_x, self.kernel_size_y),
                                padding='same'))
        model.add(layers.Activation(activation=self.activation))

    def mutate(self):
        """Mutate the layer in place with probability MUT_PROB.

        A single random draw decides for the whole layer: either every
        attribute moves (filter count and both kernel sides by one step,
        activation to a different function) or nothing changes.  The result
        never has a 1x1 kernel, matching the guarantee given by __init__.
        """
        # One draw for the whole layer, taken before touching any attribute so
        # it does not depend on the order in which attributes were created.
        if random() >= MUT_PROB:
            return

        # Filter count: jump to a neighbouring entry of FEAT_SIZES.
        index = FEAT_SIZES.index(self.feature_size)
        self.feature_size = FEAT_SIZES[self._step(index, 0, len(FEAT_SIZES) - 1)]

        # Kernel sides: one step each, kept inside [1, MAX_KERPOOL_SIZE].
        self.kernel_size_x = self._step(self.kernel_size_x, 1, MAX_KERPOOL_SIZE)
        self.kernel_size_y = self._step(self.kernel_size_y, 1, MAX_KERPOOL_SIZE)

        # Activation: it cannot pick the same function again.
        alternatives = [act for act in ACTIVATIONS if act != self.activation]
        if alternatives:
            self.activation = choice(alternatives)

        # Two independent downward steps (e.g. from 2x2) would give the 1x1
        # kernel that __init__ forbids; widen one randomly chosen side again.
        if self.kernel_size_x == 1 and self.kernel_size_y == 1:
            setattr(self, choice(('kernel_size_x', 'kernel_size_y')), 2)
        

    

**Pooling layer**

In [6]:
class PoolingLayer():
    """Gene describing the pooling layer that closes a block.

    Attributes:
        pool_size_x, pool_size_y: window side lengths in
            [MIN_POOL_SIZE, MAX_KERPOOL_SIZE].
        pool_type: 'max' or 'avg'.
    """

    # Smallest window side; the same lower bound is used for creation and mutation.
    MIN_POOL_SIZE = 2

    def __init__(self):
        """Draw a random pooling window and pooling type."""
        self.pool_size_x = randint(self.MIN_POOL_SIZE, MAX_KERPOOL_SIZE)
        self.pool_size_y = randint(self.MIN_POOL_SIZE, MAX_KERPOOL_SIZE)
        self.pool_type = choice(['max', 'avg'])

    def layer(self):
        """Return the Keras pooling layer for this gene (strides are fixed to 2x2).

        Returns None when pool_type is neither 'max' nor 'avg'.
        """
        pool_size = (self.pool_size_x, self.pool_size_y)
        if self.pool_type == 'max':
            return layers.MaxPooling2D(pool_size=pool_size, strides=(2, 2))
        elif self.pool_type == 'avg':
            return layers.AveragePooling2D(pool_size=pool_size, strides=(2, 2))

    def _step(self, value):
        """Move a window side by one; at a bound of the range, step back inwards."""
        if self.MIN_POOL_SIZE >= MAX_KERPOOL_SIZE:
            # Degenerate range: there is nowhere to move to.
            return value
        if value <= self.MIN_POOL_SIZE:
            return value + 1
        if value >= MAX_KERPOOL_SIZE:
            return value - 1
        return value + choice([-1, 1])

    def mutate(self):
        """Mutate each attribute independently with probability MUT_PROB.

        Window sides move by one step and stay inside
        [MIN_POOL_SIZE, MAX_KERPOOL_SIZE], the range __init__ draws from
        (the lower bound used to be 1 here, letting windows shrink to 1).
        The pooling type flips between 'max' and 'avg'.
        """
        if random() < MUT_PROB:
            self.pool_size_x = self._step(self.pool_size_x)
        if random() < MUT_PROB:
            self.pool_size_y = self._step(self.pool_size_y)
        if random() < MUT_PROB:
            self.pool_type = 'avg' if self.pool_type == 'max' else 'max'

**VGG block**

In [7]:
class ConvBlock():
    """VGG-style block: 1..MAX_BLOCK_SIZE convolution layers closed by one pooling layer.

    Attributes:
        convs: list of ConvLayer genes, applied in order.
        pooling: the PoolingLayer gene appended after the convolutions.
    """

    def __init__(self):
        """Create a block with a random number of random convolution layers."""
        self.convs = [ConvLayer() for _ in range(randint(1, MAX_BLOCK_SIZE))]
        self.pooling = PoolingLayer()

    def block(self, model):
        """Append all convolution layers and then the pooling layer to model."""
        for layer in self.convs:
            layer.layer(model)
        model.add(self.pooling.layer())

    def mutate(self):
        """Mutate the block in place.

        With probability LAY_PROB the number of convolution layers changes by
        one: a full block always loses a random layer, a single-layer block
        always gains one, otherwise the direction is random.  Afterwards the
        pooling layer and every convolution layer get their own mutation.
        """
        size = len(self.convs)
        if random() < LAY_PROB:
            jump = choice([-1, 1])
            if size == MAX_BLOCK_SIZE or (jump == -1 and size > 1):
                self.convs.remove(choice(self.convs))
            if size == 1 or (jump == 1 and size < MAX_BLOCK_SIZE):
                # Index is taken from the current length, not the pre-removal one.
                self.convs.insert(randint(0, len(self.convs)), ConvLayer())
        # The pooling gene is mutated on every call, like the convolution
        # genes; it used to be touched only when the layer count changed.
        # Its own MUT_PROB draws decide whether anything actually moves.
        self.pooling.mutate()
        for layer in self.convs:
            layer.mutate()

**Network**

In [8]:
class Network():
    """Individual of the population: an ordered list of 1..MAX_NETWORK_SIZE blocks.

    Attributes:
        blocks: list of ConvBlock genes, applied in order.
        val_score, test_score: fitness values, set by evaluate().
    """

    def __init__(self, blocks=None):
        """Create a random network, or join two block lists into a child.

        Args:
            blocks: None for a random network; otherwise a pair
                (head, tail) of block lists that are concatenated.  The
                passed lists themselves are left untouched.
        """
        if blocks is None:
            self.blocks = [ConvBlock() for _ in range(randint(1, MAX_NETWORK_SIZE))]
        else:
            # Build a new list instead of appending to blocks[0], so the
            # caller's list is neither aliased nor modified.
            self.blocks = list(blocks[0]) + list(blocks[1])

    @staticmethod
    def _size_penalty(trainable_params):
        """Penalty subtracted from the accuracy; grows exponentially with model size."""
        return (3**((0.5*trainable_params+30000)/5000))/15000000

    def evaluate(self):
        """Build, train and score the network described by the blocks.

        The model is the blocks followed by Flatten, Dropout(0.5), a Dense
        layer with num_classes units, batch normalisation and softmax.  It is
        trained on x_train / y_train for EPOCHS epochs with 20% of the data
        held out for validation, then evaluated on x_test / y_test.

        Sets val_score (last-epoch validation accuracy) and test_score (test
        accuracy), each reduced by the size penalty.  Exceptions raised while
        building or training the model are not caught here.
        """
        print(self.print_info())
        model = keras.Sequential([keras.Input(shape=input_shape)])
        for block in self.blocks:
            block.block(model)
        model.add(layers.Flatten())
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(num_classes))
        model.add(layers.BatchNormalization(axis=1))
        model.add(layers.Activation(activation='softmax'))

        model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

        trainable_params = count_params(model.trainable_weights)

        history = model.fit(x_train, y_train, batch_size=BACH_SIZE, epochs=EPOCHS, validation_split=0.2, verbose=0)
        score = model.evaluate(x_test, y_test, verbose=0)

        penalty = self._size_penalty(trainable_params)
        # Validation accuracy of the last epoch that was run
        self.val_score = history.history['val_accuracy'][-1] - penalty
        # score is [loss, accuracy]
        self.test_score = score[1] - penalty

    def mutate(self):
        """Mutate the network in place.

        With probability BLOCK_PROB the number of blocks changes by one: a
        full network always loses a random block, a single-block network
        always gains one, otherwise the direction is random.  Afterwards
        every block is mutated.
        """
        size = len(self.blocks)
        if random() < BLOCK_PROB:
            jump = choice([-1, 1])
            if size == MAX_NETWORK_SIZE or (jump == -1 and size != 1):
                self.blocks.remove(choice(self.blocks))
            if size == 1 or (jump == 1 and size != MAX_NETWORK_SIZE):
                # Index is taken from the current length, not the pre-removal one.
                self.blocks.insert(randint(0, len(self.blocks)), ConvBlock())
        for block in self.blocks:
            block.mutate()

    def print_info(self):
        """Return the filter counts of every conv layer, grouped per block."""
        return [[layer.feature_size for layer in block.convs] for block in self.blocks]

**Evolutionary algorithm**

In [None]:
class ENAS:
    """Evolutionary search over Network individuals.

    Attributes:
        pop_size: number of individuals per generation.
        population: the current generation (list of Network).
        val_bests, test_bests: up to BEST_LIMIT (network, cycle) pairs with the
            highest val_score / test_score seen so far, sorted ascending, so
            the overall best entry is the last one.  cycle is 0 for the
            initial population.
        best_val, best_test: best val_score / test_score after each generation.
        avg_blocks: average number of blocks per network after each generation.
    """

    def __init__(self, pop_size=1):
        """Create and evaluate a random initial population of pop_size networks."""
        self.pop_size = pop_size
        self.population = [Network() for _ in range(pop_size)]
        self.val_bests = []
        self.test_bests = []

        self.avg_blocks = []
        self.best_val = []
        self.best_test = []

        self.initialization()

    @staticmethod
    def _safe_evaluate(network):
        """Evaluate network; an architecture that cannot be built or trained scores 0.

        Only Exception is caught, so KeyboardInterrupt and SystemExit still
        stop the run instead of being recorded as a failed network.
        """
        try:
            network.evaluate()
        except Exception as error:
            print('EVALUATION FAILED: ' + repr(error))
            network.val_score = 0
            network.test_score = 0

    @staticmethod
    def _record(bests, network, cycle, score_of):
        """Insert network into the ascending top list bests if it qualifies."""
        if len(bests) < BEST_LIMIT:
            bests.append((network, cycle))
        elif score_of(network) > score_of(bests[0][0]):
            # Drop the weakest entry to make room for the newcomer.
            del bests[0]
            bests.append((network, cycle))
        else:
            return
        bests.sort(key=lambda entry: score_of(entry[0]))

    def _record_bests(self, network, cycle):
        """Update both top lists with a freshly evaluated network."""
        self._record(self.val_bests, network, cycle, lambda net: net.val_score)
        self._record(self.test_bests, network, cycle, lambda net: net.test_score)

    def initialization(self):
        """Evaluate the initial population and start the statistics."""
        print('INITIALIZATION'.center(100, '-'))
        for i, network in enumerate(self.population):
            print('SPECIMEN ' + str(i+1) + '/' + str(self.pop_size))
            self._safe_evaluate(network)
            self._record_bests(network, 0)
        self.update()
        self.cycle_bests_info()

    def update(self):
        """Append the current best scores and the average block count to the history."""
        self.best_val.append(self.val_bests[-1][0].val_score)
        self.best_test.append(self.test_bests[-1][0].test_score)

        blocks = [len(net.blocks) for net in self.population]
        self.avg_blocks.append(sum(blocks)/self.pop_size)

    def evolve(self, cycles):
        """Run cycles generations of selection, crossover, mutation and evaluation.

        The next generation consists of the best individual of the current
        one (by val_score) plus all offspring except the worst.  After the
        last cycle the score history and the average block count are plotted.
        """
        for cycle in range(cycles):
            print(('CYCLE ' + str(cycle+1) + '/'+ str(cycles)).center(100, '-'))
            new_population = self.crossover(self.selection())
            for i, network in enumerate(new_population):
                print('SPECIMEN ' + str(i+1) + '/' + str(self.pop_size))
                network.mutate()
                self._safe_evaluate(network)
                self._record_bests(network, cycle+1)
            # Elitism: the best parent replaces the worst child.
            elite = sorted(self.population, key=lambda x: x.val_score)[-1]
            self.population = [elite]
            self.population.extend(sorted(new_population, key=lambda x: x.val_score)[1:])
            self.update()
            self.cycle_bests_info()

        # The history holds one entry per evaluated generation, including the
        # initial one and those of earlier evolve() calls, so the x axis is
        # derived from its length rather than from cycles.
        generations = np.arange(len(self.best_val))
        plt.figure(1)
        plt.plot(generations, self.best_val, 'r', label='best validation score')
        plt.plot(generations, self.best_test, label='best test score')
        plt.xlabel('cycle')
        plt.ylabel('score')
        plt.legend()
        plt.show()
        plt.figure(2)
        plt.plot(self.avg_blocks)
        plt.xlabel('cycle')
        plt.ylabel('average number of blocks')
        plt.show()

    def selection(self):
        """Binary tournament selection on val_score.

        Runs pop_size tournaments between two individuals drawn with
        replacement; the first one wins ties.

        Returns:
            dict mapping each winning network to its number of wins.
        """
        winners = {}
        for _ in range(self.pop_size):
            first, second = choices(self.population, k=2)
            winner = first if first.val_score >= second.val_score else second
            winners[winner] = winners.get(winner, 0) + 1
        return winners

    @staticmethod
    def _breed(head_parent, tail_parent, cut):
        """Child made of head_parent's blocks before cut and tail_parent's from cut on."""
        return Network([copy.deepcopy(head_parent.blocks[0:cut]),
                        copy.deepcopy(tail_parent.blocks[cut:])])

    def crossover(self, population):
        """Create pop_size offspring from the selected networks.

        Args:
            population: dict of network -> number of wins, as returned by
                selection().  It is deep-copied, the input is not modified.

        Single-block networks cannot be cut and are copied once per win.  If
        only one distinct multi-block network was selected it is copied once
        per win as well.  Otherwise parents are drawn among the multi-block
        networks with probability proportional to their wins and recombined
        with a one-point crossover until the population is full.

        Returns:
            list of new Network objects.
        """
        population = copy.deepcopy(population)
        new_population = []
        mb_nets = []
        mb_vals = []
        for net, val in population.items():
            if len(net.blocks) == 1:
                # One-block specimens
                new_population.extend(copy.deepcopy(net) for _ in range(val))
            elif len(net.blocks) > 1:
                # Multi-block specimens
                mb_nets.append(net)
                mb_vals.append(val)

        # If only one multi-block kind
        if len(mb_nets) == 1:
            new_population.extend(copy.deepcopy(mb_nets[0]) for _ in range(mb_vals[0]))
            return new_population

        total_wins = sum(mb_vals)
        probabilities = [val/total_wins for val in mb_vals]
        while len(new_population) < self.pop_size:
            parents = np.random.choice(mb_nets, 2, replace=True, p=probabilities)
            # The cut leaves at least one block on each side for both parents.
            cut = randint(1, min([len(network.blocks) for network in parents])-1)
            new_population.append(self._breed(parents[0], parents[1], cut))
            # If an odd number is left, only the first child is used.
            if len(new_population) < self.pop_size:
                new_population.append(self._breed(parents[1], parents[0], cut))
        return new_population

    def cycle_bests_info(self):
        """Print (score, cycle) for the saved best networks, validation list first."""
        print([(round(net.val_score, 5), cycle) for net, cycle in self.val_bests])
        print([(round(net.test_score, 5), cycle) for net, cycle in self.test_bests])

# sys.stdout = open('results.txt', "w")
# sys.stdout.close()



**Running the script**

In [None]:
# Number of networks kept in every generation
population_size = 20
# Number of evolution cycles run after the initial population is evaluated
iterations = 50

# Creating the ENAS object already builds and evaluates the initial population
test = ENAS(population_size)
# Runs the evolution cycles and plots the score / block-count history at the end
test.evolve(iterations)