In [12]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from keras.datasets.mnist import load_data
from keras.optimizers import Adam, Adadelta, Adagrad, Adamax, SGD, RMSprop
from tensorflow.keras import backend as K
import tensorflow as tf

class Classifier:
    
  def __init__(self, train_digits=None, train_labels=None, validation_digits=None, validation_labels=None,test_digits=None,test_labels=None,verbose=1):
    
    self.train_digits       = train_digits
    self.train_labels       = train_labels 
    self.test_digits        = test_digits 
    self.test_labels        = test_labels 
    self.validation_digits  = validation_digits
    self.validation_labels  = validation_labels

    self.verbose    = verbose

    self.numClasses = 10
    
    self.height     = self.train_digits.shape[1]
    self.width      = self.train_digits.shape[2]
    self.channels   = 1

    self.model      = None
   

  def fit(self, test=False, batchSize=64, nbEpochs=10):
    if not test:
      self.model.fit(self.train_digits, 
                    self.train_labels, 
                    batch_size=batchSize, 
                    epochs=nbEpochs,
                    verbose=self.verbose
                  )
    else:
      self.model.fit(self.train_digits, 
                    self.train_labels, 
                    batch_size=batchSize, 
                    epochs=nbEpochs,
                    verbose=self.verbose,
                    validation_data=(self.validation_digits, self.validation_labels)
                  )

  def clear(self):
    K.clear_session()
    # tf.reset_default_graph()
    tf.compat.v1.reset_default_graph()

    
  def evaluate(self, test=False, batchSize=64):
    if not test:
      scores = self.model.evaluate(self.validation_digits, self.validation_labels, batch_size=batchSize, verbose=self.verbose)
    else:
      scores = self.model.evaluate(self.test_digits, self.test_labels, batch_size=batchSize, verbose=self.verbose)
    return dict(zip(self.model.metrics_names, scores))

  # TODO: ESTUDAR OS OTIMIZADORES PARA VER SE SÃO ADEQUADOS
  def checkOptimizer(self, parameters):
    opt = None
    if parameters['optimizer'] == 'Adam':
        opt = Adam(learning_rate=parameters['learningRate'])

    if parameters['optimizer'] == 'Sgd':
        opt = SGD(learning_rate=parameters['learningRate'])
    
    if parameters['optimizer'] == 'RMSprop':
        opt = RMSprop(learning_rate=parameters['learningRate'])
    
    if parameters['optimizer'] == 'Adadelta':
        opt = Adadelta(learning_rate=parameters['learningRate'])
    
    if parameters['optimizer'] == 'Adagrad':
        opt = Adagrad(learning_rate=parameters['learningRate'])
    
    if parameters['optimizer'] == 'Adamax':
        opt = Adamax(learning_rate=parameters['learningRate'])
    
    return opt

  def configureArchitecture(self, parameters):
    self.model = Sequential()
    # CONVOLUTIONAL LAYER 1
    self.model.add(Conv2D(filters=parameters['cnnSize_1'], kernel_size=(3,3), activation='relu', padding='same', input_shape=(self.height, self.width, self.channels)))
    # MAXPOOLING LAYER 1
    self.model.add(MaxPool2D(pool_size=(2,2)))
    # CONVOLUTIONAL LAYER 2
    self.model.add(Conv2D(filters=parameters['cnnSize_2'], kernel_size=(3,3), activation='relu', padding='same'))
    # MAXPOOLING LAYER 2
    self.model.add(MaxPool2D(pool_size=(2,2)))
    # CONVOLUTIONAL LAYER 3
    self.model.add(Conv2D(filters=parameters['cnnSize_3'], kernel_size=(3,3), activation='relu', padding='same'))
    # MAXPOOLING LAYER 3
    self.model.add(MaxPool2D(pool_size=(2,2)))
    self.model.add(Flatten())
    # FULLY CONNECTED LAYERS
    self.model.add(Dense(128, activation='relu'))
    # OUTPUT LAYER
    self.model.add(Dense(self.numClasses, activation='softmax'))

    opt = self.checkOptimizer(parameters)

    self.model.compile( loss='categorical_crossentropy',
                        optimizer=opt,
                        metrics=['accuracy'])




In [13]:
import numpy as np
from random import random, randint, uniform, choice, choices, sample
from tqdm import tqdm
from time import time
import sys


class GeneticAlgorithm:

    def __init__(self,
        parameters,
        fitnessFunction,
        population_size,
        generations,
        elitism         = 0.1,
        crossover_rate  = 0.8,
        crossoverPoint  = None,
        mutation_rate   = 0.25,
        random_selection_rate = 0.01
    ):
        self.elitism = elitism
        self.generations = generations
        self.populationSize = population_size
        self.crossoverRate = crossover_rate
        self.mutationRate  = mutation_rate
        self.randoSelectionRate = random_selection_rate
        
        self.parametersRange = list(parameters.values())
        self.fitnessFunction = fitnessFunction
        
        # CROSSOVER
        self.crossoverPoint = int(len(parameters)/2) if crossoverPoint == None else crossoverPoint

        # AUX
        self.precision = 7

        # self.population = self.createPopulation()
    

    # POPULATIONAL SECTION
    def createIndividual(self):
        '''
        Individual is represented as a possible solution 
        to the problem.

        In this case a solution is an array with values of
        the selected hyperparameters.

        A probability distribution (random, gaussian, uniform) is the best way to generate
        values inside a range of possible values
        '''
        return [round(uniform(*parameter), 3) if type(parameter) == tuple else choice(parameter)
            for parameter in self.parametersRange]
    
    def individualFormat(self, individual):
        return tuple(individual)
    
    def createPopulation(self):
        '''
        Create an initial random population according with the
        parameters of the problem and its valid values
        '''
        print("Creating initial random population...")
        population = []
        while len(population) < self.populationSize:
            ind = self.createIndividual()
            print(ind)
            population.append(ind)
        return population

    #   FITNESS SECTION
    def fitness(self, individual):
        '''
        function fitness = evaluate_individual
        '''
        ind = self.individualFormat(individual)
        return self.fitnessFunction(ind)


    def sortByFitness(self, population):
        scores = [self.fitness(individual) for individual in tqdm(population, desc="Measuring Population Fitness", file=sys.stdout)]
        return [x for _, x in sorted(zip(scores, population), key=lambda p: p[0], reverse=True)]

    def populationFitness(self, population):
        return [self.fitness(individual) for individual in tqdm(population, desc="Measuring Population Fitness", file=sys.stdout)]    
    
    def orderPopulation(self, scores, population):
        self.scores, self.population = [list(t) for t in zip(*sorted(zip(scores, population)))]   


    def grade(self, list_fit=None):
        '''
        Find minimum fitness for a population.
        '''
        if not list_fit:
            list_fit = self.scores
        try:
            return np.nanmin([fit for fit in self.scores])
        except:
            return np.nan
    
    # REPRODUCTION SECTION
    def crossover(self, individual1, individual2):

        child1 = individual1.copy()
        child2 = individual2.copy()

        if np.random.uniform(0,1) < self.crossoverRate:
            child1 = individual1[:self.crossoverPoint] + individual2[self.crossoverPoint:]
            child2 = individual2[:self.crossoverPoint] + individual1[self.crossoverPoint:]

        return child1, child2


    # MUTATION SECTION
    def mutation(self, individual):
        if np.random.uniform(0,1) < self.mutationRate:
            locus = randint(0, len(individual)-1)
            parameter = self.parametersRange[locus]
            individual[locus] = uniform(*parameter) if type(parameter) == tuple else choice(parameter)



    # GENERATIONAL SECTION
    def evolve(self):

        # ELITISMO
        elitismSize = int(self.populationSize*self.elitism)
        # orderedPop = self.sortByFitness(population)
        newGeneration = [ind for ind in tqdm(self.population[:elitismSize], desc="Applying Elitism", file=sys.stdout)]

        while len(newGeneration) < self.populationSize:
            
            # RANDOM SELECTION (DIVERSITY)
            for individual in tqdm(self.population[elitismSize:], desc="Random Selection", file=sys.stdout):
                if np.random.uniform(0,1) < self.randoSelectionRate:
                    newGeneration.append(individual)
        
            # RANDOM MUTATION (DIVERSITY)
            for individual in tqdm(self.population[elitismSize:], desc="Random Mutation", file=sys.stdout):
                self.mutation(individual)
                newGeneration.append(individual)

            # CROSSOVER
            ind1, ind2 = sample(self.population, 2)

            child1, child2 = self.crossover(ind1, ind2)

            if np.random.uniform(0,1) < self.mutationRate:
                randomSelection = choice([child1, child2])
                self.mutation(randomSelection)
                newGeneration.append(randomSelection)            
            newGeneration.append(child1)
            newGeneration.append(child2)

        # EVALUATE POPULATION
        generationScores = self.populationFitness(newGeneration)
        generationbestFitness = self.grade(generationScores) 

        print("Best fitness of this generation:", generationbestFitness)

        self.orderPopulation(generationScores, newGeneration)
        self.bestFitness = generationbestFitness

        

    def populationInfo(self, population):
        pass
        

    def run(self):
        
        counter = 0
        # CREATE INITIAL RANDOM POPULATION
        self.population = self.createPopulation()

        # EVALUATE INITIAL POPULATION
        self.scores = self.populationFitness(self.population)
        self.bestFitness = self.grade() 
        print("Initial best fitness:", self.bestFitness)
        
        # ORGANIZING POPULATION BY FITNESS
        self.orderPopulation(self.scores, self.population)
        
        while counter < self.generations:
            print(f"\n  Running iteration {(counter+1)}/{self.generations}")

            self.evolve()

            counter += 1
        
        return self.bestFitness, self.population
  

In [14]:
import numpy as np
from keras.utils import to_categorical
from keras.datasets.mnist import load_data
import matplotlib.pyplot as plt
from math import *


# AUXILIARY FUNCTIONS
def convertPow2(num):
  '''
    Convert num to the closest power of 2
  '''
  return int(pow(2, ceil(log2(abs(num)))))

def convertRange(num, bounds):
  '''
    Clip number to the bounds
  '''
  num = round(abs(num), num_digits)
  return np.clip(num, *bounds)

def load_dataset():
        (train_digits, train_labels), (test_digits, test_labels) = load_data()
        return (train_digits, train_labels), (test_digits, test_labels)

def load_dataset_with_validation(rate=0.10):
    """
    Load dataset setting apart some validation data
    @args:
        - rate: Percentage of training data to validation
    """

    (train_digits, train_labels), (test_digits, test_labels) = load_dataset()
    
    # RESHAPE DATA
    train_data = reshapeDataset(train_digits)
    test_data  = reshapeDataset(test_digits)

    # RESCALE DATA
    train_data = rescaleDataset(train_data)
    test_data  = rescaleDataset(test_data)

    # ONE-HOT ENCODING
    train_labels_cat = encodingDataset(train_labels)
    test_labels_cat  = encodingDataset(test_labels)

    # SHUFFLE THE TRAINING DATASET
    for _ in range(5):
        indexes = np.random.permutation(len(train_data))
    
    train_data          = train_data[indexes]
    train_labels_cat    =  train_labels_cat[indexes]

    splitPnt = int(rate*len(train_data))

    validation_data         = train_data[:splitPnt,:]
    validation_labels_cat   = train_labels_cat[:splitPnt,:]

    train_data2         = train_data[splitPnt:,:]
    train_labels_cat2   = train_labels_cat[splitPnt:,:]

    return train_data2, train_labels_cat2, test_data, test_labels_cat, validation_data, validation_labels_cat
  
def reshapeDataset(data):
    """
    Reshaping data to CNN standard
    """
    height      = data.shape[1]
    width       = data.shape[2]
    channels    = 1

    return np.reshape(data, (data.shape[0], height, width, channels))

def rescaleDataset(data):
    """
    Rescaling data
    """
    return data.astype('float32')/255

def encodingDataset(dataLabels, numClasses=10):
    """
    ONE-HOT ENCODING
    @args:
        - dataLabels
        - numClasses
    
    @output:
        - List of classes
    """    
    return to_categorical(dataLabels, numClasses)

def showRandomImages(data, labels):
    """
    Exhibit 14 random samples from dataset
    """
    
    np.random.seed(123)

    rand_14 = np.random.randint(0, data.shape[0], 14)
    sample_digits = data[rand_14]
    sample_labels = labels[rand_14]

    num_rows, num_cols = 2,7

    f, ax = plt.subplots(num_rows, num_cols, figsize=(12,5),
                        gridspec_kw={'wspace':0.03, 'hspace':0.01}, 
                        squeeze=True)

    for r in range(num_rows):
        for c in range(num_cols):
            image_index = r * 7 + c
            ax[r,c].axis("off")
            ax[r,c].imshow(sample_digits[image_index], cmap='gray')
            ax[r,c].set_title('No. %d' % sample_labels[image_index])
    plt.show()



In [11]:
import sys
# from utils import *
# from classifier import Classifier
# from evolutionaryAlgorithms import GeneticAlgorithm as GA


from datetime import datetime
from os import makedirs
import traceback
import shutil
import sys
import numpy as np
from getpass import getpass
import smtplib
import logging
from time import time



def run(**kwargs):
    algorithm = kwargs.get('algorithm')
    dataset = kwargs.get('dataset')

    if kwargs.get('algorithm') == 'GA':
        parameters = kwargs.get('parameters')
        popSize = kwargs.get('population_size')
        generations = kwargs.get('generations')

        # evolver = GA(fitness, parameters, popSize, generations, history)
        evolver = GeneticAlgorithm(parameters=parameters, fitnessFunction=fitness, population_size=popSize, generations=generations)
    else:
        pass
        
    best, population= evolver.run()
    print("Best Solution after "+str(generations)+" generations...")
    print(
        "Learning Rate: "+str(population[0][0]) +
        "\n Optimizer: " + str(population[0][1]) +
        "\n cnnSize_1: "+str(population[0][2]) +
        "\n cnnSize_2: " +str(population[0][3]) +
        "\n cnnSize_3: " +str(population[0][4])
    )
    print("Fitness (loss)" +str(best))

    # # create results dir
    # timestamp = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    # path = f"{algorithm}/results_NMIST_{timestamp}"
    # makedirs(path)
        
    # with open(f"{path}/{algorithm}_results.txt", "w+") as f:

    #     results = []
    #     loss = []


    #     for i in range(generations):
    #         print(f"\nRunning execution {(i+1)}/{generations}")
    #         # Run Evolver
    #         print('BEST GENE', best['gene'])
    #         # Calculate loss
    #         gen_loss = evolver.fitness(best['gene'], test=True, batch_size=batch_size, epochs=epochs)
    #         print('gen_loss', gen_loss)
    #         loss.append(gen_loss)
    #         # Store results
    #         results.append({
    #             'best': best,
    #             'gen_loss': gen_loss,
    #             'hist': hist, 
    #             'pop': evolver.pop, 
    #             'fit': evolver.fit, 
    #             'history': evolver.history
    #         })
    #     f.write(str(results))
    
    # # Calculate stats
    # mean = np.nanmean(loss)
    # std = np.nanstd(loss)
    
    # # Store stats         
    # with open(f"{path}/report.txt", "w+") as f:
    #   f.write(f"{algorithm} - Mean: {mean} | Std: {std}\n")
      
    # print("Success")
    # print(str(loss))
    # print(str(results))
    



def main():
    pass


if __name__ == '__main__':

    # GLOBAL GA PARAMETERS  
    GENERATIONS             = 2
    POPULATION_SIZE         = 4
    MUTATION_RATE           = 0.4
    CROSSOVER_RATE          = 0.8

    # GLOBAL CNN PARAMETERS
    EPOCHS                  = 2
    BATCH_SIZE              = 256


    # Hiperparametros:
    # - Learning rate
    # - Funcao de otimizacao
    # - Tamanho camada 1
    # - Tamanho camada 2
    # - Tamanho camada 3

    # Fitness:
    # - loss
    # - accuracy

    # Intervalo do tamanho da camada [2, 1024]

    parameters = {
        'learningRate': (0.001, 0.1),
        'optimizer': ['Adam', 'Sgd', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax'],
        'cnnSize_1': [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024],
        'cnnSize_2': [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024],
        'cnnSize_3': [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
    }

    normParam = {
        'cnnSize_1': convertPow2,
        'cnnSize_2': convertPow2,
        'cnnSize_3': convertPow2
    }

    train_data, train_labels_cat, \
    test_data, test_labels_cat, \
    validation_data, validation_labels_cat = load_dataset_with_validation()

    # Instantiate CNN Clssifier with the MNIST dataset
    cnn = Classifier(
        train_digits        =train_data,
        train_labels        =train_labels_cat,
        validation_digits   =validation_data,
        validation_labels   =validation_labels_cat,
        test_digits         =test_data,
        test_labels         =test_labels_cat,
        verbose             =1)

    def fitness(individual, test=False):
        cnn.clear()

        cnn.configureArchitecture(dict(zip(parameters.keys(), individual)) if not isinstance(individual, dict) else individual)

        cnn.fit(batchSize=BATCH_SIZE, nbEpochs=EPOCHS)

        results = cnn.evaluate(test)
        return results['loss']

    run(algorithm='GA', dataset='NMIST',fitness=fitness, parameters=parameters, population_size=POPULATION_SIZE, generations=GENERATIONS)

    









Creating initial random population...
[0.036, 'RMSprop', 4, 8, 1024]
[0.092, 'Adadelta', 2, 2, 1024]
[0.003, 'RMSprop', 16, 16, 16]
[0.046, 'RMSprop', 128, 64, 128]

Measuring Population Fitness:   0%|          | 0/4 [00:00<?, ?it/s][AEpoch 1/2
Epoch 2/2

Measuring Population Fitness:  25%|██▌       | 1/4 [00:06<00:19,  6.55s/it][AEpoch 1/2
Epoch 2/2

Measuring Population Fitness:  50%|█████     | 2/4 [00:12<00:12,  6.50s/it][AEpoch 1/2
Epoch 2/2

Measuring Population Fitness:  75%|███████▌  | 3/4 [00:16<00:05,  5.50s/it][AEpoch 1/2
Epoch 2/2

Measuring Population Fitness: 100%|██████████| 4/4 [00:24<00:00,  6.13s/it]
Initial best fitness: 0.09661066901683807

  Running iteration 1/2

Applying Elitism: 0it [00:00, ?it/s]

Random Selection: 100%|██████████| 4/4 [00:00<00:00, 34169.48it/s]

Random Mutation: 100%|██████████| 4/4 [00:00<00:00, 31359.28it/s]

Measuring Population Fitness:   0%|          | 0/8 [00:00<?, ?it/s][AEpoch 1/2
Epoch 2/2

Measuring Population Fitness:  12%|█▎ 