# Generación automática de arquitectura de CNN mediante Programación Genética Cartesiana

### Alumno : Gustavo Ayrton Bitocchi
### Director : Diego Alexis Evin
### Universidad Austral Cohorte 2020/21
### Trabajo final de Maestría
------------------------------------------------------------------------------------------------------

#### El siguiente notebook fue ejecutado durante 10 días utilizando una instancia ml.p3.2xlarge en AWS SageMaker, habiendo guardado previamente el conjunto de datos en un Bucket S3.

## Bibliotecas

In [None]:
# Instalacion de bibliotecas

pip install cloudpathlib

In [None]:
# Importacion de bibliotecas

import random
import os
import cv2
import csv
import time
import math
import copy
import pickle
import traceback
import sys
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

import boto3
import botocore

import multiprocessing.pool
import multiprocessing as mp

import torch
import torch.nn.parallel
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data

import torchvision
import torchvision.transforms as tt
import torchvision.models as models
import torchvision.datasets as dset
import torchvision.utils as vutils

from torch.nn import init
from torch.autograd import Variable
from torch.utils.data import Dataset, random_split, DataLoader

from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid

from sklearn.metrics import confusion_matrix, roc_auc_score
from matplotlib.image import imread
from collections import OrderedDict
from cloudpathlib import CloudPath 

## Descarga del conjunto de datos desde AWS S3

In [None]:
# Download the training set from the S3 bucket into the local "Entrenamiento" directory

cp = CloudPath("s3://xray-tesis-austral-bucket/Conjunto de datos/Entrenamiento/")
cp.download_to("Entrenamiento")

## Definición de clases que contienen la implementación de las CNN generadas por CGP

In [None]:
# Definicion de clase de entrenamiento de CNN autogenerada

class CNN_train():
    """Train one CGP-generated CNN on a GPU and score it on a validation split.

    Calling the instance builds the network from its CGP description, trains
    it for ``epoch_number`` epochs with Adam and a class-weighted
    cross-entropy loss, saves the weights to ``./modelo_<gpu_id>.pth`` and
    returns the validation AUC-ROC measured after the last epoch.  If the
    forward pass of the generated architecture raises, the traceback is
    printed and ``0.`` is returned so the caller can treat the architecture
    as unfit.
    """

    def __init__(self, neural_network_generated, epoch_number, batch_size, learning_rate, train_directory, number_workers, train_size_percentage, gpu_id=0):
        """Store the training configuration.

        Args:
            neural_network_generated: CGP description handed to ``CGP_TO_CNN``.
            epoch_number: number of training epochs.
            batch_size: mini-batch size of both dataloaders.
            learning_rate: learning rate given to Adam.
            train_directory: root directory read through ``ImageFolder``.
            number_workers: ``num_workers`` of both dataloaders.
            train_size_percentage: fraction of the images used for training;
                the remainder is the validation split.
            gpu_id: index of the CUDA device to use (defaults to 0).
        """
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.neural_network_generated = neural_network_generated
        self.gpu_id = gpu_id
        self.epoch_number = epoch_number
        self.train_directory = train_directory
        self.number_workers = number_workers
        self.train_size_percentage = train_size_percentage

    def __call__(self):
        """Run training and return the validation AUC-ROC of the last epoch."""
        # Build the training and validation dataloaders
        train_dataloader, test_dataloader = self.get_traininig_validation_dataloaders(
            self.batch_size, self.train_directory, self.number_workers, self.train_size_percentage)
        print('Cantidad de ejemplos de entrenamiento:', len(train_dataloader.dataset))
        print('Arquitectura a entrenar:', self.neural_network_generated)
        print('ID de la GPU utilizada:', self.gpu_id)
        print('Cantidad de Epocas:', self.epoch_number)
        print('Tamaño de Lote:', self.batch_size)
        print('Tasa de Aprendizaje:', self.learning_rate)

        # Benchmark mode: the input size never changes, so cuDNN can pick the fastest kernels
        torch.backends.cudnn.benchmark = True
        torch.cuda.empty_cache()  # Release cached GPU memory

        model = CGP_TO_CNN(self.neural_network_generated)  # Convert the CGP representation into a CNN
        model.apply(self.weights_init_kaiming)  # He initialisation
        model.cuda(self.gpu_id)

        # Class-weighted loss to compensate for the class imbalance.
        # NOTE(review): 1349 and 3901 are presumably the per-class image counts
        # of the training set - confirm against the dataset.
        minority_count, majority_count = 1349, 3901
        total_count = minority_count + majority_count
        criterion = nn.CrossEntropyLoss(
            weight=torch.FloatTensor([majority_count / total_count, minority_count / total_count]))
        criterion.cuda(self.gpu_id)

        # Adam with its default betas and epsilon
        optimizer = optim.Adam(model.parameters(), lr=self.learning_rate)

        # Stays at 0. when epoch_number < 1, so the return below is always defined
        validation_score = 0.
        batch_count = max(len(train_dataloader), 1)

        for epoch in range(1, self.epoch_number + 1):
            print('Epoca', epoch)
            start_time = time.time()
            training_loss = 0.
            labels = []
            predictions = []

            model.train()

            for data, target in train_dataloader:
                data = data.cuda(self.gpu_id)
                target = target.cuda(self.gpu_id)

                optimizer.zero_grad()

                try:
                    output = model(data, None)
                except Exception:
                    # An architecture that cannot run gets the worst possible score
                    traceback.print_exc()
                    return 0.

                criterion_loss = criterion(output, target)
                training_loss += criterion_loss.item()
                criterion_loss.backward()

                optimizer.step()

                labels.extend(target.tolist())

                _, predicted = torch.max(output.detach(), 1)
                predictions.extend(predicted.tolist())

            print('Conjunto de Entrenamiento : Perdida Promedio : {:.4f}'.format(training_loss / batch_count))
            print('Conjunto de Entrenamiento : AUC-ROC Promedio : {:.4f}'.format(roc_auc_score(labels, predictions)))
            print('Tiempo de Entrenamiento: ', time.time() - start_time)

            # Validation is only run once, after the final epoch
            if epoch == self.epoch_number:
                model.eval()
                validation_score = self.validate_model(model, criterion, None, None, test_dataloader)

        torch.save(model.state_dict(), './modelo_%d.pth' % int(self.gpu_id))

        return validation_score

    def get_traininig_validation_dataloaders(self, batch_size, train_directory, number_workers, train_size_percentage):
        """Return ``(training_dataloader, validation_dataloader)`` for ``train_directory``.

        The images are loaded with ``ImageFolder`` and split with
        ``random_split`` after seeding torch, so the split is repeatable.
        """
        random_seed = 2020
        shuffle = True
        pin_memory = True

        # Fixed seed so results can be reproduced
        torch.manual_seed(random_seed)

        # Augmentations applied to reduce overfitting.
        # NOTE(review): the validation subset comes from this same dataset
        # object, so it receives the same random augmentations.
        training_dataset = ImageFolder(train_directory,
                                       transform=tt.Compose([tt.Resize(255),
                                                             tt.CenterCrop(224),
                                                             tt.RandomHorizontalFlip(),
                                                             tt.RandomRotation(10),
                                                             tt.RandomGrayscale(),
                                                             tt.RandomAffine(translate=(0.05, 0.05), degrees=0),
                                                             tt.ToTensor()]))

        # Split into training and validation subsets
        train_size = round(len(training_dataset) * train_size_percentage)
        val_size = len(training_dataset) - train_size

        training_set, validation_set = random_split(training_dataset, [train_size, val_size])

        training_dataloader = DataLoader(training_set, batch_size, shuffle=shuffle, num_workers=number_workers, pin_memory=pin_memory)
        validation_dataloader = DataLoader(validation_set, batch_size, shuffle=shuffle, num_workers=number_workers, pin_memory=pin_memory)

        return (training_dataloader, validation_dataloader)

    def weights_init_kaiming(self, model):
        """Initialise one module in place; meant to be used with ``Module.apply``.

        Conv2d and Linear weights get Kaiming-normal values; BatchNorm2d gets
        uniform weights in [0.02, 1.0] and a zero bias.  Other modules are
        left untouched.
        """
        classname = model.__class__.__name__
        if 'Conv2d' in classname or 'Linear' in classname:
            init.kaiming_normal_(model.weight.data, a=0, mode='fan_in')
        elif 'BatchNorm2d' in classname:
            init.uniform_(model.weight.data, 0.02, 1.0)
            init.constant_(model.bias.data, 0.0)

    def validate_model(self, model, criterion, input, label, test_dataloader):
        """Return the AUC-ROC of ``model`` over ``test_dataloader``.

        ``input`` and ``label`` are unused; they are kept only so existing
        callers that pass staging buffers keep working.  Returns ``0.`` if the
        forward pass raises.
        """
        validation_loss = 0.
        labels = []
        predictions = []

        with torch.no_grad():
            for data, target in test_dataloader:
                data = data.cuda(self.gpu_id)
                target = target.cuda(self.gpu_id)

                try:
                    output = model(data, None)
                except Exception:
                    traceback.print_exc()
                    return 0.

                validation_loss += criterion(output, target).item()
                labels.extend(target.tolist())

                _, predicted = torch.max(output, 1)
                predictions.extend(predicted.tolist())

        validation_roc_auc_score = roc_auc_score(labels, predictions)
        print('Conjunto de Validacion : Perdida Promedio : {:.4f}'.format(validation_loss / max(len(test_dataloader), 1)))
        print('Conjunto de Validacion : AUC-ROC Promedio : {:.4f}'.format(validation_roc_auc_score))

        return validation_roc_auc_score

In [None]:
# Clase que representa bloque Convolucional     

class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and ReLU.

    The convolution is bias-free and padded with ``kernel // 2``.
    """

    def __init__(self, in_size, out_size, kernel, stride):
        super().__init__()
        stages = [
            nn.Conv2d(in_size, out_size, kernel, stride=stride, padding=kernel // 2, bias=False),
            nn.BatchNorm2d(out_size),
            nn.ReLU(inplace=True),
        ]
        # Attribute name and stage order are part of the state_dict keys
        self.conv1 = nn.Sequential(*stages)

    def forward(self, inputs):
        """Apply conv -> batch norm -> ReLU to ``inputs``."""
        return self.conv1(inputs)

# Clase que representa bloque Residual     

class ResBlock(nn.Module):
    """Residual block: conv-BN-ReLU-conv-BN on one input, added to a second input.

    When the two tensors have a different number of channels, the narrower
    one is zero-padded along the channel axis before the addition.
    """

    def __init__(self, in_size, out_size, kernel, stride):
        super().__init__()
        pad_size = kernel // 2
        self.conv1 = nn.Sequential(nn.Conv2d(in_size, out_size, kernel, stride=stride, padding=pad_size, bias=False),
                                   nn.BatchNorm2d(out_size),
                                   nn.ReLU(inplace=True),
                                   nn.Conv2d(out_size, out_size, kernel, stride=stride, padding=pad_size, bias=False),
                                   nn.BatchNorm2d(out_size))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, inputs1, inputs2):
        """Return ``relu(conv_path(inputs1) + inputs2)`` with channel padding.

        Args:
            inputs1: tensor fed through the convolutional path.
            inputs2: tensor used as the skip connection.
        """
        x = self.conv1(inputs1)
        in_data = [x, inputs2]
        small_ch_id, large_ch_id = (0, 1) if in_data[0].size(1) < in_data[1].size(1) else (1, 0)
        offset = int(in_data[large_ch_id].size(1) - in_data[small_ch_id].size(1))
        if offset != 0:
            # Real zeros instead of `slice * 0`: multiplying by zero turns any
            # inf/NaN of the wider tensor into NaN inside unrelated channels.
            padding = torch.zeros_like(in_data[large_ch_id][:, :offset, :, :])
            in_data[small_ch_id] = torch.cat([in_data[small_ch_id], padding], 1)
        out = torch.add(in_data[0], in_data[1])
        return self.relu(out)

# Clase que representa bloque Sum             

class Sum(nn.Module):
    """Element-wise sum of two feature maps of possibly different shape.

    The spatially larger input is max-pooled (2x2, stride 2) until its height
    no longer exceeds the other input's height, and the input with fewer
    channels is zero-padded along the channel axis.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs1, inputs2):
        """Return ``inputs1 + inputs2`` after aligning size and channels."""
        in_data = [inputs1, inputs2]
        if in_data[0].size(2) != in_data[1].size(2):
            small_in_id, large_in_id = (0, 1) if in_data[0].size(2) < in_data[1].size(2) else (1, 0)
            target_size = in_data[small_in_id].size(2)
            # Pool until the sizes meet.  Pooling floor(ratio) - 1 times is
            # only right for a 2x gap; a 4x gap needs two halvings, not three.
            while in_data[large_in_id].size(2) > target_size:
                in_data[large_in_id] = F.max_pool2d(in_data[large_in_id], 2, 2, 0)
        small_ch_id, large_ch_id = (0, 1) if in_data[0].size(1) < in_data[1].size(1) else (1, 0)
        offset = int(in_data[large_ch_id].size(1) - in_data[small_ch_id].size(1))
        if offset != 0:
            # Real zeros: `slice * 0` would propagate inf/NaN as NaN
            padding = torch.zeros_like(in_data[large_ch_id][:, :offset, :, :])
            in_data[small_ch_id] = torch.cat([in_data[small_ch_id], padding], 1)
        return torch.add(in_data[0], in_data[1])

# Clase que representa bloque Concat        

class Concat(nn.Module):
    """Channel-wise concatenation of two feature maps.

    When the heights differ, the larger input is max-pooled (2x2, stride 2)
    until its height no longer exceeds the other input's height.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs1, inputs2):
        """Return ``inputs1`` and ``inputs2`` concatenated along dimension 1."""
        in_data = [inputs1, inputs2]
        if in_data[0].size(2) != in_data[1].size(2):
            small_in_id, large_in_id = (0, 1) if in_data[0].size(2) < in_data[1].size(2) else (1, 0)
            target_size = in_data[small_in_id].size(2)
            # Pool until the sizes meet.  Pooling floor(ratio) - 1 times
            # over-pools whenever the gap is larger than 2x.
            while in_data[large_in_id].size(2) > target_size:
                in_data[large_in_id] = F.max_pool2d(in_data[large_in_id], 2, 2, 0)
        return torch.cat([in_data[0], in_data[1]], 1)

# Clase que transforma representacion CGP a CNN

class CGP_TO_CNN(nn.Module):
    """Build a runnable CNN from a CGP node list.

    ``cgp`` is a sequence of ``(name, in1, in2)`` entries.  Entry 0 is
    expected to be the ``'input'`` node; every later entry becomes one layer
    whose inputs are the outputs of the entries indexed by ``in1``/``in2``.
    Supported names are ``'full'``, ``'Max_Pool'``, ``'Avg_Pool'``,
    ``'Concat'``, ``'Sum'`` and ``'S_<ConvBlock|ResBlock>_<channels>_<kernel>'``.

    Raises:
        ValueError: if a node name cannot be decoded.
    """

    def __init__(self, cgp):
        super().__init__()
        self.cgp = cgp
        self.pool_size = 2
        self.arch = OrderedDict()
        self.encode = []
        # One bookkeeping slot per node (instead of a fixed 500)
        slot_count = max(len(self.cgp), 1)
        self.channel_num = [None] * slot_count
        self.size = [None] * slot_count
        self.channel_num[0] = 3  # Number of input channels
        self.size[0] = 224  # Input image size
        i = 0
        for name, in1, in2 in self.cgp:
            if name == 'input':
                i += 1
                continue
            elif name == 'full':
                self.encode.append(nn.Linear(self.channel_num[in1] * self.size[in1] * self.size[in1], 2))
            elif name == 'Max_Pool' or name == 'Avg_Pool':
                self.channel_num[i] = self.channel_num[in1]
                # main() skips pooling once the map is 1x1, so the tracked
                # size must stop shrinking there too (otherwise it reaches 0)
                self.size[i] = self.size[in1] // 2 if self.size[in1] > 1 else self.size[in1]
                if name == 'Max_Pool':
                    self.encode.append(nn.MaxPool2d(2, 2))
                else:
                    self.encode.append(nn.AvgPool2d(2, 2))
            elif name == 'Concat':
                self.channel_num[i] = self.channel_num[in1] + self.channel_num[in2]
                self.size[i] = min(self.size[in1], self.size[in2])
                self.encode.append(Concat())
            elif name == 'Sum':
                self.channel_num[i] = max(self.channel_num[in1], self.channel_num[in2])
                self.size[i] = min(self.size[in1], self.size[in2])
                self.encode.append(Sum())
            else:
                key = name.split('_')
                if len(key) < 4 or key[0] != 'S':
                    # Raise instead of sys.exit: SystemExit would silently kill
                    # a multiprocessing worker rather than report the problem
                    raise ValueError('Unsupported CGP node type: %r' % (name,))
                func = key[1]
                out_size = int(key[2])
                kernel = int(key[3])
                self.size[i] = self.size[in1]
                if func == 'ConvBlock':
                    self.channel_num[i] = out_size
                    self.encode.append(ConvBlock(self.channel_num[in1], out_size, kernel, stride=1))
                else:
                    # ResBlock pads the narrower branch, so it outputs the wider channel count
                    self.channel_num[i] = max(out_size, self.channel_num[in1])
                    self.encode.append(ResBlock(self.channel_num[in1], out_size, kernel, stride=1))
            i += 1

        self.layer_module = nn.ModuleList(self.encode)
        # Kept for backward compatibility; main() no longer stores activations here
        self.outputs = [None for _ in range(len(self.cgp))]

    def main(self, x):
        """Run ``x`` through the layers and return the last layer's output."""
        # Local buffer so activations are not kept alive between forward passes
        outputs = [None] * len(self.cgp)
        outputs[0] = x
        for node_id, layer in enumerate(self.layer_module, start=1):
            source = outputs[self.cgp[node_id][1]]
            if isinstance(layer, nn.Linear):
                outputs[node_id] = layer(source.reshape(source.size(0), -1))
            elif isinstance(layer, (nn.MaxPool2d, nn.AvgPool2d)):
                # A 1x1 map cannot be pooled further; pass it through unchanged
                outputs[node_id] = layer(source) if source.size(2) > 1 else source
            elif isinstance(layer, ConvBlock):
                outputs[node_id] = layer(source)
            elif isinstance(layer, ResBlock):
                outputs[node_id] = layer(source, source)
            elif isinstance(layer, (Concat, Sum)):
                outputs[node_id] = layer(source, outputs[self.cgp[node_id][2]])
            else:
                raise TypeError('Unsupported layer type: %s' % type(layer).__name__)
        return outputs[len(self.layer_module)]

    def forward(self, x, t):
        """Forward pass; ``t`` is accepted but unused."""
        return self.main(x)

In [None]:
# Creamos clase que define la estructura de la CNN desarrollada mediante CGP

class CGP_Structure_Info(object):
    """Static description of the CGP grid and of the available node types."""

    def __init__(self, rows, columns, level_back, min_active_num, max_active_num):
        self.input_num = 1

        # Block types: conv/residual blocks first, then merges, then pooling.
        # The order matters because genes store indices into this list.
        parametrised_blocks = ['S_{}_{}_{}'.format(block, channels, kernel)
                               for block in ('ConvBlock', 'ResBlock')
                               for channels in (32, 128, 64)
                               for kernel in (1, 3, 5)]
        merge_blocks = ['Concat', 'Sum']
        pooling_blocks = ['Max_Pool', 'Avg_Pool']
        self.func_type = parametrised_blocks + merge_blocks + pooling_blocks

        # Number of inputs of each block type (only the merges take two)
        self.func_in_num = ([1] * len(parametrised_blocks)
                            + [2] * len(merge_blocks)
                            + [1] * len(pooling_blocks))

        # Output node description
        self.out_num = 1
        self.out_type = ['full']
        self.out_in_num = [1]

        # Grid geometry and limits on active nodes
        self.rows = rows
        self.columns = columns
        self.node_num = rows * columns
        self.level_back = level_back
        self.min_active_num = min_active_num
        self.max_active_num = max_active_num

        # Derived sizes
        self.func_type_num = len(self.func_type)
        self.out_type_num = len(self.out_type)
        self.max_in_num = np.max([np.max(self.func_in_num), np.max(self.out_in_num)])

In [None]:
# Creamos procesos no-daemon (pueden lanzar procesos hijos) para paralelizar la generación de la arquitectura (si se cuenta con GPUs adicionales)

class NoDaemonProcess(mp.Process):
    """Process whose ``daemon`` flag always reads ``False``.

    Pool workers are normally daemonic and daemonic processes may not start
    children; ignoring writes to ``daemon`` lets a worker spawn its own
    processes.
    """

    def _get_daemon(self):
        return False

    def _set_daemon(self, value):
        # Deliberately ignored: the pool tries to set daemon=True on workers
        pass

    daemon = property(_get_daemon, _set_daemon)


class NoDaemonContext(type(mp.get_context())):
    """Default multiprocessing context whose workers are ``NoDaemonProcess``."""

    Process = NoDaemonProcess


class NoDaemonProcessPool(multiprocessing.pool.Pool):
    """``Pool`` whose workers are non-daemonic.

    Overriding ``Pool.Process`` with a class stopped working in Python 3.8,
    where the pool calls ``Process(ctx, ...)``; supplying a custom context is
    the approach that works across versions.
    """

    def __init__(self, *args, **kwargs):
        kwargs['context'] = NoDaemonContext()
        super().__init__(*args, **kwargs)

class CNN_Generated_Evaluation(object):
    """Callable that trains a list of CGP-generated networks and returns their scores.

    Networks are processed in groups of ``gpu_number``; each network of a
    group is trained in its own worker process on the GPU whose index is its
    position inside the group.
    """

    def __init__(self, gpu_number, epoch_number, batch_size, learning_rate, number_workers, train_size_percentage, train_directory):
        self.gpu_number = gpu_number
        self.epoch_number = epoch_number
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.number_workers = number_workers
        self.train_size_percentage = train_size_percentage
        self.train_directory = train_directory

    def __call__(self, neural_network_lists):
        """Return a NumPy array with one evaluation per entry of ``neural_network_lists``."""
        total = len(neural_network_lists)
        evaluations = np.zeros(total)

        for start in range(0, total, self.gpu_number):
            # One process per network in this group, never more than the GPU count
            process_number = int(min(self.gpu_number, total - start))
            arg_data = [(self.cnn_generated_eval, neural_network_lists[start + j], j) for j in range(process_number)]
            pool = NoDaemonProcessPool(process_number)
            try:
                evaluations[start:start + process_number] = pool.map(self.arg_wrapper_mp, arg_data)
            finally:
                # Always release the workers, even when a training run raised
                pool.terminate()
                pool.join()

        return evaluations

    def arg_wrapper_mp(self, args):
        """Call ``args[0]`` with the remaining items as positional arguments."""
        return args[0](*args[1:])

    def cnn_generated_eval(self, neural_network_generated, gpu_id):
        """Train ``neural_network_generated`` on GPU ``gpu_id`` and return its score."""
        train = CNN_train(neural_network_generated, self.epoch_number, self.batch_size, self.learning_rate, self.train_directory, self.number_workers, self.train_size_percentage)
        # The constructor does not take the GPU id positionally; set it on the instance
        train.gpu_id = gpu_id
        return train()

In [None]:
# Definimos clase y metodos de individuo

class Individual(object):
    """One CGP genotype: a grid of function nodes plus the output node(s).

    ``gene[n]`` holds the type index of node ``n`` followed by
    ``max_in_num`` connection ids.  Connection ids below ``input_num`` refer
    to network inputs; id ``k >= input_num`` refers to node ``k - input_num``.
    ``is_active[n]`` tells whether node ``n`` is reachable from an output.
    """

    # Function names counted as pooling by check_pool
    POOL_FUNCTIONS = ('Max_Pool', 'Avg_Pool')

    def __init__(self, net_info):
        self.net_info = net_info
        total_nodes = self.net_info.node_num + self.net_info.out_num
        self.gene = np.zeros((total_nodes, self.net_info.max_in_num + 1)).astype(int)
        self.is_active = np.empty(total_nodes).astype(bool)
        self.is_pool = np.empty(total_nodes).astype(bool)
        # Fitness; stays None until the individual is evaluated or copied from one
        self.eval = None
        self.init_gene_with_conv()

    def _connect_range(self, n):
        """Return ``(min_connect_id, max_connect_id)`` allowed for node ``n``.

        The upper bound is exclusive and is derived from the node's column and
        ``level_back``.
        """
        info = self.net_info
        col = np.min((int(n / info.rows), info.columns))
        max_connect_id = col * info.rows + info.input_num
        if col - info.level_back >= 0:
            min_connect_id = (col - info.level_back) * info.rows + info.input_num
        else:
            min_connect_id = 0
        return min_connect_id, max_connect_id

    def init_gene_with_conv(self):
        """Randomly initialise the genes with a guaranteed input -> conv -> output path."""
        info = self.net_info
        arch = ['S_ConvBlock_64_3']

        input_layer_num = int(info.input_num / info.rows) + 1
        output_layer_num = int(info.out_num / info.rows) + 1
        layer_ids = [((info.columns - 1 - input_layer_num - output_layer_num) + i) // (len(arch)) for i in range(len(arch))]
        prev_id = 0
        current_layer = input_layer_num
        block_ids = []

        # Place the seed blocks and chain them, starting from the network input
        for i, idx in enumerate(layer_ids):
            current_layer += idx
            n = current_layer * info.rows + np.random.randint(info.rows)
            block_ids.append(n)
            self.gene[n][0] = info.func_type.index(arch[i])
            min_connect_id, max_connect_id = self._connect_range(n)

            self.gene[n][1] = prev_id
            for j in range(1, info.max_in_num):
                self.gene[n][j + 1] = min_connect_id + np.random.randint(max_connect_id - min_connect_id)

            prev_id = n + info.input_num

        # Output node: its first input is the last seed block
        n = info.node_num
        self.gene[n][0] = np.random.randint(info.out_type_num)
        min_connect_id, max_connect_id = self._connect_range(n)

        self.gene[n][1] = prev_id
        for i in range(1, info.max_in_num):
            self.gene[n][i + 1] = min_connect_id + np.random.randint(max_connect_id - min_connect_id)
        block_ids.append(n)

        # Every remaining node gets a random type and random connections
        for n in range(info.node_num + info.out_num):
            if n in block_ids:
                continue

            type_num = info.func_type_num if n < info.node_num else info.out_type_num
            self.gene[n][0] = np.random.randint(type_num)
            min_connect_id, max_connect_id = self._connect_range(n)
            for i in range(info.max_in_num):
                self.gene[n][i + 1] = min_connect_id + np.random.randint(max_connect_id - min_connect_id)

        self.check_active()

    def __check_course_to_out(self, n):
        """Mark node ``n`` and, recursively, the nodes it reads from as active."""
        if not self.is_active[n]:
            self.is_active[n] = True
            t = self.gene[n][0]
            if n >= self.net_info.node_num:
                in_num = self.net_info.out_in_num[t]
            else:
                in_num = self.net_info.func_in_num[t]

            for i in range(in_num):
                if self.gene[n][i+1] >= self.net_info.input_num:
                    self.__check_course_to_out(self.gene[n][i+1] - self.net_info.input_num)

    def check_active(self):
        """Recompute ``is_active`` by walking back from every output node."""
        self.is_active[:] = False
        for n in range(self.net_info.out_num):
            self.__check_course_to_out(self.net_info.node_num + n)

    def check_pool(self):
        """Return ``(is_pool, pool_num)`` for the active function nodes.

        ``pool_num`` is the number of active pooling nodes.  ``is_pool`` is
        ``True`` only when there are none (the flag keeps its historical,
        inverted meaning).
        """
        func_type = self.net_info.func_type
        pool_num = 0
        # Look pooling up by name rather than by a hard-coded index threshold
        for n in range(self.net_info.node_num):
            if self.is_active[n] and func_type[self.gene[n][0]] in self.POOL_FUNCTIONS:
                pool_num += 1
        return pool_num == 0, pool_num

    def __mutate(self, current, min_int, max_int):
        """Return a random integer in ``[min_int, max_int)`` different from ``current``."""
        mutated_gene = current
        while current == mutated_gene:
            mutated_gene = min_int + np.random.randint(max_int - min_int)
        return mutated_gene

    def mutation(self, mutation_rate):
        """Mutate every gene with probability ``mutation_rate``.

        Returns:
            ``True`` if a gene used by an active node was changed.
        """
        active_check = False
        info = self.net_info

        for n in range(info.node_num + info.out_num):
            t = self.gene[n][0]
            type_num = info.func_type_num if n < info.node_num else info.out_type_num
            if np.random.rand() < mutation_rate and type_num > 1:
                self.gene[n][0] = self.__mutate(self.gene[n][0], 0, type_num)
                if self.is_active[n]:
                    active_check = True
            min_connect_id, max_connect_id = self._connect_range(n)
            # Input count is taken from the type the node had before this mutation
            in_num = info.func_in_num[t] if n < info.node_num else info.out_in_num[t]
            for i in range(info.max_in_num):
                if np.random.rand() < mutation_rate and max_connect_id - min_connect_id > 1:
                    self.gene[n][i+1] = self.__mutate(self.gene[n][i+1], min_connect_id, max_connect_id)
                    if self.is_active[n] and i < in_num:
                        active_check = True

        self.check_active()
        return active_check

    def neutral_mutation(self, mutation_rate):
        """Mutate only genes that do not affect the active network; returns ``False``."""
        info = self.net_info

        for n in range(info.node_num + info.out_num):
            t = self.gene[n][0]
            type_num = info.func_type_num if n < info.node_num else info.out_type_num
            if not self.is_active[n] and np.random.rand() < mutation_rate and type_num > 1:
                self.gene[n][0] = self.__mutate(self.gene[n][0], 0, type_num)
            min_connect_id, max_connect_id = self._connect_range(n)
            in_num = info.func_in_num[t] if n < info.node_num else info.out_in_num[t]
            for i in range(info.max_in_num):
                if (not self.is_active[n] or i >= in_num) and np.random.rand() < mutation_rate \
                        and max_connect_id - min_connect_id > 1:
                    self.gene[n][i+1] = self.__mutate(self.gene[n][i+1], min_connect_id, max_connect_id)

        self.check_active()
        return False

    def count_active_node(self):
        """Return the number of active nodes (output nodes included)."""
        return self.is_active.sum()

    def copy(self, source):
        """Overwrite this individual with the state of ``source``."""
        self.net_info = source.net_info
        self.gene = source.gene.copy()
        self.is_active = source.is_active.copy()
        self.is_pool = source.is_pool.copy()
        self.eval = source.eval

    def active_net_list(self):
        """Return the active network as ``[type, in1, in2, ...]`` rows.

        Row 0 is ``["input", 0, 0]``; connection ids are renumbered so that
        they index into the returned list.
        """
        net_list = [["input", 0, 0]]
        active_cnt = np.arange(self.net_info.input_num + self.net_info.node_num + self.net_info.out_num)
        active_cnt[self.net_info.input_num:] = np.cumsum(self.is_active)

        for n, is_a in enumerate(self.is_active):
            if is_a:
                t = self.gene[n][0]
                if n < self.net_info.node_num:
                    type_str = self.net_info.func_type[t]
                else:
                    type_str = self.net_info.out_type[t]

                connections = [active_cnt[self.gene[n][i+1]] for i in range(self.net_info.max_in_num)]
                net_list.append([type_str] + connections)
        return net_list

In [None]:
# Definimos clase y metodos que contiene las funciones de CGP

class CGP(object):
    """(1 + lambda) Cartesian Genetic Programming search over CNN architectures.

    Constructing the object immediately runs ``modified_evolution``.
    ``pop[0]`` is the parent; ``pop[1:]`` are the offspring of the current
    generation.
    """

    def __init__(self, net_info, evaluation_function, offsprings, max_evaluations, mutation_rate):
        """Create the population and run the evolution.

        Args:
            net_info: structure description passed to every ``Individual``.
            evaluation_function: callable mapping a list of active network
                lists to a sequence of scores, one per network.
            offsprings: number of children generated per generation.
            max_evaluations: number of generations to run.
            mutation_rate: per-gene mutation probability.
        """
        self.offsprings = offsprings  # Number of children
        self.pop = [Individual(net_info) for _ in range(1 + self.offsprings)]
        self.evaluation_function = evaluation_function
        self.max_pool_num = 5  # log2(image size = 224) - 2
        self.num_gen = 0
        self.num_eval = 0
        self.modified_evolution(max_evaluations, mutation_rate)  # Run the modified evolution

    def evaluation(self, pop, eval_flag):
        """Evaluate the flagged individuals and return the scores of all of ``pop``.

        Individuals whose flag is falsy keep their current ``eval`` value.
        """
        active_index = np.where(eval_flag)[0]
        net_lists = [pop[i].active_net_list() for i in active_index]

        fp = self.evaluation_function(net_lists)
        for i, j in enumerate(active_index):
            pop[j].eval = fp[i]

        evaluations = np.zeros(len(pop))
        for i, individual in enumerate(pop):
            evaluations[i] = individual.eval

        self.num_eval += len(net_lists)
        return evaluations

    def log_data(self, net_info_type, start_time):
        """Return the log row of the current parent (see ``log_data_children``)."""
        return self.log_data_children(net_info_type, start_time, self.pop[0])

    def log_data_children(self, net_info_type, start_time, pop):
        """Return a log row for the individual ``pop``.

        The row is ``[generation, evaluations, elapsed seconds, eval,
        active node count]`` followed by the active network list
        (``'active_only'``), the flattened genes (``'full'``) or nothing
        (any other value).
        """
        log_list = [self.num_gen, self.num_eval, time.time() - start_time, pop.eval, pop.count_active_node()]
        if net_info_type == 'active_only':
            log_list.append(pop.active_net_list())
        elif net_info_type == 'full':
            log_list += pop.gene.flatten().tolist()
        return log_list

    def modified_evolution(self, max_eval, mutation_rate):
        """Run ``max_eval`` generations, logging to text files in the working directory.

        Each generation mutates the parent into valid children (at least one
        active gene changed, enough active nodes, not too many pooling
        nodes), evaluates them, and replaces the parent when the best child
        is strictly better; otherwise the parent undergoes a neutral mutation.
        """
        with open('child.txt', 'w') as fw_c:
            writer_c = csv.writer(fw_c, lineterminator='\n')
            start_time = time.time()
            eval_flag = np.zeros(self.offsprings, dtype=bool)
            self.evaluation([self.pop[0]], np.array([True]))
            print(self.log_data(net_info_type='active_only', start_time=start_time))

            while self.num_gen < max_eval:
                self.num_gen += 1
                for i in range(self.offsprings):
                    child = self.pop[i + 1]
                    # Retry from the parent until the mutation yields a valid child
                    while True:
                        child.copy(self.pop[0])
                        eval_flag[i] = child.mutation(mutation_rate)
                        active_num = child.count_active_node()
                        _, pool_num = child.check_pool()
                        if eval_flag[i] and active_num >= child.net_info.min_active_num and pool_num <= self.max_pool_num:
                            break

                evaluations = self.evaluation(self.pop[1:], eval_flag=eval_flag)
                best_arg = evaluations.argmax()
                with open('arch_child.txt', 'a') as f:
                    writer_f = csv.writer(f, lineterminator='\n')
                    for individual in self.pop:
                        writer_c.writerow(self.log_data_children(net_info_type='full', start_time=start_time, pop=individual))
                        writer_f.writerow(self.log_data_children(net_info_type='active_only', start_time=start_time, pop=individual))
                if evaluations[best_arg] > self.pop[0].eval:
                    self.pop[0].copy(self.pop[best_arg + 1])
                else:
                    self.pop[0].neutral_mutation(mutation_rate)

                print(self.log_data(net_info_type='active_only', start_time=start_time))
                with open('./log_cgp.txt', 'a') as fw:
                    csv.writer(fw, lineterminator='\n').writerow(
                        self.log_data(net_info_type='full', start_time=start_time))
                with open('arch.txt', 'a') as fa:
                    csv.writer(fa, lineterminator='\n').writerow(
                        self.log_data(net_info_type='active_only', start_time=start_time))

## Ejecución del proceso CGP para la generación automática de una arquitectura CNN

In [None]:
# Describe the CGP structure (grid geometry and limits) used to build the CNN
network_cgp_info = CGP_Structure_Info(rows = 5, 
                                      columns = 30, 
                                      level_back = 10, 
                                      min_active_num = 1, 
                                      max_active_num = 30)

# Persist the CGP structure description to disk
with open('network_cgp_info.pickle', mode='wb') as f:
  pickle.dump(network_cgp_info, f)

# Build the callable that trains/evaluates each auto-generated CNN
evaluation_function = CNN_Generated_Evaluation(gpu_number = 1, 
                                               epoch_number = 20, 
                                               batch_size = 16, 
                                               learning_rate = 0.01, 
                                               number_workers = 4, 
                                               train_size_percentage = 0.7, 
                                               train_directory = 'Entrenamiento/')

# Start the CGP search; constructing CGP runs the modified evolution immediately
CGP(net_info = network_cgp_info, 
    evaluation_function = evaluation_function, 
    offsprings = 1,
    max_evaluations = 1000,
    mutation_rate = 0.1)