In [1]:
## Algoritmo genético para encontrar os melhores parâmetros

## Carregamento das bases

In [2]:
from info_dataset import *
import random
import numpy as np
#from EchoStateNetwork import EchoStateNetwork
from esn import ESN
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from scipy import signal
import pandas as pd

In [3]:
# Let pandas display up to 500 columns before truncating wide DataFrames.
pd.set_option('display.max_columns', 500)

In [4]:
# Disable scientific notation when NumPy prints arrays.
np.set_printoptions(suppress=True)

## Obter dados

In [5]:
# Load the "ATIVOS" recordings: patients who were in coma and whose outcome was survival.
# The helpers and PATH_DATA are presumably provided by `from info_dataset import *` — verify.
filesname = get_name_files_from(PATH_DATA['ativos'])
dataset_ativos = get_dataset_from_path_filename(PATH_DATA['ativos'], filesname)
len(dataset_ativos)

42

In [6]:
# Load the "OME" recordings: patients who were in coma and whose outcome was brain death.
# NOTE: `filesname` is rebound here, so after this cell it refers to the OME files only.
filesname = get_name_files_from(PATH_DATA['ome'])
dataset_ome = get_dataset_from_path_filename(PATH_DATA['ome'], filesname)
len(dataset_ome)

15

In [7]:
## Percentage of loaded recordings that have a brain-death outcome
## (counted per recording, i.e. per element of each dataset list).
round((len(dataset_ome) / (len(dataset_ome) + len(dataset_ativos)))*100, 3)

26.316

In [8]:
[fs['fs'][0][0] for fs in dataset_ativos]

[400,
 400,
 400,
 400,
 400,
 400,
 400,
 600,
 600,
 600,
 600,
 600,
 600,
 600,
 600,
 600,
 400,
 600,
 600,
 600,
 600,
 200,
 100,
 100,
 100,
 100,
 100,
 100,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200,
 200]

## Padronizar a frequência de amostragem.

**Inicialmente, reamostrar todos os registros de cada base para a menor frequência de amostragem presente nela.**

In [9]:
def padroniza_hz(dataset):
    """Downsample every recording in ``dataset`` to the lowest sampling rate found in it.

    Each element of ``dataset`` is a mapping with at least:

    * ``'fs'``: the sampling rate in Hz, either nested as ``[[fs]]`` (the layout
      read from the files) or as a plain scalar (the layout this function writes).
    * ``'XN'``: the signal array, with time on the last axis.

    The records are modified in place: ``'fs'`` becomes the common rate,
    ``'XN'`` the decimated signal and ``'t'`` a time vector (in seconds, starting
    at 0) with exactly one entry per decimated sample.

    Notes:
        * The decimation factor is ``fs // min_hz``; a rate that is not an integer
          multiple of the minimum is therefore only approximately converted.
        * ``scipy.signal.decimate`` is also called with factor 1 for records that
          are already at the minimum rate, so those signals still go through its
          anti-aliasing filter (kept to preserve the original numerical results).

    Args:
        dataset: list of recording mappings as described above.

    Returns:
        The same ``dataset`` object, after in-place standardisation.
    """
    if len(dataset) == 0:
        # Nothing to standardise; avoids min() failing on an empty sequence.
        return dataset

    def _rate(record):
        # np.ravel accepts both the nested [[fs]] layout and the scalar written
        # below, so running the cell twice no longer raises.
        return int(np.ravel(record['fs'])[0])

    min_hz = min(_rate(record) for record in dataset)
    incremento = 1 / min_hz

    for record in dataset:
        fator = _rate(record) // min_hz
        resampled_signal_xn = signal.decimate(record['XN'], fator)

        # The time vector must follow the DECIMATED length. Building it from the
        # original sample count yields a vector `fator` times longer than the
        # signal, so time-based lookups can index past the end of 'XN'.
        n_samples = resampled_signal_xn.shape[-1]
        resampled_signal_time = np.arange(n_samples) * incremento

        record['fs'] = min_hz
        record['XN'] = resampled_signal_xn
        record['t'] = resampled_signal_time
    return dataset

In [10]:
# Downsample the OME recordings to their lowest sampling rate, then list the
# resulting rate of each recording. padroniza_hz also mutates the records in place.
dataset_ome = padroniza_hz(dataset_ome)
[fs['fs'] for fs in dataset_ome]

[100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]

In [11]:
# Downsample the ATIVOS recordings to their lowest sampling rate, then list the
# resulting rate of each recording. padroniza_hz also mutates the records in place.
dataset_ativos = padroniza_hz(dataset_ativos)
[fs['fs'] for fs in dataset_ativos]

[100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100,
 100]

## Seleciona os dados escolhidos pelo especialista

In [12]:
def get_signals_for_interval(dataset, label, seconds=2):
    """Cut the annotated epochs out of every recording and stack them.

    For each recording, every entry of ``record['epochsTime'][0]`` defines a
    time window from its minimum to its maximum value. The samples of
    ``record['XN']`` whose time stamp in ``record['t']`` falls inside that
    window are selected; the upper bound is widened by one sample period, so a
    sample lying exactly on the maximum is included. At most ``seconds * fs``
    samples are kept per window.

    Args:
        dataset: iterable of recordings exposing 'epochsTime', 'fs', 't' and 'XN'.
        label: class label attached to every extracted window.
        seconds: maximum window duration to keep, in seconds.

    Returns:
        ``(signals, labels)`` where ``signals`` is the array built from all
        extracted windows and ``labels`` is a list with one ``label`` per window.
    """
    collected = []
    for position, record in enumerate(dataset):
        print('- Processing data: {}'.format(position))
        bounds = [(epoch.min(), epoch.max()) for epoch in record['epochsTime'][0]]
        hz = record['fs']
        for start, stop in bounds:
            print('\t- Process --> imin: {} - imax: {}'.format(start, stop))
            upper = stop + 1 / hz
            inside = (record['t'] >= start) & (record['t'] < upper)
            sample_idx = np.nonzero(inside)[0]
            limit = seconds * hz
            if len(sample_idx) > limit:
                sample_idx = sample_idx[:int(limit)]

            window = record['XN'][:, sample_idx]
            print('\t\t- Values: {}'.format(window.shape))
            collected.append(window)
        print('Now dataset is: {}'.format(np.asarray(collected).shape))

    return np.asarray(collected), [label] * len(collected)

In [13]:
## Collect the annotated epochs of the ATIVOS recordings (label 0),
## keeping at most 1 second of samples per epoch.
signals_alive, labels_alive = get_signals_for_interval(dataset_ativos, 0, seconds=1)
signals_alive.shape

- Processing data: 0
	- Process --> imin: 20.0 - imax: 22.0
		- Values: (20, 100)
	- Process --> imin: 22.0 - imax: 24.0
		- Values: (20, 100)
	- Process --> imin: 24.0 - imax: 26.0
		- Values: (20, 100)
	- Process --> imin: 27.0 - imax: 29.0
		- Values: (20, 100)
	- Process --> imin: 30.0 - imax: 32.0
		- Values: (20, 100)
	- Process --> imin: 32.0 - imax: 34.0
		- Values: (20, 100)
	- Process --> imin: 36.0 - imax: 38.0
		- Values: (20, 100)
	- Process --> imin: 39.0 - imax: 41.0
		- Values: (20, 100)
	- Process --> imin: 41.0 - imax: 43.0
		- Values: (20, 100)
	- Process --> imin: 43.0 - imax: 45.0
		- Values: (20, 100)
Now dataset is: (10, 20, 100)
- Processing data: 1
	- Process --> imin: 4.5 - imax: 6.5
		- Values: (20, 100)
	- Process --> imin: 6.5 - imax: 8.5
		- Values: (20, 100)
	- Process --> imin: 14.5 - imax: 16.5
		- Values: (20, 100)
	- Process --> imin: 16.5 - imax: 18.5
		- Values: (20, 100)
	- Process --> imin: 18.5 - imax: 20.5
		- Values: (20, 100)
	- Process --> im

(420, 20, 100)

In [14]:
## Collect the annotated epochs of the OME recordings (label 1),
## keeping at most 1 second of samples per epoch.
signals_brain_death, labels_brain_death = get_signals_for_interval(dataset_ome, 1, seconds=1)
signals_brain_death.shape

- Processing data: 0
	- Process --> imin: 13.0 - imax: 15.0
		- Values: (20, 100)
	- Process --> imin: 20.0 - imax: 22.0
		- Values: (20, 100)
	- Process --> imin: 30.0 - imax: 32.0
		- Values: (20, 100)
	- Process --> imin: 41.0 - imax: 43.0
		- Values: (20, 100)
	- Process --> imin: 51.0 - imax: 53.0
		- Values: (20, 100)
	- Process --> imin: 59.0 - imax: 61.0
		- Values: (20, 100)
	- Process --> imin: 66.0 - imax: 68.0
		- Values: (20, 100)
	- Process --> imin: 72.0 - imax: 74.0
		- Values: (20, 100)
	- Process --> imin: 76.0 - imax: 78.0
		- Values: (20, 100)
	- Process --> imin: 80.0 - imax: 82.0
		- Values: (20, 100)
Now dataset is: (10, 20, 100)
- Processing data: 1
	- Process --> imin: 10.0 - imax: 12.0
		- Values: (20, 100)
	- Process --> imin: 13.8 - imax: 15.8
		- Values: (20, 100)
	- Process --> imin: 25.0 - imax: 27.0
		- Values: (20, 100)
	- Process --> imin: 27.0 - imax: 29.000000000000004
		- Values: (20, 100)
	- Process --> imin: 31.8 - imax: 33.800000000000004
		- Val

(150, 20, 100)

In [15]:
## Merge the two classes (ALIVE and BRAIN_DEATH) into single signal and label arrays.
signals_concat = np.concatenate([signals_alive, signals_brain_death])
labels_concat = np.concatenate([labels_alive, labels_brain_death])

In [16]:
signals_concat.shape

(570, 20, 100)

In [17]:
import random

In [18]:
def suffle_list(x, y):
    """Shuffle two parallel sequences together, keeping each (x, y) pair aligned.

    The order is drawn from the global ``random`` module state.

    Returns:
        The ``zip`` iterator produced by un-zipping the shuffled pairs; unpacking
        it yields one tuple with the shuffled ``x`` items and one with the
        matching ``y`` items.
    """
    aligned_pairs = [*zip(x, y)]
    random.shuffle(aligned_pairs)
    unzipped = zip(*aligned_pairs)
    return unzipped

In [19]:
# Shuffle signals and labels together so that each signal keeps its label.
# The same names are rebound, so re-running this cell reshuffles the data again,
# and the shuffle depends on the (unseeded) global `random` state.
signals_concat, labels_concat = suffle_list(signals_concat, labels_concat)

In [20]:
# suffle_list hands back tuples; convert them to NumPy arrays again.
signals_concat = np.asarray(signals_concat)
labels_concat = np.asarray(labels_concat)

In [21]:
# Split the epochs into training (80%) and test (20%) sets.
# NOTE(review): the split is done per epoch and without `stratify`, so epochs cut
# from the same recording can land in both sets and the class ratio of the two
# sets is not controlled — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(signals_concat, labels_concat, test_size=0.2, random_state=42)

In [22]:
from sklearn.preprocessing import OneHotEncoder
# Create the OneHotEncoder instance
onehotencoder = OneHotEncoder()

# One-hot encode the labels: the encoder is fitted on the training labels only
# and then reused to transform the test labels. y_train / y_test are rebound here.
y_train = onehotencoder.fit_transform(np.array(y_train).reshape(-1, 1)).toarray()
y_test = onehotencoder.transform(np.array(y_test).reshape(-1, 1)).toarray()

In [23]:
# One-step-ahead pairs: X drops the last sample of the last axis, Y drops the first,
# so Y is X shifted by one sample. Despite the names, no scaling is applied here.
# NOTE: these four variables are reassigned by the cell at In [25], which
# replaces the values computed here.
train_X_normalized = X_train[:, :, :-1] #scaler.fit_transform()
train_Y_normalized = X_train[:, :, 1:] #scaler.fit_transform()

test_X_normalized = X_test[:, :, :-1] #scaler.fit_transform()
test_Y_normalized = X_test[:, :, 1:] #scaler.fit_transform()

# Flatten everything after the first axis into one feature vector per epoch.
train_X_normalized = train_X_normalized.reshape((train_X_normalized.shape[0], -1))
train_Y_normalized = train_Y_normalized.reshape((train_Y_normalized.shape[0], -1))

test_X_normalized = test_X_normalized.reshape((test_X_normalized.shape[0], -1))
test_Y_normalized = test_Y_normalized.reshape((test_Y_normalized.shape[0], -1))

In [24]:
from sklearn.preprocessing import StandardScaler
# NOTE(review): `scaler` is not used by any code visible in this notebook section.
scaler = StandardScaler()

In [25]:
# Swap the last two axes of each epoch and flatten it into a single row.
# The one-sample shift between X and Y is commented out, so the ESN target (Y)
# is identical to its input (X). Despite the names, no scaling is applied.
train_X_normalized = np.transpose(X_train, (0,2,1)).reshape((X_train.shape[0], -1))#[:, :-1]
train_Y_normalized = np.transpose(X_train, (0,2,1)).reshape((X_train.shape[0], -1))#[:, 1:]

test_X_normalized = np.transpose(X_test, (0,2,1)).reshape((X_test.shape[0], -1))#[:, :-1]
test_Y_normalized = np.transpose(X_test, (0,2,1)).reshape((X_test.shape[0], -1))#[:, 1:]

In [26]:
np.transpose(X_train, (0,2,1)).shape

(456, 100, 20)

In [27]:
train_Y_normalized.shape

(456, 2000)

## Cria arquitetura para otimização da ESN - NN

In [28]:
# Example of a randomly drawn ESN + NN architecture description.
# The scalar entries are drawn once when this cell runs; 'units' and
# 'activations' are left as callables that take the number of layers.
# create_template() builds the same structure and resolves the callables.
template_architecture = {
        'esn': {
            'reservoir_size': random.randint(10, 4000),
            'spectral_radius': random.uniform(.1, 1),
            'sparsity': random.uniform(.1, 1)
        },
        'nn': {
            'size': random.randint(1, 10),
            'units': lambda nn_size: random.sample(range(1,1000), nn_size),
            'activations': lambda nn_size: random.choices(['relu', 'sigmoid', 'softmax'], k=nn_size)
        }
    }

In [29]:
template_architecture

{'esn': {'reservoir_size': 1807,
  'spectral_radius': 0.3857098512515069,
  'sparsity': 0.19951162302477451},
 'nn': {'size': 9,
  'units': <function __main__.<lambda>(nn_size)>,
  'activations': <function __main__.<lambda>(nn_size)>}}

In [30]:
def create_template():
    """Draw a random ESN + NN architecture description.

    Returns:
        dict with two sections:

        * ``'esn'``: ``reservoir_size`` (int in [10, 4000]) plus
          ``spectral_radius`` and ``sparsity`` (floats drawn from [0.1, 1]).
        * ``'nn'``: ``size`` (number of layers, 1 to 10), ``units`` (``size``
          distinct values from 1..999) and ``activations`` (``size`` names chosen
          with repetition among 'relu', 'sigmoid' and 'softmax').
    """
    # The draws below happen in the same order as the dictionary fields, so the
    # result for a given `random` state is unchanged.
    esn_section = {
        'reservoir_size': random.randint(10, 4000),
        'spectral_radius': random.uniform(.1, 1),
        'sparsity': random.uniform(.1, 1),
    }

    nn_size = random.randint(1, 10)
    layer_units = random.sample(range(1, 1000), nn_size)
    layer_activations = random.choices(['relu', 'sigmoid', 'softmax'], k=nn_size)
    nn_section = {
        'size': nn_size,
        'units': layer_units,
        'activations': layer_activations,
    }

    return {'esn': esn_section, 'nn': nn_section}


In [31]:
def bin_array_to_dec(bin_array):
    """Interpret a sequence of 0/1 values as a binary number, most significant bit first.

    Each element is converted with ``str`` and the concatenated text is parsed in base 2.
    """
    bit_text = ''.join(map(str, list(bin_array)))
    return int(bit_text, 2)

### Montagem da fita genética do modelo

In [32]:
## Maximum value of each attribute encoded on the genetic tape.
## recursive_loop derives the number of bits reserved for an attribute from the
## binary representation of the value given here.
max_range_architecture = {
    'esn': {
        'reservoir_size': 256,
#        'spectral_radius': 100,
#        'sparsity': 100
    },
        'nn': {
        'units1': 500,
        'activations1': 1,
        'units2': 255,
        'activations2': 1,
        'units3': 255,
        'activations3': 1,
        'units4': 120,
        'activations4': 1,
        # 'units5': 1000,
        # 'activations5': 1,
        # 'units6': 1000,
        # 'activations6': 1,
        # 'units7': 1000,
        # 'activations7': 1,
#        'loss': 'binary_crossentropy',
#        'optimizer': 'adam'
    }
}
# Only referenced by a `global` statement and by commented-out code in recursive_loop.
max_size_hidden_nn = 7

In [33]:
## Build the objects describing how many bits each parameter occupies on the genetic tape.
max_range_architecture_binary = {}
max_units_architecture_binary = {}
length_genetic_tape = 0
def recursive_loop(obj, key_parents=None):
    """Walk a nested dict of maximum values and register every leaf on the tape layout.

    Keys are joined with '-' while descending (a top-level key 'esn' with child
    'reservoir_size' becomes '-esn-reservoir_size'). For each leaf value the
    number of binary digits of ``bin(value)`` is:

    * added to the global ``length_genetic_tape``;
    * stored in ``max_units_architecture_binary[key]``;
    * used to store, in ``max_range_architecture_binary[key]``, the largest
      number representable with that many bits.

    The globals are only accumulated, never reset, by this function.
    """
    global max_units_architecture_binary, length_genetic_tape, max_range_architecture_binary
    if isinstance(obj, dict):
        prefix = '' if key_parents is None else key_parents
        for key, value in obj.items():
            recursive_loop(value, '{}-{}'.format(prefix, key))
        return

    # bin() prefixes the digits with '0b'; drop it before counting.
    n_bits = len(bin(obj)[2:])
    length_genetic_tape += n_bits

    max_units_architecture_binary[key_parents] = n_bits
    max_range_architecture_binary[key_parents] = bin_array_to_dec([1] * n_bits)

In [34]:
# Fill the bit-width tables and length_genetic_tape from max_range_architecture.
# recursive_loop accumulates into globals, so re-run the cell that resets them
# before calling it again.
recursive_loop(max_range_architecture)

In [35]:
max_units_architecture_binary

{'-esn-reservoir_size': 9,
 '-nn-units1': 9,
 '-nn-activations1': 1,
 '-nn-units2': 8,
 '-nn-activations2': 1,
 '-nn-units3': 8,
 '-nn-activations3': 1,
 '-nn-units4': 7,
 '-nn-activations4': 1}

In [36]:
def get_obj_of_position_params_tape(max_units_architecture_binary, length_genetic_tape):
    """Randomly assign disjoint tape positions to every parameter.

    Args:
        max_units_architecture_binary: mapping parameter key -> number of bits it needs.
        length_genetic_tape: total number of positions on the tape.

    Returns:
        dict mapping each parameter key to the list of tape positions drawn for it
        (with ``random.sample``, from the positions not yet taken by earlier keys).
    """
    free_slots = list(range(length_genetic_tape))
    layout = {}

    for param_key, n_bits in max_units_architecture_binary.items():
        chosen = random.sample(free_slots, n_bits)
        layout[param_key] = chosen
        # Keep the remaining positions in ascending order for the next draw.
        taken = set(chosen)
        free_slots = [slot for slot in free_slots if slot not in taken]
    return layout

In [37]:
## Genetic-algorithm settings
# Parameter key -> tape positions holding its bits (drawn at random on every run of this cell).
map_genetic_tape = get_obj_of_position_params_tape(max_units_architecture_binary, length_genetic_tape)
# Individuals per generation.
pop_size = 50
# Fraction of each offspring batch that is generated at random instead of by crossover.
pop_random_perc = .04
# Number of generations executed by the main loop.
generation = 200
# Number of parents drawn by the selection step.
n_parents_survive = 4
# Offspring created per generation; the pop_size - n_reinsertion best individuals are carried over.
n_reinsertion = 40
inputs_size = train_X_normalized.shape[1]

# NOTE: fitness_function assigns its own local `max_validation`.
max_validation = 1
# Largest reservoir size that fits in the bits reserved for it.
reservoir_size = bin_array_to_dec([1]*max_units_architecture_binary['-esn-reservoir_size'])
# Largest width of every layer: reservoir, each hidden layer, then the 2-unit output.
units = [reservoir_size] + [bin_array_to_dec([1]*value) for key, value in max_units_architecture_binary.items() if 'units' in key] + [2]
# Sum of the products of consecutive layer widths (weights between layers, biases not counted).
max_units = sum([h1 * h2 for h1, h2 in zip(units[:-1], units[1:])])
max_reservoir_size = (inputs_size * reservoir_size) + (reservoir_size**2)

#data_population = pd.DataFrame()

#data_population = pd.DataFrame()

In [38]:
# def init_population(pop_size):
#     global length_genetic_tape
    
#     genetic_tape_population = [[random.randint(0, 100) for value in range(length_genetic_tape)] for value in range(pop_size)]
#     genetic_tape_population = (np.asarray(genetic_tape_population) <= 10) * 1

#     return np.asarray(genetic_tape_population)

In [39]:
def init_population(pop_size):
    """Create ``pop_size`` random individuals.

    Every individual is a genetic tape of ``length_genetic_tape`` (module-level
    global) bits, each drawn independently with ``random.randint(0, 1)``.

    Args:
        pop_size: number of individuals to create; may be 0.

    Returns:
        Integer array of shape ``(pop_size, length_genetic_tape)``.
    """
    global length_genetic_tape

    genetic_tape_population = [
        [random.randint(0, 1) for _ in range(length_genetic_tape)]
        for _ in range(pop_size)
    ]

    # Force a 2-D integer result: without this an empty population comes back as
    # a float array of shape (0,), which cannot be concatenated with the 2-D
    # offspring arrays built by the crossover functions.
    return np.asarray(genetic_tape_population, dtype=int).reshape(pop_size, length_genetic_tape)

In [40]:
def build_esn_nn_with_genetic_tape(genetic_tape):
    """Decode a genetic tape into an ESN and a compiled Keras classifier.

    ESN: only the reservoir size is read from the tape (a decoded value of 0 is
    raised to 1); the spectral radius is fixed at 0.8 and the sparsity at 0. The
    ESN has ``inputs_size`` inputs and ``inputs_size`` outputs.

    NN: for every ``unitsN`` / ``activationsN`` pair registered in
    ``max_units_architecture_binary`` a Dense layer is added when the decoded
    number of units is greater than 0 (activation bit 0 -> 'relu', 1 -> 'softmax').
    The first layer added declares ``input_shape=(inputs_size,)``. A final
    2-unit softmax layer closes the model, compiled with binary cross-entropy,
    Adam and the accuracy metric.

    Args:
        genetic_tape: array of bits, indexable with the position lists of ``map_genetic_tape``.

    Returns:
        ``(esn, model)``.
    """
    global map_genetic_tape, max_range_architecture_binary, inputs_size

    ## Build the ESN.
    decoded_reservoir = bin_array_to_dec(genetic_tape[map_genetic_tape['-esn-reservoir_size']])
    reservoir_size = max(1, decoded_reservoir)

    esn = ESN(
        n_inputs=inputs_size,
        n_outputs=inputs_size,
        n_reservoir=reservoir_size,
        spectral_radius=.8,
        sparsity=0,
    )

    ## Build the neural network.
    activation_names = {0: 'relu', 1: 'softmax'}
    unit_keys = [key for key in max_units_architecture_binary if 'units' in key]
    activation_keys = [key for key in max_units_architecture_binary if 'activation' in key]

    model = Sequential()
    is_input_layer = True
    for key_units, key_activation in zip(unit_keys, activation_keys):
        n_units = bin_array_to_dec(genetic_tape[map_genetic_tape[key_units]])
        activation_code = bin_array_to_dec(genetic_tape[map_genetic_tape[key_activation]])

        # A layer decoded with zero units is simply left out of the model.
        if n_units <= 0:
            continue

        layer_kwargs = {'units': n_units, 'activation': activation_names.get(activation_code)}
        if is_input_layer:
            layer_kwargs['input_shape'] = (inputs_size, )
            is_input_layer = False
        model.add(Dense(**layer_kwargs))

    model.add(Dense(units=2, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return esn, model

In [41]:
def save_in_data_population(generation, individual, genetic_tape, time_exec, test_validation, esn_params, nn_params, df=None):
    """Append one row describing an evaluated individual to the population log.

    The row holds the generation and individual numbers, the ESN settings, the
    decoded units/activation of every NN layer slot, the evaluation results and
    the number of hidden layers with more than 0 units.

    Args:
        generation: generation number of the individual.
        individual: position of the individual inside its generation.
        genetic_tape: bit array of the individual.
        time_exec: evaluation time, as measured by ``train``.
        test_validation: evaluation score, as returned by ``train``.
        esn_params: parameter count of the ESN.
        nn_params: parameter count of the neural network.
        df: existing log, or ``None`` to start a new one.

    Returns:
        A new DataFrame (index reset) with the row appended.
    """
    global max_units_architecture_binary, map_genetic_tape

    activation_names = {0: 'relu', 1: 'softmax'}

    ## ESN settings. spectral_radius and sparsity are fixed values that mirror
    ## the ones build_esn_nn_with_genetic_tape passes to the ESN (0.8 and 0).
    ## NOTE(review): reservoir_size is the raw decoded value (the builder raises
    ## 0 to 1) and 'outputs' is logged as 1 although the builder creates the ESN
    ## with n_outputs=inputs_size — confirm what these columns should report.
    reservoir_size = bin_array_to_dec(genetic_tape[map_genetic_tape['-esn-reservoir_size']])
    row = {
        'generation': [generation],
        'individual': [individual],
        'reservoir_size': [reservoir_size],
        'outputs': [1],
        'spectral_radius': [.8],
        'sparsity': [0.0],
    }

    ## Neural-network layer slots.
    nn_units_key = [value for value in max_units_architecture_binary if 'units' in value]
    nn_activation_key = [value for value in max_units_architecture_binary if 'activation' in value]
    hidden_layer = 0
    for key_units, key_activation in zip(nn_units_key, nn_activation_key):
        units = bin_array_to_dec(genetic_tape[map_genetic_tape[key_units]])
        activation = bin_array_to_dec(genetic_tape[map_genetic_tape[key_activation]])

        row[key_units] = [units]
        row[key_activation] = [activation_names.get(activation)]
        if units > 0:
            hidden_layer += 1

    row['time_exec'] = time_exec
    row['test_validation'] = test_validation
    row['esn_params'] = esn_params
    row['nn_params'] = nn_params
    row['nn_hidden_layer'] = hidden_layer

    new_row = pd.DataFrame(row)
    if df is None:
        # First row of the log: nothing to concatenate with.
        return new_row.reset_index(drop=True)

    return pd.concat([df, new_row]).reset_index(drop=True)

In [42]:
# Population log; save_in_data_population creates the DataFrame on first use.
df_population = None

In [44]:
import time
def train(esn, nn):
    """Fit the ESN and the classifier, then score them on the test split.

    Steps:
        1. ``esn.fit`` is run on the training inputs/targets; its return value is
           used as the classifier's training input.
        2. The classifier is trained for 5 epochs (batch size 32) on the one-hot labels.
        3. ``esn.predict`` on the test inputs feeds the classifier, whose argmax
           is compared with the argmax of the one-hot test labels.

    All data comes from module-level globals.

    NOTE(review): the score is computed on the test split and is later used as
    the fitness that drives selection, so the final test score is optimistically
    biased — consider a separate validation split.

    Args:
        esn: object exposing ``fit(X, Y)`` and ``predict(X)``.
        nn: compiled Keras model.

    Returns:
        ``(elapsed_seconds, f1)`` for this individual.
    """
    global train_X_normalized, train_Y_normalized, test_X_normalized, test_Y_normalized, y_train, y_test

    inicio_time = time.time()

    predict_esn_train = esn.fit(train_X_normalized, train_Y_normalized)

    # Train the classifier
    nn.fit(predict_esn_train, y_train, batch_size=32, verbose=False, epochs=5)
    predict_esn_test = esn.predict(test_X_normalized)
    predict_nn = nn.predict(predict_esn_test, verbose=False)

    y_pred_rounded = np.argmax(predict_nn, axis=1)
    y_test_rounded = np.argmax(y_test, axis=1)
    # f1_score expects (y_true, y_pred); the arguments used to be swapped.
    f1score = f1_score(y_test_rounded, y_pred_rounded)

    final_time = time.time() - inicio_time

    return final_time, f1score

In [45]:
def create_score(max_validation, max_esn_params, max_nn_params, validation, esn_params, nn_params, weight_validation, weight_esn_params, weight_nn_params ):
    """Combine the validation score with bonuses for small models.

    ``score = validation + esn_bonus + nn_bonus`` where each bonus is
    ``(1 - params / max_params) * weight``, i.e. the fewer parameters relative to
    the largest model of the generation, the larger the bonus.

    Special cases:
        * ``max_validation == 0`` -> the validation part is 0.
        * a model whose parameter count is at or below 0.04% of the theoretical
          maximum (globals ``max_units`` / ``max_reservoir_size``) gets no bonus.
        * a maximum of 0 parameters gives a bonus of 0 instead of dividing by zero.

    ``weight_validation`` is accepted for interface compatibility but is not
    applied: the validation score enters the sum unscaled.

    Returns:
        The fitness score of the individual.
    """
    global max_units, max_reservoir_size

    validation_perc = 0 if max_validation == 0 else validation

    # Guard the divisions: with a maximum of 0 every individual has 0 parameters,
    # which the thresholds below map to a bonus of 0 anyway.
    if max_esn_params > 0:
        esn_params_perc = (1 - (esn_params / max_esn_params)) * weight_esn_params
    else:
        esn_params_perc = 0
    if max_nn_params > 0:
        nn_params_perc = (1 - (nn_params / max_nn_params)) * weight_nn_params
    else:
        nn_params_perc = 0

    # Degenerate (almost empty) models must not be rewarded for being small.
    if nn_params <= int(max_units * .0004):
        nn_params_perc = 0
    if esn_params <= int(max_reservoir_size * .0004):
        esn_params_perc = 0

    score = validation_perc + (esn_params_perc + nn_params_perc)
    return score

In [46]:
def save_score_array(array_score, generation):
    """Store the scores of a generation in ``df_population`` and flag its best individuals.

    Writes ``array_score`` to the 'score' column of the rows of ``generation``
    and sets the 'best' column to 1 for the ``pop_size - n_reinsertion``
    highest-scoring individuals (0 for the others). ``df_population``,
    ``pop_size`` and ``n_reinsertion`` are module-level globals.

    Args:
        array_score: one score per row of the generation, in row order.
        generation: generation number whose rows are updated.
    """
    global df_population

    generation_rows = df_population['generation'] == generation

    print('save_score_array')
    print('{}'.format(df_population.loc[generation_rows].shape))
    print('{}'.format(len(array_score)))
    df_population.loc[generation_rows, 'score'] = array_score

    # argsort is ascending: flip it so the HIGHEST scores come first. Slicing the
    # ascending order flagged the worst individuals as 'best', disagreeing with
    # the survivors picked by reinsertion().
    indices_maiores = np.flip(np.argsort(array_score))[:pop_size-n_reinsertion]
    marcados = np.zeros(len(array_score))
    marcados[indices_maiores] = 1
    df_population.loc[generation_rows, 'best'] = marcados

In [47]:
def fitness_function(generation):
    """Score every individual of ``generation`` and record the scores.

    The maxima of 'test_validation', 'esn_params' and 'nn_params' over the
    generation are passed to ``create_score`` together with each individual's own
    values and the fixed weights (1, 0.1, 0.1). The resulting list is handed to
    ``save_score_array`` and returned.

    Args:
        generation: generation number to evaluate (rows of ``df_population``).

    Returns:
        List with one score per individual, in row order.
    """
    global df_population

    current = df_population.query(f'generation == {generation}')

    max_validation = current['test_validation'].max()
    max_esn_params = current['esn_params'].max()
    max_nn_params = current['nn_params'].max()

    weight_validation, weight_esn_params, weight_nn_params = 1, .1, .1

    array_score = [
        create_score(
            max_validation, max_esn_params, max_nn_params,
            row['test_validation'], row['esn_params'], row['nn_params'],
            weight_validation, weight_esn_params, weight_nn_params,
        )
        for _, row in current.iterrows()
    ]

    save_score_array(array_score, generation)
    return array_score

In [48]:
# def roulette(array_fitness, n_parents):
#     scores = array_fitness.copy()
#     #inverted_list = [max(scores) - score for score in scores]
#     # Encontre a soma das pontuações invertidas
#     total_score = sum(scores)
#     # Calcule as proporções da roleta
#     proportions = [s / total_score for s in scores]
#     #[print("{}: {:f}".format(enum, a)) for enum, a in enumerate(proportions)]
#     selected_index = np.random.choice(len(scores), size=n_parents, p=proportions)
#     return selected_index

In [49]:
def roulette(array_fitness, n_parents):
    """Fitness-proportional (roulette-wheel) selection, with replacement.

    Args:
        array_fitness: scores of the individuals.
        n_parents: how many indices to draw.

    Returns:
        Array of ``n_parents`` indices into ``array_fitness``; an index can appear
        more than once. When the scores sum to 0 every index is equally likely.
    """
    scores = array_fitness.copy()
    n_candidates = len(scores)
    total_score = sum(scores)

    if total_score != 0:
        proportions = [score / total_score for score in scores]
    else:
        # All scores are zero: fall back to a uniform draw.
        proportions = [1 / n_candidates] * n_candidates

    return np.random.choice(n_candidates, size=n_parents, p=proportions)


In [50]:
def tournament(array_fitness, n_parents):
    """Return the indices of the ``n_parents`` highest scores, best first."""
    ascending = np.argsort(array_fitness)
    descending = np.flip(ascending)
    return descending[:n_parents]

In [51]:
def simple_crossover(population, array_parents, pop_size, num_genes, pop_random_perc):
    """Create a new batch of individuals with single-point crossover.

    ``int(pop_size * (1 - pop_random_perc))`` children are bred; the rest of the
    batch is filled with random individuals from ``init_population``. For every
    child two parents are drawn with ``select_random_couples`` and a cut point is
    drawn in ``[0, num_genes)``: genes before the cut come from the first parent,
    genes from the cut onwards from the second.

    Args:
        population: current population (one tape per row).
        array_parents: indices into ``population`` of the selected parents.
        pop_size: number of individuals to return.
        num_genes: number of genes per individual.
        pop_random_perc: fraction of the batch generated at random.

    Returns:
        Array with the bred children followed by the random individuals.
    """
    n_crossed = int(pop_size * (1-pop_random_perc))
    n_random = int(pop_size - n_crossed)
    n_random += pop_size - (n_crossed + n_random)

    print('\tA nova geração terá:')
    print(f'\t\t- {n_crossed} população cruzada')
    print(f'\t\t- {n_random} população aleatória')

    genes = np.asarray(population)
    gene_positions = np.arange(num_genes)
    children = []
    for _ in range(n_crossed):
        ## Pick the two parents of this child
        couple = np.asarray(select_random_couples(array_parents))
        ## 0 = take the gene from the first parent, 1 = from the second one
        corte = np.random.randint(num_genes)
        donor = np.repeat([0, 1], [corte, num_genes - corte])
        ## Assemble the child gene by gene according to the donor mask
        children.append(genes[couple[donor], gene_positions])

    ## Random individuals keep the population from converging too early
    random_block = init_population(n_random)
    return np.concatenate((children, random_block), axis=0)

In [52]:
def random_crossover(population, array_parents, pop_size, num_genes, pop_random_perc):
    """Create a new batch of individuals with uniform crossover.

    ``int(pop_size * (1 - pop_random_perc))`` children are bred; the rest of the
    batch is filled with random individuals from ``init_population``. For every
    child two parents are drawn with ``select_random_couples`` and each gene is
    taken from the first or the second parent according to a random 0/1 mask.

    Args:
        population: current population (one tape per row).
        array_parents: indices into ``population`` of the selected parents.
        pop_size: number of individuals to return.
        num_genes: number of genes per individual.
        pop_random_perc: fraction of the batch generated at random.

    Returns:
        Array with the bred children followed by the random individuals.
    """
    n_crossed = int(pop_size * (1-pop_random_perc))
    n_random = int(pop_size - n_crossed)
    n_random += pop_size - (n_crossed + n_random)

    print('\tA nova geração terá:')
    print(f'\t\t- {n_crossed} população cruzada')
    print(f'\t\t- {n_random} população aleatória')

    genes = np.asarray(population)
    gene_positions = np.arange(num_genes)
    children = []
    for _ in range(n_crossed):
        ## Pick the two parents of this child
        couple = np.asarray(select_random_couples(array_parents))
        ## 0 = take the gene from the first parent, 1 = from the second one
        donor = np.random.choice([0,1], size=num_genes)
        ## Assemble the child gene by gene according to the donor mask
        children.append(genes[couple[donor], gene_positions])

    ## Random individuals keep the population from converging too early
    random_block = init_population(n_random)
    return np.concatenate((children, random_block), axis=0)

In [53]:
def change_value(value):
    """Flip a bit: 0 becomes 1, any other value becomes 0."""
    return 1 if value == 0 else 0

In [54]:
def hamming_similarity(array1, array2):
    """Fraction of positions at which two equally long sequences hold equal values.

    Raises:
        ValueError: if the sequences differ in length.
    """
    length = len(array1)
    if length != len(array2):
        raise ValueError("Os arrays devem ter o mesmo comprimento.")

    num_different = 0
    for left, right in zip(array1, array2):
        num_different += (left != right)

    return 1 - (num_different / length)


In [55]:
def mutation(children_array, mutation_rate_normal=.05, mutation_rate_similarity=.2, similarity_mutation=.65):
    """Flip genes at random on a copy of ``children_array``.

    For every gene an integer is drawn from 1..100 and the gene is flipped with
    ``change_value`` when ``draw / 100 <= mutation_rate_normal``.

    ``mutation_rate_similarity`` and ``similarity_mutation`` are accepted but not
    used; the similarity-based mutation they belonged to is disabled.

    Args:
        children_array: individuals to mutate (one tape per row).
        mutation_rate_normal: per-gene mutation probability threshold.

    Returns:
        The mutated copy.
    """
    mutated = children_array.copy()

    for child in mutated:
        for gene_idx in range(len(child)):
            draw = np.random.randint(1, 101)
            if draw / 100 <= mutation_rate_normal:
                child[gene_idx] = change_value(child[gene_idx])

    return mutated

In [56]:
length_genetic_tape

45

In [57]:
# Reset the population log before starting a new run of the genetic algorithm.
df_population = None

In [60]:
def reinsertion(population, children, pop_size, n_reinsertion, array_fitness):
    """Build the next population: the best current individuals followed by the children.

    The ``pop_size - n_reinsertion`` individuals with the highest fitness are
    kept, ordered from best to worst, and ``children`` is appended after them.

    Args:
        population: current population array (one tape per row).
        children: offspring to insert.
        pop_size: size of the population.
        n_reinsertion: number of offspring slots.
        array_fitness: fitness of each row of ``population``.

    Returns:
        New array; ``population`` itself is left untouched.
    """
    n_survivors = pop_size - n_reinsertion
    ranking = np.flip(np.argsort(array_fitness))
    # Indexing with an index array already produces a fresh array.
    survivors = np.asarray(population[ranking[:n_survivors]])
    return np.concatenate((survivors, np.asarray(children)), axis=0)

In [61]:
df_population

In [62]:
def exec_train_for_population(generation, population):
    """Evaluate one generation and breed the next population.

    Evaluation:
        * From generation 1 on, the first ``pop_size - n_reinsertion`` individuals
          are treated as the survivors that ``reinsertion`` placed at the front,
          best first. They are not retrained: their time, score and parameter
          counts are copied from the row holding the same rank (by 'score',
          descending) in the previous generation.
        * Every other individual is decoded, trained and scored.
        Each individual adds one row to the global ``df_population``.

    Breeding: fitness computation, roulette selection of ``n_parents_survive``
    parents, single-point crossover producing ``n_reinsertion`` offspring,
    mutation and finally reinsertion next to the best current individuals.

    Args:
        generation: number of the generation being evaluated.
        population: array with one genetic tape per row.

    Returns:
        The population for the next generation.
    """
    global df_population, n_reinsertion, pop_size, length_genetic_tape, pop_random_perc, n_parents_survive

    for individual_enum, individual in enumerate(population):
        if individual_enum < pop_size-n_reinsertion and generation > 0:
            print('\t- [Campeão] Individuo {}'.format(individual_enum))
            # Reuse the metrics of the previous generation's individual with the same rank.
            df_last_generation = df_population.query('generation == {}'.format(generation-1))
            df_best_individual = df_last_generation.iloc[np.argsort(df_last_generation['score'])[::-1][individual_enum:individual_enum+1]]
            time_exec = df_best_individual['time_exec'].values[0]
            test_validation = df_best_individual['test_validation'].values[0]
            esn_params = df_best_individual['esn_params'].values[0]
            nn_params = df_best_individual['nn_params'].values[0]
            print('\t\t- Time: {} - Validation Test: {}'.format(round(time_exec, 2), round(test_validation, 2)))
            df_population = save_in_data_population(generation, individual_enum, individual, time_exec, test_validation, esn_params, nn_params, df_population)
        else:
            print('\t- [Novato] Individuo {}'.format(individual_enum))
            esn, nn = build_esn_nn_with_genetic_tape(individual)
            time_exec, test_validation = train(esn, nn)
            print('\t\t- Time: {} - Validation Test: {}'.format(round(time_exec, 2), round(test_validation, 2)))
            df_population = save_in_data_population(generation, individual_enum, individual, time_exec, test_validation, esn.count_parameters(), nn.count_params(), df_population)
    print('\t - Executa fitness_function')
    array_fitness = fitness_function(generation)
    # The log lines below name the operators that are actually called
    # (they used to announce tournament / random_crossover).
    print('\t - Executa roulette')
    selected_index = roulette(array_fitness, n_parents_survive)
    print('\t - simple_crossover')
    children = simple_crossover(population.copy(), selected_index, n_reinsertion, length_genetic_tape, pop_random_perc)
    print('\t - childrens: {}'.format(children.shape))
    children = mutation(children)
    print('\t - mutation - childrens: {}'.format(children.shape))
    children = reinsertion(population, children, pop_size, n_reinsertion, array_fitness)
    print('\t - reinsertion - childrens: {}'.format(children.shape))
    print('\t - Métricas:')
    df_generation = df_population.query(f'generation == {generation}')

    # Generation-wide averages, printed for monitoring only.
    validation_metrics = df_generation['test_validation'].mean()
    esn_params_metrics = df_generation['esn_params'].mean()
    nn_params_metrics = df_generation['nn_params'].mean()

    print(f'\t\t- validation_metrics: {validation_metrics}')
    print(f'\t\t- esn_params_metrics: {esn_params_metrics}')
    print(f'\t\t- nn_params_metrics: {nn_params_metrics}')

    return children

In [63]:
def select_random_couples(array_parents):
    """Draw two entries of ``array_parents`` at distinct positions.

    The draw is made without replacement over positions, so the two returned
    values are equal only if ``array_parents`` itself contains repeated values.
    """
    couple = np.random.choice(array_parents, replace=False, size=2)
    return couple

In [64]:
# Run the genetic algorithm: start from a random population and, after every
# generation, replace it with the population returned by exec_train_for_population.
# NOTE(review): no random seed is set for `random` / `np.random` in the visible
# cells, so runs are not reproducible.
population = init_population(pop_size)
# Not read by any code visible in this notebook section.
first = True
for epoch in range(generation):
    print('- Época {}'.format(epoch))
    children = exec_train_for_population(epoch, population)
    population = children.copy()

- Época 0
	- [Novato] Individuo 0
		- Time: 2.35 - Validation Test: 0.35
	- [Novato] Individuo 1
		- Time: 2.78 - Validation Test: 0.0
	- [Novato] Individuo 2
		- Time: 2.6 - Validation Test: 0.0
	- [Novato] Individuo 3
		- Time: 2.3 - Validation Test: 0.0
	- [Novato] Individuo 4
		- Time: 2.41 - Validation Test: 0.0
	- [Novato] Individuo 5
		- Time: 2.36 - Validation Test: 0.35
	- [Novato] Individuo 6
		- Time: 2.8 - Validation Test: 0.22
	- [Novato] Individuo 7
		- Time: 2.22 - Validation Test: 0.0
	- [Novato] Individuo 8
		- Time: 2.8 - Validation Test: 0.0
	- [Novato] Individuo 9
		- Time: 2.82 - Validation Test: 0.0
	- [Novato] Individuo 10
		- Time: 2.18 - Validation Test: 0.0
	- [Novato] Individuo 11
		- Time: 2.88 - Validation Test: 0.0
	- [Novato] Individuo 12
		- Time: 2.52 - Validation Test: 0.0
	- [Novato] Individuo 13
		- Time: 2.46 - Validation Test: 0.0
	- [Novato] Individuo 14
		- Time: 2.92 - Validation Test: 0.0
	- [Novato] Individuo 15
		- Time: 2.51 - Validation Tes