# Colab config

In [0]:
from google.colab import drive
# Mount Google Drive at the relative path 'drive', forcing a remount.
# NOTE(review): data() below reads from the absolute path /content/drive, so this
# presumably relies on the working directory being /content — confirm.
drive.mount('drive', force_remount=True)

In [0]:
# Install Hyperas, which the cells below import (no version is pinned here).
!pip install hyperas

# CNN 1, primeiras com repetição, 5 fatias

Importando as bibliotecas necessárias.

In [0]:
import os

import time

import gc

import numpy as np

from scipy import interp

from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, roc_curve, auc

from keras import backend as K
from keras import optimizers
from keras.layers import Conv3D, MaxPool3D, Flatten, Dense, Dropout, Input
from keras.losses import binary_crossentropy
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras.utils.vis_utils import plot_model, model_to_dot

In [0]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Allow this process to use the whole GPU memory (fraction = 1).
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 1

# Create a single session and register that same object with Keras. The
# previous version built two independent sessions from the same config, and
# the one bound to `session` was never the one Keras used.
session = tf.Session(config=config)
set_session(session)

## Otimização dos Hiperparâmetros

Função **data()** exigida pelo Hyperas. Deve retornar os 4 vetores X_train, Y_train, X_test, Y_test.

Os dados estão armazenados em 4 arquivos numpy, gerados pelo script **import_images.py**.

In [0]:
'''5-balanced-repeat'''

def data():
  """Load the train/test arrays of the '5-balanced-repeat' solid-nodule dataset.

  Reads four .npy files from the mounted Google Drive folder and hands them
  back in the order X_train, Y_train, X_test, Y_test, which is the order
  model() takes them as parameters.

  NOTE(review): Hyperas is understood to copy this function's body into a
  generated script, so the local variable names should stay exactly as they
  are. Confirm against the Hyperas documentation before refactoring.
  """
  # Root of the project data on the mounted Drive.
  prefix = "/content/drive/My Drive/Pesquisa - Dicom images/data"
  X_train = np.load(prefix + "/nps/solid-nodules/data-5-balanced-repeat/X_train.npy")
  X_test = np.load(prefix + "/nps/solid-nodules/data-5-balanced-repeat/X_test.npy")
  Y_train = np.load(prefix + "/nps/solid-nodules/data-5-balanced-repeat/Y_train.npy")
  Y_test = np.load(prefix + "/nps/solid-nodules/data-5-balanced-repeat/Y_test.npy")
  
  return X_train, Y_train, X_test, Y_test

### Definição do modelo a ser otimizado pelo Hyperas.

O espaço de busca do modelo é o seguinte:
* **conv_layer1**: [32, 48, 64, 96] unidades
* **dense_layer1**: [32, 64, 128] unidades
* **dense_layer2**: [16, 24, 32] unidades
* **dropout** nas camadas densas: entre 0 e 0.5

In [0]:
# Candidate values for each `choice` hyperparameter used inside model().
# The report cell decodes best_run by indexing into these lists, so each list
# must mirror the corresponding `choice([...])` list in model() element for
# element; otherwise the printed unit counts do not match the trained model.
conv1 = [32, 48, 64, 96]
dense1 = [32, 64, 128]
dense2 = [16, 24, 32]
    
def model(X_train, Y_train, X_test, Y_test):
    """Hyperas model template for a single-conv-layer 3D CNN binary classifier.

    The double-brace expressions are Hyperas template markers, so this
    function is meant to be passed to optim.minimize rather than called
    directly. The searched hyperparameters are the number of Conv3D filters,
    the sizes of the two dense layers and the two dropout rates.

    The report cell reads the search result through the keys 'conv1',
    'dense1', 'dense2', 'Dropout' and 'Dropout_1'. NOTE(review): those keys
    appear to be derived from the text next to each marker, so renaming the
    locals or the Dropout calls would presumably change them. Confirm against
    the Hyperas documentation.

    The network is trained on (X_train, Y_train) and both validated and scored
    on (X_test, Y_test). The result is a dict holding the negated test accuracy
    under 'loss', STATUS_OK under 'status' and the trained Keras model under
    'model'.
    """
    # Searched layer sizes; each marker offers a fixed list of options.
    conv1 = {{choice([32, 48, 64, 96])}}
    dense1 = {{choice([32, 64, 128])}}
    dense2 = {{choice([16, 24, 32])}}
    
    # Input shape is everything after the sample axis.
    # assumes X_train is 5-D (samples, 3 spatial dims, channels) — TODO confirm
    input_layer = Input(X_train.shape[1:5])
    
    # Single 3x3x3 convolution followed by 2x2x2 max pooling.
    conv_layer1 = Conv3D(conv1, kernel_size=(3, 3, 3), activation='relu')(input_layer)
    pooling_layer1 = MaxPool3D(pool_size=(2, 2, 2))(conv_layer1)
    
    flatten_layer = Flatten()(pooling_layer1)
    
    # Two dense layers, each followed by dropout with a rate searched in [0, 0.5].
    dense_layer1 = Dense(dense1, activation='relu')(flatten_layer)
    dense_layer1 = Dropout({{uniform(0, .5)}})(dense_layer1)
    
    dense_layer2 = Dense(dense2, activation='relu')(dense_layer1)
    dense_layer2 = Dropout({{uniform(0, .5)}})(dense_layer2)
    
    # One sigmoid unit: binary classification output.
    output_layer = Dense(units=1, activation='sigmoid')(dense_layer2)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    
    opt = optimizers.RMSprop(lr=0.0001)
    
    model.compile(loss=binary_crossentropy, optimizer=opt, metrics=['acc'])

    # Early stopping is currently disabled; every trial runs the full 10 epochs.
    #early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=1, mode='auto')
    
    # NOTE(review): the test split doubles as validation data here and is also
    # the split used for the selection score below.
    model.fit(X_train, Y_train,
              batch_size=128,
              epochs=10,
              verbose=2,
              validation_data=(X_test, Y_test)#,
              #callbacks=[early_stop]
              )
    
    # evaluate() yields the loss followed by the 'acc' metric requested in compile().
    score, acc = model.evaluate(X_test, Y_test, verbose=0)
    
    print('Test accuracy:', acc)
    
    # Accuracy is negated so that minimising 'loss' maximises test accuracy.
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}

### Otimização

Os dados são lidos e é feito o processo de otimização com o Hyperas. São testados 30 modelos.

In [0]:
# Load the arrays into the notebook namespace so the best model can be
# re-evaluated on the test split once the search has finished.
X_train, Y_train, X_test, Y_test = data()

start = time.time()

# Hyperparameter search over the model() template using TPE, 30 evaluations.
# notebook_name points Hyperas at this notebook's file on the mounted Drive.
best_run, best_model = optim.minimize(model=model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=30,
                                      trials=Trials(),
                                      verbose=False,
                                      notebook_name='drive/My Drive/Pesquisa - Dicom images/notebooks/optimizer_5_balanced_repeat')
end = time.time()

print("Tempo para otimização:", (end - start)/60, "minutos")


print('Summary of the best model: -----------------------')

print("Evalutation of best performing model:")
print(best_model.evaluate(X_test, Y_test))

# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
best_model.summary()

# best_run stores the position of each selected `choice` option, so the
# module-level lists conv1/dense1/dense2 are indexed to recover the values.
# The dropout rates are stored directly as floats.
c1 = conv1[best_run['conv1']]
d1 = dense1[best_run['dense1']]
d2 = dense2[best_run['dense2']]
drop1 = best_run['Dropout']
drop2 = best_run['Dropout_1']

print('Conv 1:', c1, 'unidades')
print('Dense 1:', d1, 'unidades')
print('Dense 2:', d2, 'unidades')
print('Dropout 1:', drop1)
print('Dropout 2:', drop2)