In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import nibabel as nib
import scipy 
import util
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
import math
import time

In [2]:
# Data directory and metadata CSV handed to the util loaders below.
# NOTE(review): "E:Corrected_FA" has no separator after the drive letter, which on
# Windows is resolved relative to the current directory of drive E: — confirm intended.
ALL_DATA = "E:Corrected_FA/ALL_DATA/"
info_data = "idaSearch_8_01_2020.csv"

# Get the dictionaries with the names of the files that contain the images
AD_CN, groups = util.obtain_data_files(ALL_DATA, info_data)

# Load the images
CN_imgs = np.array(util.load_data(ALL_DATA, AD_CN["CN"]), dtype='float32')

AD_imgs = util.load_data(ALL_DATA, AD_CN["AD"])

# Extend the class with fewer examples (target size: the number of CN images)
AD_imgs = np.array(util.extend_class(AD_imgs, len(CN_imgs)), dtype='float32')

# Create the labels 1: AD, 0:CN
CN_labels = np.zeros((len(CN_imgs),1), dtype = "int32")
AD_labels = np.ones((len(AD_imgs),1), dtype = "int32")

In [3]:
# Root directory for TensorBoard logs, relative to the current working directory.
# (`os` and `time` come from the notebook's import cell; they are not re-imported here.)
root_logdir = os.path.join(os.curdir, "my_logs")


def get_run_logdir():
    """Return a log directory path under ``root_logdir`` named after the current local time.

    The run id has the form ``run_YYYY_MM_DD-HH_MM_SS``. The directory itself is
    not created here; only the path string is built.
    """
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)


run_logdir = get_run_logdir()  # e.g., './my_logs/run_2019_06_07-15_15_22'

In [4]:
def create_model(filters, neurons, dropout_rate, regularization):
    """Build an (uncompiled) 3D CNN binary classifier with a configurable width.

    Architecture, in order: five ``Conv3D`` layers that all use ``filters``
    channels, 'valid' padding and ReLU (kernel 11 with stride 4, then kernels
    5, 3, 3, 3 with stride 1). The first four are followed by batch
    normalisation and dropout, with a max-pooling layer before the fourth
    dropout. The last convolution feeds a global average pooling layer, three
    ``Dense(neurons)`` ReLU layers and a single sigmoid output unit.

    Args:
        filters: Number of filters of every convolutional layer.
        neurons: Number of units of each of the three hidden dense layers.
        dropout_rate: Rate of the dropout layers in the convolutional part.
        regularization: L2 factor used as ``activity_regularizer`` of the
            hidden dense layers.

    Returns:
        A ``tf.keras.Sequential`` model for inputs of shape (91, 109, 91, 1).
    """
    L = tf.keras.layers

    def conv(kernel_size, stride=1, **extra):
        # Every convolution shares width, padding and activation.
        return L.Conv3D(filters, kernel_size, strides=(stride,) * 3,
                        padding='valid', activation='relu', **extra)

    # Stem: large strided kernel applied directly to the input volume.
    stack = [conv(11, 4, input_shape=(91, 109, 91, 1)),
             L.BatchNormalization(),
             L.Dropout(dropout_rate)]

    for kernel_size in (5, 3):
        stack += [conv(kernel_size), L.BatchNormalization(), L.Dropout(dropout_rate)]

    stack += [conv(3), L.BatchNormalization(), L.MaxPooling3D(), L.Dropout(dropout_rate)]
    stack += [conv(3), L.GlobalAveragePooling3D()]

    # NOTE(review): activity_regularizer penalises the layer outputs; if a weight
    # penalty was intended, kernel_regularizer would be the option — confirm.
    for _ in range(3):
        stack.append(L.Dense(neurons, activation="relu",
                             activity_regularizer=keras.regularizers.l2(regularization)))

    stack.append(L.Dense(1, activation='sigmoid'))
    return tf.keras.Sequential(stack)

In [5]:
def create_model_def(dropout, regularization):
    """Build the fixed-width (uncompiled) 3D CNN binary classifier.

    Architecture, in order: five Conv3D -> BatchNormalization -> ReLU stages
    with 64, 128, 256, 512 and 512 filters (kernel 11 with stride 4, then
    kernels 5, 3, 3, 3 with stride 1, all 'valid' padding). A max-pooling layer
    sits after the fourth stage and a global average pooling after the fifth.
    The head is three ``Dense(512)`` ReLU layers, each followed by dropout, and
    a single sigmoid output unit.

    Args:
        dropout: Rate of the dropout layers placed after each hidden dense layer.
        regularization: Accepted for signature compatibility; no layer built
            here uses it.

    Returns:
        A ``tf.keras.Sequential`` model for inputs of shape (91, 109, 91, 1).
    """
    L = tf.keras.layers

    def conv_stage(n_filters, kernel_size, stride=1, **extra):
        # Convolution without activation, then batch norm, then ReLU.
        return [L.Conv3D(n_filters, kernel_size, strides=(stride,) * 3,
                         padding='valid', **extra),
                L.BatchNormalization(),
                L.ReLU()]

    stack = []
    stack += conv_stage(64, 11, 4, input_shape=(91, 109, 91, 1))
    stack += conv_stage(128, 5)
    stack += conv_stage(256, 3)
    stack += conv_stage(512, 3)
    stack.append(L.MaxPooling3D())
    stack += conv_stage(512, 3)
    stack.append(L.GlobalAveragePooling3D())

    for _ in range(3):
        stack += [L.Dense(512, activation="relu"), L.Dropout(dropout)]

    stack.append(L.Dense(1, activation='sigmoid'))
    return tf.keras.Sequential(stack)

In [6]:
def try_model(train_ds, val_ds, train_size, filters, neurons, batch_size = 32, dropout=0.05, reg=0.003, learning_rate = 3e-7, fold = 0, n_epoch = 200):
    """Create, compile and train a model, keeping the checkpoint with the best validation loss.

    The network comes from ``create_model_def(dropout, reg)``. ``filters`` and
    ``neurons`` do not alter the architecture in this function: together with
    ``dropout``, ``reg`` and ``fold`` they only tag the checkpoint file name
    and the TensorBoard run directory.

    Args:
        train_ds: Training dataset. It is repeated indefinitely, so the epoch
            length is set by ``steps_per_epoch``. Assumed to be already batched
            with ``batch_size`` by the caller.
        val_ds: Validation dataset, used for checkpointing during training and
            for the final evaluation.
        train_size: Number of training examples.
        filters: Tag used in the checkpoint / log names.
        neurons: Tag used in the checkpoint / log names.
        batch_size: Batch size used to derive the number of steps per epoch.
        dropout: Dropout rate passed to ``create_model_def``.
        reg: Regularization value passed to ``create_model_def``.
        learning_rate: Learning rate of the Adam optimizer.
        fold: Fold identifier used in the checkpoint / log names.
        n_epoch: Number of training epochs.

    Returns:
        dict with keys:
            "ev_loss": result of evaluating the best-validation-loss checkpoint
                on ``val_ds`` (loss followed by the compiled accuracy metric),
            "loss_path": path of that checkpoint file,
            "history": the ``History`` object returned by ``fit``.
    """
    loss_path = "model_loss_{}_{}_{}_{}_{}.h5".format(filters, neurons, dropout, reg, fold)
    run_logdir = os.path.join(os.curdir, "my_logs_cv",
                              "run_{}_{}_{}_{}_{}".format(filters, neurons, dropout, reg, fold))

    # Only the epoch with the lowest validation loss is kept on disk.
    checkpoint_cb_loss = keras.callbacks.ModelCheckpoint(loss_path, monitor="val_loss", save_best_only=True)
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

    # Build the model
    m = create_model_def(dropout, reg)
    # Compile it
    m.compile(optimizer=keras.optimizers.Adam(learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

    # Keras expects an integer number of steps; round up so that the last,
    # partial batch worth of examples is still counted in each epoch.
    steps_per_epoch = math.ceil(train_size / batch_size)

    # Train
    history = m.fit(train_ds.repeat(), epochs=n_epoch, steps_per_epoch=steps_per_epoch,
                    validation_data=val_ds, verbose=0,
                    callbacks=[checkpoint_cb_loss, tensorboard_cb])

    # Evaluate the checkpoint with the best validation loss, not the last epoch.
    m = keras.models.load_model(loss_path)
    evaluation_loss = m.evaluate(val_ds)

    return {"ev_loss": evaluation_loss, "loss_path": loss_path, "history": history}


In [7]:
batch_size = 32

# Split the images/labels of both classes into train and test parts.
# 0.15 is presumably the fraction held out for test — TODO confirm in util.train_test_split.
data = util.train_test_split(CN_imgs, CN_labels, AD_imgs, AD_labels, 0.15)

# NOTE: after this line the raw arrays are gone, so re-running this cell
# requires re-running the data-loading cell first.
CN_imgs, AD_imgs = None, None # Free memory

In [8]:
# Build one fold of the training data (arguments 5 and 0: number of folds and
# fold index, as used by the other k_fold calls in this notebook) and display
# the training dataset it returns.
fold_data = util.k_fold(data["train_imgs"], data["train_labels"], 5, 0 )
train_ds = fold_data["train_ds"]
train_ds

<ShuffleDataset shapes: ((91, 109, 91, 1), (1,)), types: (tf.float32, tf.int32)>

In [9]:
# Display the whole dict returned by util.k_fold (datasets plus split sizes).
fold_data

{'train_ds': <ShuffleDataset shapes: ((91, 109, 91, 1), (1,)), types: (tf.float32, tf.int32)>,
 'val_ds': <ShuffleDataset shapes: ((91, 109, 91, 1), (1,)), types: (tf.float32, tf.int32)>,
 'train_size': 300,
 'val_size': 74}

In [61]:
# Single timed training run on one fold.
# NOTE(review): the data comes from fold index 1, but the run is tagged with
# fold=0 (and filters=0, neurons=0), so the checkpoint is written to
# "model_loss_0_0_0.1_0.003_0.h5" — confirm the tag is intentional.
fold_data = util.k_fold(data["train_imgs"], data["train_labels"], 5, 1 )
# Training pipeline: per-example util.transform, then batching and prefetching.
train_ds = fold_data["train_ds"].map(lambda tensor, labels : util.transform(tensor,labels), num_parallel_calls=16)\
                                .batch(batch_size).prefetch(8)
# The validation set is served as one single batch holding every validation example.
val_ds = fold_data["val_ds"].batch(fold_data["val_size"])
train_size = fold_data["train_size"]
start = time.time()
evaluations = try_model(train_ds, val_ds, train_size, 0, 0,dropout = 0.1, fold= 0, learning_rate = 3e-6, n_epoch = 400)
end = time.time()
# Elapsed wall-clock time of the run, in seconds.
print(end-start)

4718.74587726593


In [62]:
# Show the working directory; checkpoints and logs are written relative to it.
%pwd

'C:\\Users\\Javie\\OneDrive - unizar.es\\Documentos\\Universidad\\TFG'

In [63]:
# Reload the checkpoint written by the single training run above.
model = keras.models.load_model("model_loss_0_0_0.1_0.003_0.h5")  # roll back to best model
# Recompile with the loss only (no metrics), so evaluate() returns a single loss value.
model.compile(optimizer=keras.optimizers.Adam(3e-6), loss=tf.keras.losses.BinaryCrossentropy())

In [54]:
# Display the raw sigmoid outputs for the validation set (one value per example).
model.predict(val_ds)

array([[0.9189684 ],
       [0.4244495 ],
       [0.79911834],
       [0.20836599],
       [0.4181527 ],
       [0.65679014],
       [0.20506841],
       [0.8831246 ],
       [0.22305681],
       [0.6291448 ],
       [0.9487446 ],
       [0.84349126],
       [0.14489901],
       [0.22651221],
       [0.03659551],
       [0.7882234 ],
       [0.13126223],
       [0.12724917],
       [0.38470346],
       [0.1113186 ],
       [0.05739108],
       [0.18246384],
       [0.8205814 ],
       [0.918437  ],
       [0.21611376],
       [0.0549912 ],
       [0.81238294],
       [0.24963723],
       [0.16593477],
       [0.4327924 ],
       [0.3404741 ],
       [0.10454987],
       [0.19310784],
       [0.26219666],
       [0.74144584],
       [0.66255224],
       [0.29982603],
       [0.20242548],
       [0.3364907 ],
       [0.54433656],
       [0.6760556 ],
       [0.11919296],
       [0.77755314],
       [0.21625437],
       [0.35384864],
       [0.11689481],
       [0.80708706],
       [0.960

In [64]:
# Keep the raw sigmoid outputs for the validation set.
preds = model.predict(val_ds)

In [65]:
# Validation loss of the reloaded model (a single value: it was recompiled without metrics).
model.evaluate(val_ds)



0.7591670155525208

In [66]:
# Number of validation examples in the current fold.
fold_data["val_size"]

74

In [67]:
# Turn the sigmoid outputs into hard 0/1 labels. The cut-off must be 0.5: sigmoid
# outputs lie in [0, 1], so the previous threshold of 5 mapped every example to 0.
# Re-running this cell is harmless, because 0/1 labels are unchanged by the threshold.
preds = (preds >= 0.5).astype("int")

In [68]:
# Pull the validation data out of the tf.data pipeline as NumPy arrays.
# val_ds was batched with batch size val_size, so its first element is the
# whole validation set.
for images, labels in val_ds.take(1):  # only take first element of dataset
    numpy_images = images.numpy()
    numpy_labels = labels.numpy()

In [69]:
# Predict on the materialised images and threshold the sigmoid output at 0.5
# to obtain 0/1 labels aligned element-wise with numpy_labels.
preds = (model.predict(numpy_images) >= .5).astype("int32")

In [70]:
# Accuracy: fraction of predicted labels that match the true labels.
acc = np.mean(preds == numpy_labels)
acc

0.7972972972972973

In [71]:
# Confusion-matrix counts, taking label 1 as the positive class.
# A prediction is "true" when it matches the label and "positive" when it is 1.
tp, tn, fp, fn = (
    np.sum(mask).astype("int32")
    for mask in (
        (preds == numpy_labels) & (preds == 1),  # true positives
        (preds == numpy_labels) & (preds == 0),  # true negatives
        (preds != numpy_labels) & (preds == 1),  # false positives
        (preds != numpy_labels) & (preds == 0),  # false negatives
    )
)
print(tp,tn,fp,fn)
# Sensitivity = TP / (TP + FN); specificity = TN / (TN + FP).
sensitivity, specificity = np.mean(tp / (tp + fn)), np.mean(tn / (tn + fp))
print(sensitivity, specificity)

32 27 7 8
0.8 0.7941176470588235


In [13]:
# Cross-validated search over the dropout rate.
# Earlier candidate grids, kept for reference:
#   filters = [160, 192, 256]
#   neurons = [192, 256, 512]
filters = [ 9999]
neurons = [ 9999]
dropouts = [0.2, .3,.4,.5]
n_epoch = 250
batch_size = 32
n_folds = 5
effective_folds = 3   # only the first 3 of the n_folds folds are trained per configuration
histories = []

best_model_loss = ""
best_model_acc = ""   # initialised so it is always defined after the loop
best_loss = np.inf
best_acc = 0
reg = 0.003
lr = 1e-6
f = 0.41              # passed to try_model as its `filters` argument, which only tags file/log names

best_loss_parameters = []

for d in dropouts:
    for n in neurons:
        run_evaluations = []
        for fold in range(effective_folds):
            start = time.time()

            # Rebuild the input pipelines for this fold.
            fold_data = util.k_fold(data["train_imgs"], data["train_labels"], n_folds, fold)
            train_ds = (
                fold_data["train_ds"]
                .map(lambda tensor, labels: util.transform(tensor, labels), num_parallel_calls=16)
                .batch(batch_size)
                .prefetch(8)
            )
            # The validation set is served as one single batch.
            val_ds = fold_data["val_ds"].batch(fold_data["val_size"])
            train_size = fold_data["train_size"]

            # batch_size and reg are forwarded explicitly so that the step count
            # and the checkpoint name follow the values configured in this cell.
            evaluations = try_model(train_ds, val_ds, train_size, f, n,
                                    batch_size=batch_size, dropout=d, reg=reg, fold=fold,
                                    learning_rate=lr, n_epoch=n_epoch)
            evaluation_loss = evaluations["ev_loss"]
            history = evaluations["history"]

            print("{} filters, {} neurons, {} dropout, {} regularization, {} fold".format(f,n, d, reg, fold))
            print("Loss: {}, Accuracy: {}".format(evaluation_loss[0], evaluation_loss[1]))

            histories.append(history)
            run_evaluations.append(evaluation_loss)
            end = time.time()
            print("Tiempo de ejecucion de fold: {}".format(end-start))

        # Average loss / accuracy of this configuration over the trained folds.
        loss = sum(ev[0] for ev in run_evaluations) / len(run_evaluations)
        acc = sum(ev[1] for ev in run_evaluations) / len(run_evaluations)

        # Check whether this configuration is the best so far.
        # NOTE: the stored path is the checkpoint of the last trained fold only.
        if loss < best_loss:
            best_loss = loss
            best_model_loss = evaluations["loss_path"]
            print("Nuevo mejor modelo de loss con {}".format(best_loss))

        if acc > best_acc:
            best_acc = acc
            best_model_acc = evaluations["loss_path"]
            print("Nuevo mejor modelo de accuracy con {}".format(best_acc))

        # Report the dropout actually used (the loop variable d); the previous
        # code formatted a separate global that was always None.
        print("Definitive evaluation of best loss of model with {} filters, {} neurons, {} dropout, {} regularization, {} fold".format(f,n, d, reg, fold))
        print("Loss: {}, Accuracy: {}".format(loss, acc))


print("Terminado")

0.41 filters, 9999 neurons, 0.2 dropout, 0.003 regularization, 0 fold
Loss: 0.5779489696025848, Accuracy: 0.7599999904632568
Tiempo de ejecucion de fold: 3272.8806574344635
0.41 filters, 9999 neurons, 0.2 dropout, 0.003 regularization, 1 fold
Loss: 0.3804469764232635, Accuracy: 0.8233333230018616
Tiempo de ejecucion de fold: 3223.65621137619
0.41 filters, 9999 neurons, 0.2 dropout, 0.003 regularization, 2 fold
Loss: 0.43915838599205015, Accuracy: 0.8066666722297668
Tiempo de ejecucion de fold: 3230.420414209366
Nuevo mejor modelo de loss con 0.4658514440059662
Nuevo mejor modelo de accuracy con 0.796666661898295
Definitive evaluation of best loss of model with 0.41 filters, 9999 neurons, None dropout, 0.003 regularization, 2 fold
Loss: 0.4658514440059662, Accuracy: 0.796666661898295
0.41 filters, 9999 neurons, 0.3 dropout, 0.003 regularization, 0 fold
Loss: 0.8217580318450928, Accuracy: 0.7433333396911621
Tiempo de ejecucion de fold: 3251.027955532074
0.41 filters, 9999 neurons, 0.3 dr

In [10]:
# Rebuild the train / validation pipelines for fold 0 of the 5-fold split.
n_folds = 5
fold = 0
fold_data = util.k_fold(data["train_imgs"], data["train_labels"], n_folds, fold )
# Training pipeline: per-example util.transform, then batching and prefetching.
train_ds = fold_data["train_ds"].map(lambda tensor, labels : util.transform(tensor,labels), num_parallel_calls=16)\
                                .batch(batch_size).prefetch(8)
# The validation set is served as one single batch.
val_ds = fold_data["val_ds"].batch(fold_data["val_size"])


In [11]:
# Evaluate the fold-1 checkpoint on the held-out test data, served as a single batch.
# NOTE(review): this and the two following cells differ only in the file name;
# a small helper taking the path would avoid the copy-paste.
m = keras.models.load_model("model_loss_0.3_9999_0.1_0.003_1.h5") 
evaluation_loss = m.evaluate(data["test"].batch(len(data["test_labels"])))



In [12]:
# Evaluate the fold-0 checkpoint on the held-out test data, served as a single batch.
# NOTE(review): the recorded run of this cell failed with OSError because this
# .h5 file was not found — confirm the checkpoint name before re-running.
m = keras.models.load_model("model_loss_0.3_9999_0.1_0.003_0.h5") 
evaluation_loss = m.evaluate(data["test"].batch(len(data["test_labels"])))

OSError: SavedModel file does not exist at: model_loss_0.3_9999_0.1_0.003_0.h5/{saved_model.pbtxt|saved_model.pb}

In [None]:
# Evaluate the fold-2 checkpoint on the held-out test data, served as a single batch.
m = keras.models.load_model("model_loss_0.3_9999_0.1_0.003_2.h5") 
evaluation_loss = m.evaluate(data["test"].batch(len(data["test_labels"])))