In [1]:
import warnings, random, os, pydicom, cv2, glob, re
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import seaborn as sns; sns.set()

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.applications import InceptionV3, MobileNetV2, VGG19

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense, Input, GlobalAveragePooling2D, MaxPooling2D, Conv2D, AveragePooling2D

from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.model_selection import train_test_split

from pydicom.pixel_data_handlers.util import apply_voi_lut

# Global switch: make tf.function-wrapped code execute eagerly instead of as traced graphs.
# NOTE(review): presumably enabled for debugging; confirm it is still required.
tf.config.run_functions_eagerly(True)

# Record the TensorFlow version in the notebook output.
print('Tensorflow version:', tf.__version__)

Tensorflow version: 2.10.0


In [2]:
# MRI sequence names. NOTE(review): presumably these match the per-scan sub-folder
# names read by load_dicom_images_3d (its default is "T1w") — confirm.
mri_types = ['FLAIR','T1w','T1wCE','T2w']
# Side length, in pixels, that every slice is resized to (default of load_dicom_image).
SIZE = 256
# Number of slices kept per scan; also the last dimension of the model input.
NUM_IMAGES = 64

In [3]:
def load_dicom_image(path, img_size=SIZE):
    """Read one DICOM file and return its pixel data resized to a square image.

    Args:
        path: Path of the DICOM file to read.
        img_size: Target width and height, in pixels, passed to ``cv2.resize``.

    Returns:
        The file's pixel array resized to ``(img_size, img_size)``. Pixel values
        are not rescaled here.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias that
    # newer pydicom releases no longer provide.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    return cv2.resize(data, (img_size, img_size))

In [None]:
def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="T1w"):
    """Load the central slices of one scan as a min-max normalised volume.

    The ``.dcm`` files of ``./data/train/<scan_id>/<mri_type>/`` are sorted in
    natural (numeric-aware) order and up to ``num_imgs`` slices centred on the
    middle file are loaded. If fewer slices are available, the volume is
    zero-padded along the last axis.

    Args:
        scan_id: Name of the scan folder under ``./data/train``.
        num_imgs: Number of slices in the returned volume.
        img_size: Side length each slice is resized to.
        mri_type: Name of the sequence sub-folder to read.

    Returns:
        A float32 array of shape ``(img_size, img_size, num_imgs)``. Values are
        scaled to ``[0, 1]`` unless the volume is constant, in which case it is
        returned unscaled. Because the slice stack is transposed as a whole,
        each slice's two spatial axes are swapped relative to the file.

    Raises:
        ValueError: If no DICOM file is found for the requested scan/sequence.
    """
    files = sorted(glob.glob(f"./data/train/{scan_id}/{mri_type}/*.dcm"), 
               key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
    if not files:
        # Fail with a clear message instead of the opaque np.stack error.
        raise ValueError(f"No DICOM files found for scan {scan_id!r} and MRI type {mri_type!r}")

    # Select a window of at most `num_imgs` slices around the middle file.
    middle = len(files)//2
    num_imgs2 = num_imgs//2
    p1 = max(0, middle - num_imgs2)
    p2 = min(len(files), middle + num_imgs2)

    # Forward `img_size` so the slices agree with the padding shape below.
    # Stacking gives (n, rows, cols); `.T` reverses the axes to (cols, rows, n).
    img3d = np.stack([load_dicom_image(f, img_size) for f in files[p1:p2]]).T
    # Work in floating point so the subtraction below cannot wrap around in
    # the source integer dtype.
    img3d = img3d.astype(np.float32)

    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]), dtype=img3d.dtype)
        img3d = np.concatenate((img3d,  n_zero), axis = -1)

    # Min-max scale to [0, 1]; skipped for a constant volume to avoid dividing by zero.
    lowest, highest = np.min(img3d), np.max(img3d)
    if lowest < highest:
        img3d = (img3d - lowest) / (highest - lowest)
    return img3d

In [None]:
# Read the label table: column 0 is kept as strings (dtype 'object'), column 1 as int8.
# NOTE(review): presumably the string dtype preserves leading zeros in the scan IDs
# so they match the folder names on disk — confirm.
df_labels = pd.read_csv('./labels.csv', dtype={0:'object', 1:'int8'})
df_labels.head()

In [None]:
# Hold out 20% of the scans for testing, then 20% of the remainder for validation
# (roughly 64% / 16% / 20% overall). The fixed random_state makes both splits repeatable.
df_labels_train_val, df_labels_test = train_test_split(df_labels, test_size=0.2, random_state=123)
df_labels_train, df_labels_val = train_test_split(df_labels_train_val, test_size=0.2, random_state=123)

In [None]:
def _iter_labelled_volumes(labels):
    """Yield one ``(volume, label)`` pair per row of ``labels``, in row order.

    The volume comes from ``load_dicom_images_3d`` called with the row's
    ``BraTS21ID``; the label is the row's ``MGMT_value``.
    """
    for row in range(labels.shape[0]):
        yield (load_dicom_images_3d(labels['BraTS21ID'].iloc[row]),
               labels['MGMT_value'].iloc[row])


def generator_train():
    """Yield ``(volume, label)`` pairs for the training split."""
    yield from _iter_labelled_volumes(df_labels_train)


def generator_val():
    """Yield ``(volume, label)`` pairs for the validation split."""
    yield from _iter_labelled_volumes(df_labels_val)


def generator_test():
    """Yield ``(volume, label)`` pairs for the test split."""
    yield from _iter_labelled_volumes(df_labels_test)

In [None]:
def _make_scan_dataset(generator, batch_size=8):
    """Wrap a ``(volume, label)`` generator in a batched ``tf.data.Dataset``.

    The volumes yielded by the generators are min-max normalised floats, so they
    are declared as float32. Declaring them as an integer type would truncate
    practically every voxel to 0 when the generator output is converted.
    """
    signature = (
        tf.TensorSpec(shape=(SIZE, SIZE, NUM_IMAGES), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int8),
    )
    dataset = tf.data.Dataset.from_generator(generator, output_signature=signature)
    return dataset.batch(batch_size)


# One batched dataset per split; every element is (volume batch, label batch).
ds_train = _make_scan_dataset(generator_train)
ds_val = _make_scan_dataset(generator_val)
ds_test = _make_scan_dataset(generator_test)

In [None]:
def crear_clasificador_propio(n_lay_convs = 3, fil1 = 16, fil2 = 32, fil3 = 64, ks=3, s=2, 
                              n_lay_dense = 2, nn1 = 32, nn2 = 16, activacion = 'relu'):
    """Build and compile the custom convolutional binary classifier.

    Architecture: input of shape ``(SIZE, SIZE, NUM_IMAGES)``, then
    ``n_lay_convs`` strided ``Conv2D`` layers, global average pooling,
    ``n_lay_dense`` ``Dense`` layers and a single sigmoid output unit.

    Args:
        n_lay_convs: Number of convolutional layers (at most 3 widths are available).
        fil1, fil2, fil3: Filter counts of the 1st, 2nd and 3rd convolution.
        ks: Kernel size shared by all convolutions.
        s: Stride shared by all convolutions.
        n_lay_dense: Number of hidden dense layers (at most 2 widths are available).
        nn1, nn2: Unit counts of the 1st and 2nd hidden dense layer.
        activacion: Activation used by every convolutional and hidden dense layer.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss, Adam
        optimizer, accuracy metric, eager execution).
    """
    anchos_conv = (fil1, fil2, fil3)
    anchos_densos = (nn1, nn2)

    modelo = Sequential(name="ClasificadorPropio")
    modelo.add(Input(shape=(SIZE, SIZE, NUM_IMAGES)))

    # Convolutional feature extractor; the slices of a scan act as input channels.
    for idx in range(n_lay_convs):
        capa_conv = Conv2D(anchos_conv[idx], ks, strides=s, padding="same", activation=activacion)
        modelo.add(capa_conv)

    modelo.add(GlobalAveragePooling2D())

    # Fully connected head.
    for idx in range(n_lay_dense):
        modelo.add(Dense(anchos_densos[idx], activation=activacion))

    # Single sigmoid unit for the binary target.
    modelo.add(Dense(1, activation='sigmoid', name='Capa_Salida'))

    modelo.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=["accuracy"],
        run_eagerly=True,
    )
    return modelo

In [None]:
# Baseline model built with the default hyperparameters of crear_clasificador_propio.
clasificador_propio = crear_clasificador_propio()
clasificador_propio.summary()

In [None]:
# Stop training once val_loss has not improved for 3 consecutive epochs.
# restore_best_weights=False keeps the weights of the last epoch, not the best one.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min', baseline=None, restore_best_weights=False)

In [None]:
# Train the baseline for at most 10 epochs, evaluating on ds_val after each epoch;
# the ops are requested to run on the first GPU.
with tf.device('/device:GPU:0'): 
    clasificador_propio.fit(
        ds_train,
        validation_data=ds_val,
        epochs=10,
        callbacks=[early_stopping]
    )

In [None]:
# NOTE(review): the load below replaces the model trained in the previous cell with
# the one stored on disk. The save call is commented out, so the .h5 file must
# already exist for this cell to run.
# clasificador_propio.save('./models/clasificador_propio_base.h5')
clasificador_propio = tf.keras.models.load_model('./models/clasificador_propio_base.h5')

In [None]:
def darF1Score(model, dataset, print_report=False):
    """Predict every batch of ``dataset`` and compute the weighted F1-score.

    Args:
        model: Object whose ``predict(x, verbose=0)`` returns one probability
            per sample.
        dataset: Iterable of ``(x, y)`` batches where ``y`` exposes ``.numpy()``.
        print_report: When true, also print sklearn's classification report.

    Returns:
        A tuple ``(y_true, y_pred, f1)``: the true labels and the rounded 0/1
        predictions as flat lists of equal length, and the weighted F1-score.
    """
    y_true, y_pred = [], []
    for x, y in dataset:
        y_true.extend(y.numpy().tolist())
        batch_probs = np.asarray(model.predict(x, verbose=0))
        # The model emits one column per sample; flatten it so the predictions
        # are a flat list like y_true instead of a list of one-element lists.
        y_pred.extend(batch_probs.round().astype(int).reshape(-1).tolist())
    if print_report:
        print(classification_report(y_true, y_pred))
    f1 = f1_score(y_true, y_pred, average='weighted')
    return y_true, y_pred, f1

In [None]:
# Classification report and weighted F1-score of the baseline model on the validation split.
y_true, y_pred, f1 = darF1Score(clasificador_propio, ds_val, print_report=True)

In [None]:
def get_params(params_grid):
    """Draw one random configuration from a grid of candidate values.

    Args:
        params_grid: Mapping from parameter name to a non-empty sequence of
            candidate values.

    Returns:
        A dict with the same keys, in the same order, each mapped to one value
        chosen with ``random.choice`` from its candidates.
    """
    return {name: random.choice(candidates) for name, candidates in params_grid.items()}

In [None]:
def search_model(create_model, params_grid, ds_train, ds_val, n_iter=25, epochs=25, patience=3):
    """Random hyperparameter search scored by weighted F1 on the validation set.

    For each of ``n_iter`` trials a configuration is sampled with ``get_params``,
    a model is built with ``create_model(**params)``, trained on ``ds_train``
    with early stopping on ``val_loss``, and scored on ``ds_val`` with
    ``darF1Score``.

    Args:
        create_model: Callable returning a compiled Keras model from keyword
            hyperparameters.
        params_grid: Mapping from hyperparameter name to candidate values.
        ds_train: Training dataset passed to ``fit``.
        ds_val: Validation dataset used for early stopping and for scoring.
        n_iter: Number of random configurations to try.
        epochs: Maximum number of epochs per trial.
        patience: Epochs without ``val_loss`` improvement before a trial stops.

    Returns:
        A tuple ``(params_best_model, best_f1_score)``. With ``n_iter == 0`` it
        is ``(None, 0)``.
    """
    params_best_model, best_f1_score = None, 0
    for i in range(n_iter):
        params = get_params(params_grid)
        print('Training inter {} for params:\n'.format(i+1), params)
        model = create_model(**params)
        # Build the callback here rather than reading a notebook-level variable,
        # so the search does not depend on which cells ran before it.
        stopper = EarlyStopping(monitor='val_loss', patience=patience, verbose=1, mode='min',
                                baseline=None, restore_best_weights=False)
        with tf.device('/device:GPU:0'): 
            model.fit(
                ds_train,
                validation_data=ds_val,
                epochs=epochs,
                callbacks=[stopper]
            )
        # Named `trial_f1` so it does not shadow sklearn's `f1_score`.
        trial_f1 = darF1Score(model, ds_val)[2]
        print('F1-score for iter {}: {}'.format(i+1, trial_f1))
        # Always accept the first trial so a configuration is returned even
        # when every trial scores 0.
        if params_best_model is None or trial_f1 > best_f1_score:
            params_best_model = params
            best_f1_score = trial_f1
        print('Best F1-score so far:\n', best_f1_score)
        print('Best params so far:\n', params_best_model)
    return params_best_model, best_f1_score

In [None]:
# Candidate values for each keyword argument of crear_clasificador_propio.
# search_model draws one value per key at random for every trial, so widths of
# layers beyond n_lay_convs / n_lay_dense are sampled but not used by the model.
params_grid = {  
    'n_lay_convs': [1, 2, 3],
    'fil1': [20, 16, 12],
    'fil2': [40, 32, 24],
    'fil3': [80, 64, 48],
    'ks': [3, 5, 7],
    's': [2, 3, 4],
    'n_lay_dense': [1, 2],
    'nn1': [40, 32, 24],
    'nn2': [20, 16, 12],
    'activacion': ['relu','sigmoid']
}

# Run the random search with the default number of trials (n_iter=25).
params_best_model_propio, best_f1_score_propio = search_model(crear_clasificador_propio, params_grid, ds_train, ds_val)

In [None]:
# Print the best validation F1-score found by the search and the hyperparameters that produced it.
print('El mejor modelo encontrado por la búsqueda de hiperparámetros obtuvo un F1-score de {} con parámetros \n{}'.format(best_f1_score_propio, params_best_model_propio))

In [None]:
# Hard-coded hyperparameters: this assignment overwrites whatever the search returned.
# NOTE(review): presumably copied from an earlier search run so the notebook can be
# re-run without repeating the search — confirm.
# With n_lay_dense=1 the 'nn2' entry is not used by crear_clasificador_propio.
params_best_model_propio = {'n_lay_convs': 3, 'fil1': 12, 'fil2': 24, 'fil3': 80, 'ks': 5, 's': 2, 'n_lay_dense': 1, 'nn1': 40, 'nn2': 16, 'activacion': 'relu'}
best_clasificador_propio = crear_clasificador_propio(**params_best_model_propio)
best_clasificador_propio.summary()

In [None]:
# The final fit has no validation data, so early stopping monitors the training loss.
# NOTE(review): this rebinds the notebook-level name `early_stopping`; any cell that
# reads that name after this one runs gets the 'loss' monitor.
early_stopping = EarlyStopping(monitor='loss', patience=3, verbose=1, mode='min', baseline=None, restore_best_weights=False)
# Retrain the selected architecture on the training and validation batches together.
with tf.device('/device:GPU:0'): 
    best_clasificador_propio.fit(
        ds_train.concatenate(ds_val),
        epochs=12,
        callbacks=[early_stopping]
    )

In [None]:
# Persist the final model in HDF5 format; uncomment the second line to reload it
# instead of retraining.
best_clasificador_propio.save('./models/best_clasificador_propio.h5')
# best_clasificador_propio = tf.keras.models.load_model('./models/best_clasificador_propio.h5')

In [None]:
# One (heading, dataset) pair per split, in the order the reports are printed.
# NOTE(review): if best_clasificador_propio was fit on ds_train.concatenate(ds_val),
# only the test report reflects data the model has not seen.
_reportes = (
    ('Reporte para el mejor modelo propio sobre datos de entrenamiento', ds_train),
    ('Reporte para el mejor modelo propio sobre datos de validación', ds_val),
    ('Reporte para el mejor modelo propio sobre datos de prueba', ds_test),
)
for _titulo, _datos in _reportes:
    print('--------------------------------------------------------')
    print(_titulo)
    y_true, y_pred, f1 = darF1Score(best_clasificador_propio, _datos, print_report=True)

# After the loop y_true / y_pred hold the test-split results, so the confusion
# matrix below is for the test set. It is transposed so that rows are predicted
# labels and columns are true labels, matching the axis titles.
mat = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(8,8))
sns.heatmap(mat.T, square=True, annot=True, cmap='Blues', fmt='d', cbar=False)
plt.xlabel('Etiquetas reales')
plt.ylabel('Etiquetas predichas')