# Experimento 5
***
- Rede Inception
- Conjunto de Dados: NIH
- Analisando o treinamento de uma rede para classificação multiclasse (12 classes de achados radiográficos)

### Importação dos pacotes

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.callbacks import ModelCheckpoint

import warnings
warnings.filterwarnings("ignore")

### Importação dos dados
***
- Todo o dataset foi pré-processado anteriormente

In [3]:
# loading the pre-processed training, validation and test dataframes
# (the first CSV column is used as the dataframe index)
csv_paths = ['/content/drive/MyDrive/train_df.csv',
             '/content/drive/MyDrive/validation_df.csv',
             '/content/drive/MyDrive/test_df.csv']
train_df, validation_df, test_df = [pd.read_csv(csv_path, sep = ',', index_col = 0)
                                    for csv_path in csv_paths]

In [4]:
# replacing the numeric codes of the 'labels' column with class names
# (1 -> 'abnormal', 0 -> 'normal') in the three splits
for split_df in (train_df, validation_df, test_df):
    split_df.loc[split_df.labels == 1, 'labels'] = 'abnormal'
    split_df.loc[split_df.labels == 0, 'labels'] = 'normal'

In [5]:
# Reducing the number of 'normal' radiographs in the training data: a seeded 50/50
# split keeps one half of the normal cases, all abnormal cases are kept.
# NOTE: this cell overwrites train_df, so running it twice without reloading the CSV
# halves the normal cases again.
train_normal_cases = train_df[train_df.labels == 'normal']
train_abnormal_cases = train_df[train_df.labels == 'abnormal']
train_normal_cases, _ = train_test_split(train_normal_cases, test_size = 0.5, random_state = 42)

# pd.concat keeps the original column names and dtypes; the previous numpy round-trip
# had to re-declare the column names by hand and silently depended on their order
train_df = pd.concat([train_normal_cases, train_abnormal_cases], ignore_index = True)

In [6]:
# showing the first five rows to check the layout of the pre-processed dataframe
train_df.head(n = 5)

Unnamed: 0,Image Index,finding_labels,labels
0,/content/drive/MyDrive/images-nih4/images/0002...,No Finding,normal
1,/content/drive/MyDrive/images-nih3/images/0001...,No Finding,normal
2,/content/drive/MyDrive/images-nih2/images/0000...,No Finding,normal
3,/content/drive/MyDrive/images-nih2/images/0000...,No Finding,normal
4,/content/drive/MyDrive/images-nih2/images/0001...,No Finding,normal


In [7]:
# counting how many training images are available for each finding
train_df.finding_labels.value_counts()

No Finding            27811
Infiltration           8770
Atelectasis            3902
Effusion               3645
Nodule                 2497
Pneumothorax           2024
Consolidation          1224
Pleural_Thickening     1028
Emphysema               818
Fibrosis                671
Edema                   578
Pneumonia               295
Name: finding_labels, dtype: int64

In [8]:
# Building the {class index: weight} dictionary passed to training to compensate for
# the class imbalance.
# np.unique returns the finding names sorted, which is the same alphabetical order the
# training generator reports in class_indices, so position i here is class index i there.
label_names = np.unique(train_df['finding_labels'])

# compute_class_weight is imported by name and called with keyword arguments: recent
# scikit-learn releases reject the positional form, and this cell rebinds `class_weight`
# (previously the sklearn module) to the dictionary below, which made the cell fail
# when it was executed a second time.
class_weights = compute_class_weight(class_weight = 'balanced', classes = label_names,
                                     y = train_df['finding_labels'])

# one entry per class, whatever the number of classes found in the data
class_weight = dict(enumerate(class_weights))

In [9]:
# training images: pixel values multiplied by 1/255 plus light augmentation
# (random rotations of up to 10 degrees and random zoom of up to 20%)
image_generator = ImageDataGenerator(rescale = 1./255., rotation_range = 10, zoom_range = 0.2)

# training generator: shuffled batches of 256 RGB images resized to 256x256,
# with one-hot labels taken from the 'finding_labels' column
train_generator = image_generator.flow_from_dataframe(
                                                      dataframe = train_df,
                                                      x_col = 'Image Index',
                                                      y_col = 'finding_labels',
                                                      batch_size = 256,
                                                      seed = 42,
                                                      shuffle = True,
                                                      class_mode = 'categorical',
                                                      color_mode = 'rgb',
                                                      target_size = (256, 256))

# validation and test images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale = 1./255.)

# class names ordered by the index the training generator assigned to them; handing this
# list to the other generators guarantees the same name -> index mapping in every split,
# even if a split happens to miss one of the classes
train_class_names = sorted(train_generator.class_indices, key = train_generator.class_indices.get)

# validation generator: not shuffled, so every epoch is scored on the same batches and
# the 'val_acc' values compared by the checkpoint callback refer to the same images
valid_generator = test_datagen.flow_from_dataframe(
                                                    dataframe = validation_df,
                                                    x_col = 'Image Index',
                                                    y_col = 'finding_labels',
                                                    classes = train_class_names,
                                                    batch_size = 128,
                                                    seed = 42,
                                                    shuffle = False,
                                                    class_mode = 'categorical',
                                                    color_mode = 'rgb',
                                                    target_size = (256, 256))

# test generator: not shuffled, so predictions stay aligned with test_generator.classes
test_generator = test_datagen.flow_from_dataframe(
                                                  dataframe = test_df, 
                                                  x_col = 'Image Index',
                                                  y_col = 'finding_labels',
                                                  classes = train_class_names,
                                                  batch_size = 128,
                                                  seed = 42,
                                                  shuffle = False,
                                                  class_mode = 'categorical',
                                                  color_mode = 'rgb',
                                                  target_size = (256, 256))

Found 53259 validated image filenames belonging to 12 classes.
Found 4268 validated image filenames belonging to 12 classes.
Found 2640 validated image filenames belonging to 12 classes.


In [10]:
# showing the numeric index assigned to each class by the training generator
train_generator.class_indices

{'Atelectasis': 0,
 'Consolidation': 1,
 'Edema': 2,
 'Effusion': 3,
 'Emphysema': 4,
 'Fibrosis': 5,
 'Infiltration': 6,
 'No Finding': 7,
 'Nodule': 8,
 'Pleural_Thickening': 9,
 'Pneumonia': 10,
 'Pneumothorax': 11}

### Preparando a rede neural convolucional 

In [11]:
# baixando os pesos treinados da rede inception
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

--2021-04-14 23:06:50--  https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.142.128, 74.125.195.128, 74.125.20.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.142.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87910968 (84M) [application/x-hdf]
Saving to: ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’


2021-04-14 23:06:51 (106 MB/s) - ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’ saved [87910968/87910968]



In [12]:
# location of the weights file downloaded in the previous cell
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# building the InceptionV3 convolutional base (no classification top) with empty weights
# and then filling it from the local file
pre_trained_model = InceptionV3(weights = None, include_top = False, input_shape = (256, 256, 3))
pre_trained_model.load_weights(local_weights_file)

# fine-tuning policy: layers listed before 'mixed6' stay frozen, while 'mixed6' and
# every layer listed after it are trainable
pre_trained_model.trainable = True
set_trainable = False
for layer in pre_trained_model.layers:
    # the flag flips to True at 'mixed6' and stays True for the remaining layers
    set_trainable = set_trainable or layer.name == 'mixed6'
    layer.trainable = set_trainable

# the custom classifier is attached to the output of the layer named 'mixed7'
last_layer = pre_trained_model.get_layer('mixed7')
last_output = last_layer.output

In [13]:
# classifier head stacked on top of the 'mixed7' output, applied in this order:
#   flatten -> dense 1024 (relu) -> dense 512 (relu) -> dropout 20% (regularization)
#   -> dense 12 (softmax, one output per class)
head_layers = [
    layers.Flatten(),
    layers.Dense(units = 1024, activation = tf.nn.relu),
    layers.Dense(units = 512, activation = tf.nn.relu),
    layers.Dropout(rate = 0.2),
    layers.Dense(units = 12, activation = tf.nn.softmax),
]

x = last_output
for head_layer in head_layers:
    x = head_layer(x)

# joining the Inception base and the new head into a single model
model = Model(inputs = pre_trained_model.input, outputs = x)

# compiling for single-label multi-class training; the metric is registered as 'acc',
# which is why the checkpoint callbacks monitor 'val_acc'
model.compile(loss = 'categorical_crossentropy',
              optimizer = optimizers.RMSprop(learning_rate = 0.0001),
              metrics = ['acc'])

In [None]:
# file that receives the checkpoint of this training round
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.0.hdf5"
# the checkpoint is written only when the validation accuracy beats the best value so far
checkpoint = ModelCheckpoint(filepath = filepath, mode = 'max', monitor = 'val_acc',
                             save_best_only = True, verbose = 1)

In [None]:
# list of callbacks handed to the training call (only the checkpoint callback)
callbacks = [checkpoint]

In [None]:
# number of full batches per pass, derived from the generators themselves; the sample
# count that used to be hard-coded here (53263) did not match the 53259 files the
# training generator actually found
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

# training the convolutional network for 10 epochs with class weighting;
# Model.fit accepts generators directly (fit_generator is deprecated)
history = model.fit(train_generator, steps_per_epoch = steps_per_epoch,
                    validation_data = valid_generator, validation_steps = validation_steps,
                    callbacks = callbacks, epochs = 10, class_weight = class_weight,
                    use_multiprocessing = True, workers = 8)

Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.36529, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.0.hdf5
Epoch 2/10

Epoch 00002: val_acc did not improve from 0.36529
Epoch 3/10

Epoch 00003: val_acc did not improve from 0.36529
Epoch 4/10

Epoch 00004: val_acc did not improve from 0.36529
Epoch 5/10

Epoch 00005: val_acc did not improve from 0.36529
Epoch 6/10

Epoch 00006: val_acc did not improve from 0.36529
Epoch 7/10

Epoch 00007: val_acc did not improve from 0.36529
Epoch 8/10

Epoch 00008: val_acc did not improve from 0.36529
Epoch 9/10

Epoch 00009: val_acc improved from 0.36529 to 0.36742, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.0.hdf5
Epoch 10/10

Epoch 00010: val_acc did not improve from 0.36742


In [None]:
# file that receives the checkpoint of this training round
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5"
# the checkpoint is written only when the validation accuracy beats the best value of this round
checkpoint = ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')

# list of callbacks handed to the training call
callbacks = [checkpoint]

# number of full batches per pass, derived from the generators themselves; the sample
# count that used to be hard-coded here (53263) did not match the 53259 files the
# training generator actually found
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

# continuing the training for 5 more epochs;
# Model.fit accepts generators directly (fit_generator is deprecated)
history = model.fit(train_generator, steps_per_epoch = steps_per_epoch,
                    validation_data = valid_generator, validation_steps = validation_steps,
                    callbacks = callbacks, epochs = 5, class_weight = class_weight,
                    use_multiprocessing = True, workers = 8)

Epoch 1/5

Epoch 00001: val_acc improved from -inf to 0.30350, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5
Epoch 2/5

Epoch 00002: val_acc did not improve from 0.30350
Epoch 3/5

Epoch 00003: val_acc did not improve from 0.30350
Epoch 4/5

Epoch 00004: val_acc did not improve from 0.30350
Epoch 5/5

Epoch 00005: val_acc did not improve from 0.30350


In [None]:
# measuring how well the model generalizes: loss and accuracy on the test data
model.evaluate(x = test_generator)



[2.0524284839630127, 0.2643939256668091]

In [None]:
# persisting the whole model in its current state to Drive
model.save(filepath = '/content/drive/MyDrive/weights-nih/inception/modelv1.1')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/weights-nih/inception/modelv1.1/assets


### Continuação do treinamento

In [None]:
# restoring the model saved at the end of the previous round ...
saved_model_dir = '/content/drive/MyDrive/weights-nih/inception/modelv1.1'
model = tf.keras.models.load_model(saved_model_dir)

# ... and overwriting its weights with the checkpoint file written during the v1.1 round
checkpoint_file = '/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5'
model.load_weights(checkpoint_file)

In [None]:
# file that receives the checkpoint of this training round
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.2.hdf5"
# the checkpoint is written only when the validation accuracy beats the best value of this round
checkpoint = ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')

# list of callbacks handed to the training call
callbacks = [checkpoint]

# number of full batches per pass, derived from the generators themselves; the sample
# count that used to be hard-coded here (53263) did not match the 53259 files the
# training generator actually found
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

# continuing the training for 15 more epochs;
# Model.fit accepts generators directly (fit_generator is deprecated)
history = model.fit(train_generator, steps_per_epoch = steps_per_epoch,
                    validation_data = valid_generator, validation_steps = validation_steps,
                    callbacks = callbacks, epochs = 15, class_weight = class_weight,
                    use_multiprocessing = True, workers = 8)

Epoch 1/15

Epoch 00001: val_acc improved from -inf to 0.32884, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.2.hdf5
Epoch 2/15

Epoch 00002: val_acc did not improve from 0.32884
Epoch 3/15

Epoch 00003: val_acc did not improve from 0.32884
Epoch 4/15

Epoch 00004: val_acc did not improve from 0.32884
Epoch 5/15

Epoch 00005: val_acc improved from 0.32884 to 0.38873, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.2.hdf5
Epoch 6/15

Epoch 00006: val_acc did not improve from 0.38873
Epoch 7/15

Epoch 00007: val_acc did not improve from 0.38873
Epoch 8/15

Epoch 00008: val_acc did not improve from 0.38873
Epoch 9/15

Epoch 00009: val_acc did not improve from 0.38873
Epoch 10/15

Epoch 00010: val_acc did not improve from 0.38873
Epoch 11/15

Epoch 00011: val_acc did not improve from 0.38873
Epoch 12/15

Epoch 00012: val_acc improved from 0.38873 to 0.39181, saving model to /content/drive/MyDrive/weights-ni

In [None]:
# file that receives the checkpoint of this training round
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.3.hdf5"
# the checkpoint is written only when the validation accuracy beats the best value of this round
checkpoint = ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')

# list of callbacks handed to the training call
callbacks = [checkpoint]

# number of full batches per pass, derived from the generators themselves; the sample
# count that used to be hard-coded here (53263) did not match the 53259 files the
# training generator actually found
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

# continuing the training for 15 more epochs;
# Model.fit accepts generators directly (fit_generator is deprecated)
history = model.fit(train_generator, steps_per_epoch = steps_per_epoch,
                    validation_data = valid_generator, validation_steps = validation_steps,
                    callbacks = callbacks, epochs = 15, class_weight = class_weight,
                    use_multiprocessing = True, workers = 8)

Epoch 1/15

Epoch 00001: val_acc improved from -inf to 0.44389, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.3.hdf5
Epoch 2/15

Epoch 00002: val_acc did not improve from 0.44389
Epoch 3/15

Epoch 00003: val_acc did not improve from 0.44389
Epoch 4/15

Epoch 00004: val_acc did not improve from 0.44389
Epoch 5/15

Epoch 00005: val_acc did not improve from 0.44389
Epoch 6/15

Epoch 00006: val_acc did not improve from 0.44389
Epoch 7/15

Epoch 00007: val_acc did not improve from 0.44389
Epoch 8/15

Epoch 00008: val_acc improved from 0.44389 to 0.49929, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.3.hdf5
Epoch 9/15

Epoch 00009: val_acc did not improve from 0.49929
Epoch 10/15

Epoch 00010: val_acc improved from 0.49929 to 0.50592, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.3.hdf5
Epoch 11/15

Epoch 00011: val_acc did not improve from 0.50592
Epoch 12/15

Epo

In [None]:
# persisting the whole model in its current state to Drive
model.save(filepath = '/content/drive/MyDrive/weights-nih/inception/modelv1.3')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/weights-nih/inception/modelv1.3/assets


In [None]:
# checking the predictive performance on the test data: returns loss and accuracy
model.evaluate(x = test_generator)



[2.1838455200195312, 0.3973484933376312]

### Continuação da etapa de treinamento

In [14]:
# restoring the checkpoint written during the v1.3 training round into the current model
checkpoint_file = '/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.3.hdf5'
model.load_weights(checkpoint_file)

In [15]:
# file that receives the checkpoint of this training round
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.4.hdf5"
# the checkpoint is written only when the validation accuracy beats the best value of this round
checkpoint = ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')

# list of callbacks handed to the training call
callbacks = [checkpoint]

# number of full batches per pass, derived from the generators themselves; the sample
# count that used to be hard-coded here (53263) did not match the 53259 files the
# training generator actually found
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

# continuing the training for 15 more epochs;
# Model.fit accepts generators directly (fit_generator is deprecated)
history = model.fit(train_generator, steps_per_epoch = steps_per_epoch,
                    validation_data = valid_generator, validation_steps = validation_steps,
                    callbacks = callbacks, epochs = 15, class_weight = class_weight,
                    use_multiprocessing = True, workers = 8)

Epoch 1/15

Epoch 00001: val_acc improved from -inf to 0.49645, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.4.hdf5
Epoch 2/15

Epoch 00002: val_acc improved from 0.49645 to 0.50284, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.4.hdf5
Epoch 3/15

Epoch 00003: val_acc did not improve from 0.50284
Epoch 4/15

Epoch 00004: val_acc did not improve from 0.50284
Epoch 5/15

Epoch 00005: val_acc did not improve from 0.50284
Epoch 6/15

Epoch 00006: val_acc did not improve from 0.50284
Epoch 7/15

Epoch 00007: val_acc improved from 0.50284 to 0.50402, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.4.hdf5
Epoch 8/15

Epoch 00008: val_acc improved from 0.50402 to 0.51989, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.4.hdf5
Epoch 9/15

Epoch 00009: val_acc did not improve from 0.51989
Epoch 10/15

Epoch 00010: val_acc did not

In [16]:
# persisting the whole model in its current state to Drive
model.save(filepath = '/content/drive/MyDrive/weights-nih/inception/modelv1.4')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/weights-nih/inception/modelv1.4/assets
