# Experimento 5
***
- Rede Inception
- Conjunto de Dados: NIH
- Analisando o treinamento de uma rede com multiclassificadores

### Importação dos pacotes

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf

import warnings
warnings.filterwarnings("ignore")

### Importação dos dados
***
- Todos os dados já foram pré-processados anteriormente

In [2]:
# importando os dataframes dos dados de treinamento, validação e teste
train_df = pd.read_csv('/content/drive/MyDrive/train_df.csv', sep = ',', index_col=  0)
validation_df = pd.read_csv('/content/drive/MyDrive/validation_df.csv', sep = ',', index_col=  0)
test_df = pd.read_csv('/content/drive/MyDrive/test_df.csv' , sep = ',', index_col=  0)

In [3]:
# visualizando como é a organização do dataframe pré-processado
train_df.head()

Unnamed: 0,Image Index,finding_labels,labels
87745,/content/drive/MyDrive/images-nih1/images/0000...,Consolidation,1
82970,/content/drive/MyDrive/images-nih1/images/0000...,No Finding,0
51704,/content/drive/MyDrive/images-nih3/images/0001...,No Finding,0
38340,/content/drive/MyDrive/images-nih3/images/0001...,No Finding,0
71153,/content/drive/MyDrive/images-nih4/images/0002...,Infiltration,1


In [4]:
# visualizando a quantidade de imagens disponíveis por classe
train_df['finding_labels'].value_counts()

No Finding            55622
Infiltration           8770
Atelectasis            3902
Effusion               3645
Nodule                 2497
Pneumothorax           2024
Consolidation          1224
Pleural_Thickening     1028
Emphysema               818
Fibrosis                671
Edema                   578
Pneumonia               295
Name: finding_labels, dtype: int64

In [5]:
# organizando o balanceamento das classes
total_train_examples = 55622 + 8770 + 3902 + 3645 + 2497 + 2024 + 1224 + 1028 + 818 + 671 + 578 + 295
class_weight = {0: total_train_examples / 55622,
                1: total_train_examples / 8770,
                2: total_train_examples / 3902,
                3: total_train_examples / 3645,
                4: total_train_examples / 2497,
                5: total_train_examples / 2024,
                6: total_train_examples / 1224,
                7: total_train_examples / 1028,
                8: total_train_examples / 818,
                9: total_train_examples / 671,
                10: total_train_examples / 578,
                11: total_train_examples / 295}

In [6]:
# normalizando as imagens de treinamento e aplicando aumento de dados
image_generator = ImageDataGenerator(rescale = 1./255., rotation_range = 10, zoom_range = 0.2)

# criando o gerador de imagens de treinamento 
train_generator = image_generator.flow_from_dataframe(
                                                      dataframe = train_df,
                                                      x_col = 'Image Index',
                                                      y_col = 'finding_labels',
                                                      batch_size = 256,
                                                      seed = 42,
                                                      shuffle = True,
                                                      class_mode = 'categorical',
                                                      color_mode = 'rgb',
                                                      target_size = (256, 256))

# normalizando as imagens de teste 
test_datagen = ImageDataGenerator(rescale = 1./255.)

# criando o gerador de imagens de validação 
valid_generator = test_datagen.flow_from_dataframe(
                                                    dataframe = validation_df,
                                                    x_col = 'Image Index',
                                                    y_col = 'finding_labels',
                                                    batch_size = 128,
                                                    seed = 42,
                                                    shuffle = True,
                                                    class_mode = 'categorical',
                                                    target_size = (256, 256))

test_generator = test_datagen.flow_from_dataframe(
                                                  dataframe = test_df, 
                                                  x_col = 'Image Index',
                                                  y_col = 'finding_labels',
                                                  batch_size = 128,
                                                  seed = 42,
                                                  shuffle = True,
                                                  class_mode = 'categorical',
                                                  target_size = (256, 256))

Found 81074 validated image filenames belonging to 12 classes.
Found 4268 validated image filenames belonging to 12 classes.
Found 2640 validated image filenames belonging to 12 classes.


### Preparando a rede neural convolucional 

In [7]:
# baixando os pesos treinados da rede inception
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

--2021-04-09 12:20:38--  https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.120.128, 142.250.128.128, 142.251.6.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.120.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87910968 (84M) [application/x-hdf]
Saving to: ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’


2021-04-09 12:20:38 (118 MB/s) - ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’ saved [87910968/87910968]



In [8]:
# referenciando o local em que os pesos estão armazenados
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# carregando a arquitetura inception pré-treinada
pre_trained_model = InceptionV3(input_shape = (256, 256, 3), include_top = False, weights = None)

# carregando os pesos treinados com outros dados 
pre_trained_model.load_weights(local_weights_file)

# definindo as flags iniciais  
pre_trained_model.trainable = True
set_trainable = False

# para a arquitetura inception, a rede será retreinada a partir da camada 'mixed6'
for layer in pre_trained_model.layers:
    if layer.name == 'mixed6':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

# obtendo a última camada como sendo a nomeada por 'mixed7'
last_layer = pre_trained_model.get_layer('mixed7')
last_output = last_layer.output

In [9]:
# adicionando uma camada de Global Average Pooling
x = layers.GlobalAveragePooling2D()(last_output)
# conecatando a rede uma camada com 1024 neurônios e função de ativação relu
x = layers.Dense(units = 512, activation = tf.nn.relu)(x)     
# conecatando a rede uma camada com 128 neurônios e função de ativação relu
x = layers.Dense(units = 256, activation = tf.nn.relu)(x) 
# aplicando uma camada de dropout com uma taxa de 20% (normalização)
x = layers.Dropout(rate = 0.2)(x)                  
# adicionando uma camada de saída com um neurônio e uma função de ativação sigmoide
x = layers.Dense  (units = 12, activation = tf.nn.softmax)(x)           

# conecatando as camadas definidas acima com a arquitetura inception
model = Model(pre_trained_model.input, x) 

# compilando a rede 
model.compile(optimizer = optimizers.RMSprop(learning_rate = 0.0001), loss = 'categorical_crossentropy', 
              metrics = ['acc']) 

In [10]:
# definindo o caminho pelo qual os pesos serão armazenados 
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.0.hdf5"
# callback para salvar o melhor valor dos pesos em relação ao desempenho com os dados de validação 
checkpoint = ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')

In [11]:
# definindo um array de callbacks
callbacks = [checkpoint]

In [12]:
# treinando a rede neural convolucional
history = model.fit_generator(train_generator, steps_per_epoch = 81072 // 256, 
                              validation_data = valid_generator, validation_steps = 4268 // 128,
                              callbacks = callbacks, epochs = 1, class_weight = class_weight,
                              use_multiprocessing = True, workers = 4)


Epoch 00001: val_acc improved from -inf to 0.68229, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.0.hdf5


In [14]:
# definindo o caminho pelo qual os pesos serão armazenados 
filepath = "/content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5"
# callback para salvar o melhor valor dos pesos em relação ao desempenho com os dados de validação 
checkpoint = ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
# definindo um array de callbacks
callbacks = [checkpoint]

In [15]:
# treinando a rede neural convolucional
history = model.fit_generator(train_generator, steps_per_epoch = 81072 // 256, 
                              validation_data = valid_generator, validation_steps = 4268 // 128,
                              callbacks = callbacks, epochs = 1, class_weight = class_weight,
                              use_multiprocessing = True, workers = 8)


Epoch 00001: val_acc improved from -inf to 0.68111, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5


In [None]:
# treinando a rede neural convolucional
history = model.fit_generator(train_generator, steps_per_epoch = 81072 // 256, 
                              validation_data = valid_generator, validation_steps = 4268 // 128,
                              callbacks = callbacks, epochs = 10, class_weight = class_weight,
                              use_multiprocessing = True, workers = 16)

Epoch 1/10

Epoch 00001: val_acc improved from 0.68111 to 0.68300, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5
Epoch 2/10

Epoch 00002: val_acc did not improve from 0.68300
Epoch 3/10

Epoch 00003: val_acc improved from 0.68300 to 0.68750, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5
Epoch 4/10

Epoch 00004: val_acc did not improve from 0.68750
Epoch 5/10

Epoch 00005: val_acc did not improve from 0.68750
Epoch 6/10

Epoch 00006: val_acc did not improve from 0.68750
Epoch 7/10

Epoch 00007: val_acc did not improve from 0.68750
Epoch 8/10

Epoch 00008: val_acc did not improve from 0.68750
Epoch 9/10

Epoch 00009: val_acc improved from 0.68750 to 0.68963, saving model to /content/drive/MyDrive/weights-nih/inception/transferlearning_weights_v1.1.hdf5
Epoch 10/10