### Importação dos pacotes

In [1]:
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import class_weight
import numpy as np
import tensorflow as tf

### Configuração dos conjuntos de dados

In [2]:
# configurando o caminho dos datasets 
path_train = '../2-datasets-preprocessing/dataframes/vinbigdata/train_df.csv'
path_validation = '../2-datasets-preprocessing/dataframes/vinbigdata/validation_df.csv'
path_test = '../2-datasets-preprocessing/dataframes/vinbigdata/test_df.csv'
# criando objetos para armazenar os datasets com os subconjuntos de dados tratados
train_df = pd.read_csv(path_train, sep  = ',', index_col = 0)
validation_df = pd.read_csv(path_validation, sep = ',', index_col = 0)
test_df = pd.read_csv(path_test, sep = ',', index_col = 0)

In [3]:
# visualizando a distribuição de dados no conjunto de treinamento
train_df['class_name'].value_counts()

No finding            2104
Pleural thickening     566
Pulmonary fibrosis     503
Lung Opacity           279
Nodule/Mass            230
Pleural effusion       211
Calcification          106
Infiltration           104
ILD                     93
Atelectasis             27
Pneumothorax            19
Name: class_name, dtype: int64

In [4]:
# visualizando a distribuição de dados no conjunto de validação
validation_df['class_name'].value_counts()

No finding            526
Pleural thickening    154
Pulmonary fibrosis    118
Lung Opacity           68
Nodule/Mass            53
Pleural effusion       47
Calcification          31
Infiltration           29
ILD                    27
Atelectasis             6
Pneumothorax            2
Name: class_name, dtype: int64

In [5]:
# visualizando a distribuição de dados no conjunto de teste
test_df['class_name'].value_counts()

No finding            657
Pleural thickening    188
Pulmonary fibrosis    125
Lung Opacity          101
Pleural effusion       74
Nodule/Mass            64
Infiltration           36
ILD                    32
Calcification          30
Atelectasis            12
Pneumothorax            7
Name: class_name, dtype: int64

In [6]:
# normalizando as imagens de treinamento 
image_generator = ImageDataGenerator(samplewise_center = True, samplewise_std_normalization = True)

# criando o gerador de imagens de treinamento 
train_generator = image_generator.flow_from_dataframe(
                                                      dataframe = train_df,
                                                      directory = '',
                                                      x_col = 'image_path',
                                                      y_col = 'class_name',
                                                      batch_size = 32,
                                                      seed = 42,
                                                      shuffle = True,
                                                      class_mode = 'categorical',
                                                      target_size = (512, 512))
# criando o gerador de imagens de validação 
valid_generator = image_generator.flow_from_dataframe(
                                                      dataframe = validation_df,
                                                      directory = '', 
                                                      x_col = 'image_path',
                                                      y_col = 'class_name',
                                                      batch_size = 32,
                                                      seed = 42,
                                                      shuffle = True,
                                                      class_mode = 'categorical',
                                                      target_size = (512, 512))

# normalizando as imagens de teste 
test_datagen = ImageDataGenerator(samplewise_center = True, samplewise_std_normalization = True)

test_generator = test_datagen.flow_from_dataframe(
                                                  dataframe = test_df, 
                                                  directory = '',
                                                  x_col = 'image_path',
                                                  y_col = 'class_name',
                                                  batch_size = 32,
                                                  seed = 42,
                                                  shuffle = True,
                                                  class_mode = 'categorical',
                                                  target_size = (512, 512))

Found 4242 validated image filenames belonging to 11 classes.
Found 1061 validated image filenames belonging to 11 classes.
Found 1326 validated image filenames belonging to 11 classes.


In [7]:
# visualizando atributos numéricos de cada uma das classes
train_generator.class_indices

{'Atelectasis': 0,
 'Calcification': 1,
 'ILD': 2,
 'Infiltration': 3,
 'Lung Opacity': 4,
 'No finding': 5,
 'Nodule/Mass': 6,
 'Pleural effusion': 7,
 'Pleural thickening': 8,
 'Pneumothorax': 9,
 'Pulmonary fibrosis': 10}

In [8]:
# organizando um dicionário para realizar o balanceamento nos dados das classes
class_weights = class_weight.compute_class_weight('balanced', np.unique(train_df['class_name']),
                                                  train_df['class_name'])
class_weights = {0: class_weights[0], 1: class_weights[1], 2: class_weights[2], 
                 3: class_weights[3], 4: class_weights[4], 5: class_weights[5],
                 6: class_weights[6], 7: class_weights[7], 8: class_weights[8],
                 9: class_weights[9], 10: class_weights[10]}

 'No finding' 'Nodule/Mass' 'Pleural effusion' 'Pleural thickening'
 'Pneumothorax' 'Pulmonary fibrosis'], y=39441         Nodule/Mass
6231           No finding
31582          No finding
45648          No finding
29139          No finding
               ...       
3404     Pleural effusion
15909    Pleural effusion
32371          No finding
2765        Calcification
4237         Lung Opacity
Name: class_name, Length: 4242, dtype: object as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error


### Configurano a rede neural artificial

In [9]:
input_a = tf.keras.layers.Input(shape = (512, 512, 3))
x = tf.keras.layers.Conv2D(filters = 32, kernel_size = (2, 2), activation = tf.nn.relu)(input_a)
x = tf.keras.layers.MaxPooling2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = tf.nn.relu)(x)
x = tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = tf.nn.relu)(x)
x = tf.keras.layers.MaxPooling2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Conv2D(filters = 128, kernel_size = (2, 2), activation = tf.nn.relu)(x)
x = tf.keras.layers.Conv2D(filters = 128, kernel_size = (2, 2), activation = tf.nn.relu)(x)
x = tf.keras.layers.MaxPooling2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Conv2D(filters = 256, kernel_size = (2, 2), activation = tf.nn.relu)(x)
x = tf.keras.layers.Conv2D(filters = 256, kernel_size = (3, 3), activation = tf.nn.relu)(x)
x = tf.keras.layers.MaxPooling2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size = (2, 2), activation = tf.nn.relu)(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size = (3, 3), activation = tf.nn.relu)(x)
x = tf.keras.layers.MaxPooling2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size = (2, 2), activation = tf.nn.relu)(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size = (3, 3), activation = tf.nn.relu)(x)
x = tf.keras.layers.AveragePooling2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(units = 11, activation = tf.nn.softmax)(x)
model = tf.keras.models.Model(inputs = input_a, outputs = x)

In [10]:
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc']) 

In [11]:
# definindo o caminho pelo qual os pesos serão armazenados 
filepath = "transferlearning_weights.hdf5"
# callback para salvar o melhor valor dos pesos em relação ao desempenho com os dados de validação 
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
callbacks = [checkpoint]

In [None]:
# treinando a rede neural convolucional
history = model.fit_generator(generator = train_generator, 
                              steps_per_epoch = 4242 // 32, 
                              validation_data = valid_generator, 
                              validation_steps = 1061 // 32,
                              callbacks = callbacks, epochs = 10, class_weight = class_weights,
                              verbose = 1)



Epoch 1/10