# Conjunto de Dados 4: *ChestXray-NIH*
***
> Disponível em: <https://nihcc.app.box.com/v/ChestXray-NIHCC/folder/36938765345>. Acesso em 19 fev. 2021.

### Importação dos pacotes necessários

In [1]:
import os
import urllib.request
import warnings

from keras.preprocessing.image import ImageDataGenerator

# histogram_equalization.py is fetched on every run, so the local copy always
# mirrors the upstream repository.
urllib.request.urlretrieve('https://raw.githubusercontent.com/Alyssonmach/histogram-equalization/main/histogram_equalization.py', 'histogram_equalization.py')

# cxr8_dados.py has to exist locally before it can be imported. It is downloaded
# only when missing: a fresh runtime gets the module (Restart & Run All works),
# while an existing - possibly hand-edited - local copy is never overwritten.
if not os.path.exists('cxr8_dados.py'):
    urllib.request.urlretrieve('https://raw.githubusercontent.com/Alyssonmach/cnn-lung-diseases/main/assets/cxr8_dados.py', 'cxr8_dados.py')

from histogram_equalization import histogram_equalization
from cxr8_dados import data_download, organize_csv, download_images, train_validation_test_split

# Silence every Python warning for the rest of the session.
# NOTE(review): this presumably also hides the Keras warning about dataframe rows
# whose image file is missing from the image directory - confirm this is intended.
warnings.filterwarnings("ignore")

### Baixando os dados

In [2]:
# Download the dataset's CSV (dataframe-info.csv) from the project repository.
# NOTE(review): data_download comes from cxr8_dados; presumably it stores the URL's
# content under the given file name in the working directory - confirm.
data_download('https://raw.githubusercontent.com/Alyssonmach/cnn-lung-diseases/main/assets/dataframe-info.csv', 'dataframe-info.csv')

In [None]:
# Download the images used by this notebook.
# NOTE(review): the destination folder is decided inside cxr8_dados.download_images;
# confirm it matches IMAGE_DIR, which is where the data generators look for files.
download_images()

In [2]:
# Directory that holds the image files; the data generators below read from it.
# NOTE(review): the path points into a mounted Google Drive (Colab layout) -
# presumably the drive is mounted before this cell is used; confirm.
IMAGE_DIR = '/content/drive/MyDrive/images'

In [3]:
# Build the organised dataframe from the downloaded CSV. organize_csv also returns
# a (normal, anormal) pair.
# NOTE(review): presumably that pair describes the two label classes (subsets or
# counts) - confirm in cxr8_dados.
# NOTE(review): the absolute /content path assumes the CSV was downloaded while
# /content was the working directory (the Colab default) - confirm.
dataframe, (normal, anormal) = organize_csv('/content/dataframe-info.csv')

In [4]:
# Show the first rows of the full dataframe and its (rows, columns) shape.
print(dataframe.head())
print('dataframe shape:', dataframe.shape)

        Image Index  labels
0  00011895_001.png       0
1  00015384_004.png       0
2  00028131_011.png       0
3  00028792_001.png       1
4  00010716_000.png       0
dataframe shape: (87982, 2)


In [5]:
# Partition the dataset into training, validation and test dataframes.
# NOTE(review): the split proportions and any shuffling/seed are defined inside
# cxr8_dados.train_validation_test_split - confirm the split is reproducible.
train_df, validation_df, test_df = train_validation_test_split(dataframe)

In [6]:
# Show the first rows of the training split and its (rows, columns) shape.
print(train_df.head())
print('train_df shape:', train_df.shape)

            Image Index  labels
68473  00013172_000.png       1
86448  00010007_169.png       1
61928  00016051_010.png       0
44912  00004594_002.png       0
20587  00007406_003.png       0
train_df shape: (84497, 2)


In [7]:
# Show the first rows of the validation split and its (rows, columns) shape.
print(validation_df.head())
print('validation_df shape:', validation_df.shape)

            Image Index  labels
57440  00016944_004.png       1
85690  00010695_015.png       1
52337  00011380_014.png       1
11606  00010749_000.png       1
27801  00012976_006.png       0
validation_df shape: (1725, 2)


In [8]:
# Show the first rows of the test split and its (rows, columns) shape.
print(test_df.head())
# The label names the frame actually being printed (it previously said
# 'validation_df shape:' while reporting test_df.shape).
print('test_df shape:', test_df.shape)

            Image Index  labels
3423   00004605_005.png       1
34940  00018610_042.png       1
1864   00029870_000.png       0
38846  00007632_014.png       1
46844  00001286_003.png       1
validation_df shape: (1760, 2)


### Geradores de Dados para a rede pelo TensorFlow

> **Opção 1**

In [10]:
# Option 1: per-image standardisation (each image is centred on its own mean and
# divided by its own standard deviation).

# Training pipeline: standardisation plus light augmentation
# (random rotations up to 10 degrees, random zoom of up to 20%).
image_generator = ImageDataGenerator(samplewise_center = True, samplewise_std_normalization = True,
                                     rotation_range = 10, zoom_range = 0.2)

# Evaluation pipeline: the same standardisation and NO augmentation, so validation
# and test metrics are measured on unmodified images.
test_datagen = ImageDataGenerator(samplewise_center = True, samplewise_std_normalization = True)

# Arguments shared by the three generators. color_mode is now explicit everywhere
# ('rgb' is also the Keras default, so the image format is unchanged).
# NOTE(review): in the recorded run only 36449 of the 84497 training rows were
# found - rows whose file is absent from IMAGE_DIR appear to be dropped silently.
flow_options = dict(directory = IMAGE_DIR,
                    x_col = 'Image Index',
                    y_col = 'labels',
                    seed = 42,
                    class_mode = 'raw',
                    color_mode = 'rgb',
                    target_size = (256, 256))

# Training generator: augmented and shuffled.
train_generator = image_generator.flow_from_dataframe(dataframe = train_df,
                                                      batch_size = 256,
                                                      shuffle = True,
                                                      **flow_options)

# Validation generator: built from the non-augmenting pipeline (it previously
# reused image_generator, so validation images were randomly rotated/zoomed).
# shuffle = False keeps batches in dataframe order and the metrics deterministic.
valid_generator = test_datagen.flow_from_dataframe(dataframe = validation_df,
                                                   batch_size = 128,
                                                   shuffle = False,
                                                   **flow_options)

# Test generator: shuffle = False so model.predict outputs stay aligned with
# test_generator.labels / test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(dataframe = test_df,
                                                  batch_size = 128,
                                                  shuffle = False,
                                                  **flow_options)

Found 36449 validated image filenames.
Found 741 validated image filenames.
Found 760 validated image filenames.


> **Opção 2**

In [9]:
# Option 2: plain rescaling of pixel values from [0, 255] to [0, 1].

# Training pipeline: rescaling plus light augmentation
# (random rotations up to 10 degrees, random zoom of up to 20%).
image_generator = ImageDataGenerator(rescale = 1./255., rotation_range = 10, zoom_range = 0.2)

# Evaluation pipeline: the same rescaling and NO augmentation, so validation and
# test metrics are measured on unmodified images.
test_datagen = ImageDataGenerator(rescale = 1./255.)

# Arguments shared by the three generators. color_mode is now explicit everywhere
# ('rgb' is also the Keras default, so the image format is unchanged).
# NOTE(review): in the recorded run only 36449 of the 84497 training rows were
# found - rows whose file is absent from IMAGE_DIR appear to be dropped silently.
flow_options = dict(directory = IMAGE_DIR,
                    x_col = 'Image Index',
                    y_col = 'labels',
                    seed = 42,
                    class_mode = 'raw',
                    color_mode = 'rgb',
                    target_size = (256, 256))

# Training generator: augmented and shuffled.
train_generator = image_generator.flow_from_dataframe(dataframe = train_df,
                                                      batch_size = 256,
                                                      shuffle = True,
                                                      **flow_options)

# Validation generator: built from the non-augmenting pipeline (it previously
# reused image_generator, so validation images were randomly rotated/zoomed).
# shuffle = False keeps batches in dataframe order and the metrics deterministic.
valid_generator = test_datagen.flow_from_dataframe(dataframe = validation_df,
                                                   batch_size = 128,
                                                   shuffle = False,
                                                   **flow_options)

# Test generator: shuffle = False so model.predict outputs stay aligned with
# test_generator.labels / test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(dataframe = test_df,
                                                  batch_size = 128,
                                                  shuffle = False,
                                                  **flow_options)

Found 36449 validated image filenames.
Found 741 validated image filenames.
Found 760 validated image filenames.
