# First Try CNN

In [1]:
#TensorFlow
import tensorflow as tf

#Standarts
import numpy as np
import pandas as pd
import os

## Dataset filtering

In [2]:
csvpath   = r'D:\Downloads\NIH'
imagepath = r'D:\Downloads\NIH\images-224\3channel'

In [3]:
df = pd.read_csv(os.path.join(csvpath,'Data_Entry_2017.csv')).drop(['Unnamed: 11'], axis = 1)

In [7]:
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y]
0,00000001_000.png,Cardiomegaly,0,1,058Y,M,PA,2682,2749,0.143,0.143
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,058Y,M,PA,2894,2729,0.143,0.143
2,00000001_002.png,Cardiomegaly|Effusion,2,1,058Y,M,PA,2500,2048,0.168,0.168
3,00000002_000.png,No Finding,0,2,081Y,M,PA,2500,2048,0.171,0.171
4,00000003_000.png,Hernia,0,3,081Y,F,PA,2582,2991,0.143,0.143


In [9]:
errorlist = pd.read_csv('errorlist.csv')

In [10]:
df.set_index(df['Image Index']).drop(index = errorlist['0'], inplace = True)

In [11]:
df['Finding Labels'] = df['Finding Labels'].apply(lambda x: x.split('|')[0] if '|' in x else x)

In [12]:
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y]
0,00000001_000.png,Cardiomegaly,0,1,058Y,M,PA,2682,2749,0.143,0.143
1,00000001_001.png,Cardiomegaly,1,1,058Y,M,PA,2894,2729,0.143,0.143
2,00000001_002.png,Cardiomegaly,2,1,058Y,M,PA,2500,2048,0.168,0.168
3,00000002_000.png,No Finding,0,2,081Y,M,PA,2500,2048,0.171,0.171
4,00000003_000.png,Hernia,0,3,081Y,F,PA,2582,2991,0.143,0.143


In [13]:
labels = pd.get_dummies(df['Finding Labels'])

In [14]:
labels.columns

Index(['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
       'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'No Finding',
       'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax'],
      dtype='object')

In [15]:
labels.head(1)

Unnamed: 0,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


## CNN

In [16]:
dataframe = df[['Image Index', 'Finding Labels']]

In [17]:
IMAGE_SIZE = 224
BATCH_SIZE = 64

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split = 0.2)

train_generator = datagen.flow_from_dataframe(
    dataframe,
    imagepath,
    x_col='Image Index',
    y_col="Finding Labels",
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE, 
    subset='training')

val_generator = datagen.flow_from_dataframe(
    dataframe,
    imagepath,
    x_col='Image Index',
    y_col="Finding Labels",
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE, 
    subset='training')

  .format(n_invalid, x_col)


Found 89281 validated image filenames belonging to 15 classes.
Found 89281 validated image filenames belonging to 15 classes.


In [20]:
for image_batch, label_batch in train_generator:
  break
image_batch.shape, label_batch.shape

((64, 224, 224, 3), (64, 15))

In [22]:
IMG_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

# Create the base model from the pre-trained model MobileNet V2
base_model = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE,
                                              include_top=False, 
                                              weights='imagenet')

In [48]:
base_model.trainable = False

In [None]:
metamodel = tf.keras.layers.Input()

In [45]:
model = tf.keras.Sequential([
  base_model,
  tf.keras.layers.Dropout(0.2),
  merge([tf.tf.keras.layers.GlobalAveragePooling2D(), metamodel], 'concat'),          #Adicionar metadados aqui
  tf.keras.layers.Dense(15, activation='softmax')
])

In [46]:
model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [47]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 7, 7, 2048)        23587712  
_________________________________________________________________
dropout_1 (Dropout)          (None, 7, 7, 2048)        0         
_________________________________________________________________
global_average_pooling2d_1 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 15)                30735     
Total params: 23,618,447
Trainable params: 30,735
Non-trainable params: 23,587,712
_________________________________________________________________


In [26]:
epochs = 1

history = model.fit(train_generator, 
                    steps_per_epoch=len(train_generator), 
                    epochs=epochs, 
                    validation_data=val_generator, 
                    validation_steps=len(val_generator))

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 1396 steps, validate for 1396 steps


In [127]:
model.save_weights('weights', save_format ='HDF5')

In [40]:
train_generator.filenames[train_generator.batch_index]

'00005986_015.png'

In [44]:
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________