In [1]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout
from keras.layers import Flatten
from keras.layers import Dense
# Implementing Callbacks to save various check points of our keras model; can be used to terminate model prematurely to also prevent overfitting
from keras.callbacks import ModelCheckpoint, TensorBoard

# for single predictions
from keras.preprocessing import image
import time

import numpy as np
import pandas as pd
from keras.utils import to_categorical
from keras.preprocessing import image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# initializing the classfier
classifier = Sequential()

# Convolution
classifier.add(Conv2D(32, (3, 3), input_shape = (350, 350, 1), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Dropout(0.2))

classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Dropout(0.2))

classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Dropout(0.2))

classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Dropout(0.2))

# Flattening
classifier.add(Flatten())

# Fully connected layer
classifier.add(Dense(activation = 'relu', units = 128))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'relu', units = 64))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'relu', units = 64))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'relu', units = 64))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'relu', units = 64))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'sigmoid', units = 14))

classifier.compile(optimizer = 'adam',
                   loss ='binary_crossentropy',
                   metrics = ['accuracy']
                  )


classifier.summary()






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 348, 348, 32)      320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 174, 174, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 174, 174, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 172, 172, 64)      18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 86, 86, 64)        0         
__________________________________________________________

In [3]:
# Using image generators
from keras.preprocessing.image import ImageDataGenerator

train_df = pd.read_csv("CheXpert-v1.0-small/train_data.csv", nrows=6400)
valid_df = pd.read_csv("CheXpert-v1.0-small/valid.csv")

tensor_log_name = f"chest-xray-cnn-{int(time.time())}"

train_datagen = ImageDataGenerator(
        rescale=1./255,
        zoom_range=0.2,
        horizontal_flip=False)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=None,
        x_col="path",
        y_col=['no_finding',
               'enlarged_cardiomediastinum',
               'cardiomegaly',
               'lung_opacity',
               'lung_lesion',
               'edema',
               'consolidation',
               'pneumonia',
               'atelectasis',
               'pneumothorax',
               'pleural_effusion',
               'pleural_other',
               'fracture',
               'support_devices'
               ],
        color_mode="grayscale",
        target_size=(350, 350),
        batch_size=32,
#         class_mode="multi_output",
        class_mode="raw",
)

validation_generator = test_datagen.flow_from_dataframe(
        dataframe=valid_df,
        directory=None,
        x_col="Path",
        y_col=['No Finding',
               'Enlarged Cardiomediastinum',
               'Cardiomegaly',
               'Lung Opacity',
               'Lung Lesion',
               'Edema',
               'Consolidation',
               'Pneumonia',
               'Atelectasis',
               'Pneumothorax',
               'Pleural Effusion',
               'Pleural Other',
               'Fracture',
               'Support Devices'
              ],
        color_mode="grayscale",
        target_size=(350, 350),
        batch_size=128,
        class_mode="raw")

Found 6400 validated image filenames.
Found 234 validated image filenames.


In [4]:
# filepath contains location for storing keras models with the lowwest loss function after every 10 epoch
filepath="models/weights-improvement-{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5"

checkpoint = ModelCheckpoint(filepath, 
                             monitor='val_loss', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='min',
#                              save_freq=5
                             period=2
                            )

tensorboard = TensorBoard(log_dir = f'logs/{tensor_log_name}')

classifier.fit_generator(
        train_generator,
        steps_per_epoch=200,
        epochs=15,
        validation_data=validation_generator,
        validation_steps=2,
        callbacks=[checkpoint, tensorboard]
)



Epoch 1/15
Epoch 2/15

Epoch 00002: val_loss improved from inf to 0.47809, saving model to models/weights-improvement-02-0.48-0.79.hdf5
Epoch 3/15
Epoch 4/15

Epoch 00004: val_loss improved from 0.47809 to 0.47242, saving model to models/weights-improvement-04-0.47-0.79.hdf5
Epoch 5/15
Epoch 6/15

Epoch 00006: val_loss improved from 0.47242 to 0.45986, saving model to models/weights-improvement-06-0.46-0.80.hdf5
Epoch 7/15
Epoch 8/15

Epoch 00008: val_loss improved from 0.45986 to 0.45203, saving model to models/weights-improvement-08-0.45-0.81.hdf5
Epoch 9/15
Epoch 10/15

Epoch 00010: val_loss did not improve from 0.45203
Epoch 11/15
Epoch 12/15

Epoch 00012: val_loss improved from 0.45203 to 0.44865, saving model to models/weights-improvement-12-0.45-0.81.hdf5
Epoch 13/15
Epoch 14/15

Epoch 00014: val_loss did not improve from 0.44865
Epoch 15/15


<keras.callbacks.History at 0x1b8c27de9b0>

In [5]:
# Single preds
test_image = image.load_img('CheXpert-v1.0-small/valid/patient64740/study1/view1_frontal.jpg',
                            target_size = (350, 350),
                            color_mode= 'grayscale'
                           )

test_image_arr = image.img_to_array(test_image)

test_image_arr = np.expand_dims(test_image_arr, axis = 0)

classifier.predict(test_image_arr)

array([[9.3569601e-01, 2.9802322e-08, 0.0000000e+00, 1.2308359e-05,
        9.4473362e-06, 0.0000000e+00, 0.0000000e+00, 5.9604645e-08,
        5.9604645e-08, 0.0000000e+00, 5.9604645e-08, 2.9802322e-08,
        1.9508182e-06, 1.2234969e-05]], dtype=float32)

In [6]:
# Save model on local storage

# Save the model
classifier.save('models/final_test_model_6400_class_1.h5')

# Recreate the exact same model purely from the file
new_model = keras.models.load_model('models/final_test_model_6400_class_1.h5')