In [None]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout
from keras.layers import Flatten
from keras.layers import Dense
# Callbacks: ModelCheckpoint saves checkpoints of the Keras model during training and TensorBoard logs the run; callbacks can also be used to stop training early to limit overfitting
from keras.callbacks import ModelCheckpoint, TensorBoard
# for single predictions
from keras.preprocessing import image
import time

import numpy as np
import pandas as pd
from keras.utils import to_categorical
from keras.preprocessing import image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
%matplotlib inline

In [None]:
# Build the CNN classifier: four Conv2D -> MaxPooling2D -> Dropout stages, then a
# fully connected head ending in 14 sigmoid outputs (one per label column fed by
# the data generators).
classifier = Sequential()

# Convolutional feature extractor. Input is a 350x350 single-channel image.
classifier.add(Conv2D(32, (3, 3), input_shape = (350, 350, 1), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2,2)))
classifier.add(Dropout(0.2))

classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Dropout(0.2))

classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Dropout(0.2))

classifier.add(Conv2D(64, (3, 3), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Dropout(0.2))

# Flatten the feature maps into a vector for the dense layers.
classifier.add(Flatten())

# Fully connected head with dropout after every hidden layer.
classifier.add(Dense(activation = 'relu', units = 128))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'relu', units = 64))
classifier.add(Dropout(0.3))
classifier.add(Dense(activation = 'relu', units = 64))
classifier.add(Dropout(0.3))
# Independent sigmoid per output so several labels can be active at once.
classifier.add(Dense(activation = 'sigmoid', units = 14))

# The accuracy metric is registered under the short name 'acc' on purpose: the
# checkpoint filename template used for training formats `{val_acc}`, and
# Keras >= 2.3 logs a metric under the exact name passed here ('accuracy' would be
# logged as `val_accuracy`, making the template fail with a KeyError). Older Keras
# versions report `acc` / `val_acc` for either spelling, so this works on both.
classifier.compile(optimizer = 'adam',
                   loss ='binary_crossentropy',
                   metrics = ['acc']
                  )


classifier.summary()


In [None]:
# Data pipeline: read the label CSVs and wrap them in Keras image generators.
from keras.preprocessing.image import ImageDataGenerator

# Only the first 12,800 rows of the training CSV are used; the validation CSV is read in full.
train_df = pd.read_csv("CheXpert-v1.0-small/train_data.csv", nrows=12800)
valid_df = pd.read_csv("CheXpert-v1.0-small/valid.csv")

# Unique (timestamped) run name, used for the TensorBoard log directory.
tensor_log_name = f"chest-xray-cnn-larger-dataset-{int(time.time())}"

# The two CSVs name the same 14 labels differently: snake_case in the training
# file, capitalised and space-separated in the validation file.
TRAIN_LABEL_COLUMNS = [
    'no_finding',
    'enlarged_cardiomediastinum',
    'cardiomegaly',
    'lung_opacity',
    'lung_lesion',
    'edema',
    'consolidation',
    'pneumonia',
    'atelectasis',
    'pneumothorax',
    'pleural_effusion',
    'pleural_other',
    'fracture',
    'support_devices',
]
VALID_LABEL_COLUMNS = [
    'No Finding',
    'Enlarged Cardiomediastinum',
    'Cardiomegaly',
    'Lung Opacity',
    'Lung Lesion',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
    'Pleural Other',
    'Fracture',
    'Support Devices',
]

# Options common to both generators. directory=None means the path column is used
# as-is; class_mode="raw" passes the label columns through as the target array.
SHARED_FLOW_OPTIONS = dict(
    directory=None,
    color_mode="grayscale",
    target_size=(350, 350),
    class_mode="raw",
)

# Training images are rescaled to [0, 1] and randomly zoomed; no horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    horizontal_flip=False,
)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col="path",
    y_col=TRAIN_LABEL_COLUMNS,
    batch_size=32,
    **SHARED_FLOW_OPTIONS
)

validation_generator = test_datagen.flow_from_dataframe(
    dataframe=valid_df,
    x_col="Path",
    y_col=VALID_LABEL_COLUMNS,
    batch_size=128,
    **SHARED_FLOW_OPTIONS
)

In [None]:
import os

# Checkpoint filename template. Keras fills the placeholders from the epoch number
# and the epoch's logged metrics, so each placeholder must match a logged key.
# NOTE(review): Keras >= 2.3 logs metrics under the exact name passed to compile();
# confirm that `val_acc` is present in the logs for the installed version.
filepath="models/weights-improvement-large-dataset-{epoch:02d}-{val_loss:.2f}-{val_acc:.2f}.hdf5"

# HDF5 saving does not create missing parent directories; without this the run
# would fail at the first checkpoint write, after epochs of training.
os.makedirs(os.path.dirname(filepath), exist_ok=True)

# Every 2 epochs (period=2), write a checkpoint if val_loss is the lowest seen so far.
checkpoint = ModelCheckpoint(filepath, 
                             monitor='val_loss', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='min',
                             period=2
                            )

# One TensorBoard log directory per run, named by the timestamped run name.
tensorboard = TensorBoard(log_dir = f'logs/{tensor_log_name}')

# 15 epochs of 200 training batches each, with 2 validation batches after every epoch.
classifier.fit_generator(
        train_generator,
        steps_per_epoch=200,
        epochs=15,
        validation_data=validation_generator,
        validation_steps=2,
        callbacks=[checkpoint, tensorboard]
)

In [None]:
# Single-image prediction: load one validation X-ray the same way the generators
# do (grayscale, resized to 350x350).
test_image = image.load_img('CheXpert-v1.0-small/valid/patient64740/study1/view1_frontal.jpg',
                            target_size = (350, 350),
                            color_mode= 'grayscale'
                           )

# Convert to a float array and apply the same 1/255 rescaling the training and
# validation generators use; feeding raw 0-255 pixel values would put the input
# on a different scale from everything the network was trained on.
test_image_arr = image.img_to_array(test_image) * (1. / 255)

# Add a leading batch axis: predict() expects a batch of images, not a single one.
test_image_arr = np.expand_dims(test_image_arr, axis = 0)

# Output: one row of 14 sigmoid scores, one per label.
classifier.predict(test_image_arr)

In [None]:
# Save the trained model to local storage and reload it from that same file.

# A single path is used for both the save and the load, so the reloaded model is
# guaranteed to be the one just written.
# NOTE(review): the load previously read 'models/large-dataset-first_test_model.h5',
# a file this notebook never writes; if that name was the intended one, change it here.
model_path = 'models/first_test_model.h5'

# Save the model
classifier.save(model_path)

# Recreate the exact same model purely from the file
new_model = keras.models.load_model(model_path)