In [6]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imutils import paths
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from keras.applications import VGG19
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.models import Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.callbacks import EarlyStopping

In [7]:
# Root of the on-disk data layout; every other location is derived from it.
PATH = 'data/'

# Top-level folders under the root, in order:
#   CSV_PATH         - folder with csv datasets
#   DICOM_PATH       - folder containing all of the dicom files
#   JPG_PATH         - folder containing all the converted jpg files
#   DESTINATION_PATH - folder where the train/validation/test subsets live
#   MODELS_PATH      - 'model' folder under the root
CSV_PATH, DICOM_PATH, JPG_PATH, DESTINATION_PATH, MODELS_PATH = (
    os.path.join(PATH, folder)
    for folder in ('csv', 'pool', 'pool_jpg', 'sorted_balanced', 'model')
)

# Each split has one sub-folder per label: normal / nnnp / pneumonia.

# Train split
TRAIN_PATH = os.path.join(DESTINATION_PATH, 'train')
TRAIN_NORMAL_PATH, TRAIN_NNNP_PATH, TRAIN_PNEUMONIA_PATH = (
    os.path.join(TRAIN_PATH, label)
    for label in ('normal', 'nnnp', 'pneumonia')
)

# Validation split
VAL_PATH = os.path.join(DESTINATION_PATH, 'validation')
VAL_NORMAL_PATH, VAL_NNNP_PATH, VAL_PNEUMONIA_PATH = (
    os.path.join(VAL_PATH, label)
    for label in ('normal', 'nnnp', 'pneumonia')
)

# Test split
TEST_PATH = os.path.join(DESTINATION_PATH, 'test')
TEST_NORMAL_PATH, TEST_NNNP_PATH, TEST_PNEUMONIA_PATH = (
    os.path.join(TEST_PATH, label)
    for label in ('normal', 'nnnp', 'pneumonia')
)

In [8]:
# Run configuration.

# Square input resolution (pixels) used for every image.
img_width = img_height = 128

# Literal split directories.
# NOTE(review): none of the cells visible in this notebook read these three
# names (the generators use TRAIN_PATH / VAL_PATH / TEST_PATH) - confirm
# whether they are still needed.
train_data_dir = "data/train"
validation_data_dir = "data/val"
test_data_dir = "data/test"

NB = 2        # number of output classes (size of the softmax layer)
BS = 64       # batch size for all generators
EPOCHS = 10   # maximum number of training epochs

In [13]:
# Number of image files found under each split directory, in the order
# train / validation / test.
TRAIN, VAL, TEST = (
    len(list(paths.list_images(split_dir)))
    for split_dir in (TRAIN_PATH, VAL_PATH, TEST_PATH)
)

In [18]:
# Image generators. Both only rescale pixel values by 1/255; no rotation,
# shift or zoom arguments are passed, so no geometric augmentation is applied.
trainAug = ImageDataGenerator(rescale = 1./255,
                              fill_mode = "nearest")

valAug = ImageDataGenerator(rescale = 1./255,
                            fill_mode = "nearest")

# Training batches are shuffled; labels are one-hot ("categorical").
trainGen = trainAug.flow_from_directory(
                    TRAIN_PATH,
                    target_size = (img_height, img_width),
                    batch_size = BS,
                    shuffle = True,
                    class_mode = "categorical")

# Validation reads the dedicated validation split. It previously pointed at
# TEST_PATH, which made validation metrics (and early stopping) depend on the
# test images and leaked the held-out set into model selection.
valGen = valAug.flow_from_directory(
                    VAL_PATH,
                    target_size = (img_height, img_width),
                    batch_size = BS,
                    shuffle = False,
                    class_mode = "categorical")

# Test batches are kept in directory order (shuffle = False) so predictions
# line up with `testGen.classes` during evaluation.
testGen = valAug.flow_from_directory(
                    TEST_PATH,
                    target_size = (img_height, img_width),
                    batch_size = BS,
                    shuffle = False,
                    class_mode = "categorical")

Found 8416 images belonging to 2 classes.
Found 1804 images belonging to 2 classes.
Found 1804 images belonging to 2 classes.


In [None]:
# Build the transfer-learning network: an ImageNet-pretrained VGG19
# convolutional base (classifier top removed) followed by a new dense head.
# This cell only constructs the model; training and saving happen later.
#
# The input shape uses (height, width, channels) order so that it matches the
# generators, which are created with target_size = (img_height, img_width).
base_model = VGG19(weights = "imagenet", include_top=False, 
                   input_shape = (img_height, img_width, 3))

# New classification head: flatten -> 1024 -> 256 -> NB-way softmax, with
# dropout after each hidden dense layer.
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation = "relu")(x)
x = Dropout(0.4)(x)
x = Dense(256, activation = "relu")(x)
x = Dropout(0.2)(x)
preds = Dense(NB, activation = "softmax")(x)

# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# `input` / `output` spellings are the legacy Keras 1 form.
model = Model(inputs = base_model.input, outputs = preds)


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# List every layer with its index so the freeze boundary can be checked.
for i, layer in enumerate(model.layers):
    print(i, layer.name)

# Freeze the first 16 layers (indices 0-15); every layer from index 16
# onwards stays trainable.
for i, layer in enumerate(model.layers):
    layer.trainable = i >= 16

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Early-stopping callback that watches validation accuracy.
# NOTE(review): patience (10) equals EPOCHS as set in the constants cell, so
# it looks like this callback cannot trigger within a single run - confirm
# whether a smaller patience was intended.
early = EarlyStopping(
    monitor="val_acc",
    min_delta=0,
    patience=10,
    verbose=1,
    mode="auto",
)

In [None]:
# Compile for one-hot multi-class training: categorical cross-entropy loss,
# SGD (learning rate 0.001, momentum 0.9), and accuracy as the tracked metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer=SGD(lr=0.001, momentum=0.9),
    metrics=["accuracy"],
)

In [None]:
# Train the model and keep the History object for plotting.
#
# steps_per_epoch: full training batches per epoch; clamped to at least 1 so
#   a training set smaller than one batch does not yield zero steps.
# validation_steps: taken from the generator itself (its length is the number
#   of batches needed to cover all of its samples). Because valGen is not
#   shuffled and is not reset between epochs, using fewer steps than its
#   length would make each epoch validate on a different, drifting window of
#   images; it also avoids relying on a directory count (VAL) that may not
#   match the directory valGen actually reads.
H = model.fit_generator(
        trainGen,
        epochs = EPOCHS,
        steps_per_epoch = max(1, TRAIN // BS),
        validation_data = valGen,
        validation_steps = len(valGen),
        callbacks = [early])

# Persist the trained model (architecture + weights) to the working directory.
model.save('4_3_Model_Binary_Transfer_kjaisingh.h5')

In [None]:
# Generate predictions on the test split and report metrics.
# Reset first so the generator starts from its first batch.
testGen.reset()

# len(testGen) is the number of batches needed to cover every test image
# exactly once. The previous `(TEST // BS) + 1` ran one batch too many
# whenever TEST was an exact multiple of BS, producing more predictions than
# there are labels in `testGen.classes`.
predictions = model.predict_generator(testGen, steps = len(testGen))
# Convert per-class probabilities to predicted class indices.
predictions = np.argmax(predictions, axis=1)

print("Test set accuracy: " + 
      str(accuracy_score(testGen.classes, predictions, normalize=True) * 100) 
      + "%") 

# Pass class names as an explicit list ordered by class index so each name is
# matched to the right label row in the report.
print(classification_report(testGen.classes, predictions,
                            target_names=sorted(testGen.class_indices,
                                                key=testGen.class_indices.get)))

In [None]:
# Plot the training/validation loss and accuracy curves and save the figure.

# Use the number of epochs actually recorded rather than EPOCHS: if training
# ends early (e.g. via the EarlyStopping callback) the history is shorter and
# plotting against np.arange(0, EPOCHS) would fail with a length mismatch.
epochs_run = np.arange(0, len(H.history["loss"]))

# Accept either accuracy key spelling ("acc" or "accuracy") in the history.
acc_key = "acc" if "acc" in H.history else "accuracy"

plt.style.use("ggplot")
plt.figure()
plt.plot(epochs_run, H.history["loss"], label="train_loss")
plt.plot(epochs_run, H.history["val_loss"], label="val_loss")
plt.plot(epochs_run, H.history[acc_key], label="train_acc")
plt.plot(epochs_run, H.history["val_" + acc_key], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig("plot.jpg")