In [68]:
import os
import pre_processing
import numpy as np
import pandas as pd
from matplotlib.image import imread
from tensorflow.keras.metrics import AUC as auc
import seaborn as sns
import matplotlib.pyplot as plt

<h2>Data Pre-Processing</h2>

In [4]:
pre_processing.check_data_exists()

test data location = ./chest_xray/test
training data location = ./chest_xray/train
validation data location = ./chest_xray/val


<h2>Data Pre-Processing</h2>

In [5]:
# We're going to have images of different dimensions, so handle that before using the CNN
dim1_mean, dim2_mean = pre_processing.get_average_dimensions()
image_shape = (300, 300, 3)

In [6]:
# Going to get the mean image size and resize to that
print(dim1_mean + "\n" + dim2_mean)

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U32'), dtype('<U32')) -> dtype('<U32')

In [None]:
pre_processing.flipImages()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
image_gen = ImageDataGenerator(horizontal_flip=True,
                               fill_mode='nearest')

In [85]:
image_gen.flow_from_directory(train_path)

Found 5216 images belonging to 2 classes.


<keras_preprocessing.image.directory_iterator.DirectoryIterator at 0x7f8d74080bd0>

In [None]:
image_gen.flow_from_directory(test_path)

In [None]:
image_gen.flow_from_directory(val_path)

<h2>Building a Model</h2>

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten
from tensorflow.keras.metrics import Accuracy, AUC

In [None]:
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3,3), input_shape=image_shape, activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))

# model.add(Conv2D(filters=128, kernel_size=(3,3), activation='relu'))
# model.add(MaxPool2D(pool_size=(2,2)))

# model.add(Conv2D(filters=256, kernel_size=(3,3), activation='relu'))
# model.add(MaxPool2D(pool_size=(2,2)))

# model.add(Conv2D(filters=512, kernel_size=(3,3), activation='relu'))
# model.add(MaxPool2D(pool_size=(2,2)))

model.add(Flatten())

model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam',
             metrics=[AUC()])

In [None]:
model.summary()

<h2>Regularization</h2>

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
early_stop = EarlyStopping(monitor='val_loss', patience=2)
batch_size = 64

In [None]:
train_image_gen = image_gen.flow_from_directory(train_path,
                                               target_size = image_shape[:2],
                                               batch_size = batch_size,
                                               class_mode = 'binary')

In [None]:
test_image_gen = image_gen.flow_from_directory(test_path,
                                               target_size=image_shape[:2],
                                               batch_size = batch_size,
                                               class_mode='binary',
                                               shuffle=False) # Don't want to shuffle test data and lose labels

In [None]:
test_image_gen.class_indices

<h3>Running the Model</h3>

In [None]:
# Running without early stop for now - not enough examples for 'val_loss'?
results = model.fit(train_image_gen, epochs=20,
                             validation_data=test_image_gen,
                    callbacks=[early_stop]
                   )

Train for 82 steps, validate for 10 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20


In [None]:
metrics = pd.DataFrame(model.history.history)

In [None]:
metrics[['accuracy', 'val_accuracy']].plot()

In [None]:
metrics[['auc', 'val_auc']].plot()

In [None]:
metrics[['loss', 'val_loss']].plot()

<p>We see that the model is overfitting significantly.</p>

In [None]:
model.save('image_classifier_accuracy.h5')