In [None]:
# IMPORT RELEVANT PACKAGES AND LIBRARIES
import keras
import tensorflow as tf
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense

In [None]:
# Download the zip archive of cell images into the current working directory.
# The -nc (no-clobber) flag makes wget skip the download when cell_images.zip
# is already present, so this cell is safe to re-run.
!wget -nc ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip

In [None]:
# Randomly generate test/train set
import os
import random
from shutil import copyfile
from zipfile import ZipFile

# Set "True" to re-randomize image set on every run
REGENERATE_IMAGES_EVERY_RUN = False

# Fraction of each class copied to the training set; the remainder becomes the testing set
train_test_ratio = 0.8

ROOT_DIR = os.getcwd()
CELL_IMAGES_DIR = os.path.join(ROOT_DIR, "cell_images")
PARASITIZED_DIR = os.path.join(CELL_IMAGES_DIR, "Parasitized")
UNINFECTED_DIR = os.path.join(CELL_IMAGES_DIR, "Uninfected")
MALARIA_DATASET_DIR = os.path.join(ROOT_DIR, "Malaria_Dataset")
TRAINING_SET_DIR = os.path.join(MALARIA_DATASET_DIR, "Training_Set")
TRAIN_PARASITIZED_DIR = os.path.join(TRAINING_SET_DIR, "Parasitized")
TRAIN_UNINFECTED_DIR = os.path.join(TRAINING_SET_DIR, "Uninfected")
TESTING_SET_DIR = os.path.join(MALARIA_DATASET_DIR, "Testing_Set")
TEST_PARASITIZED_DIR = os.path.join(TESTING_SET_DIR, "Parasitized")
TEST_UNINFECTED_DIR = os.path.join(TESTING_SET_DIR, "Uninfected")


def _count_files(directory):
    """Return the number of entries in `directory`, or 0 if it does not exist."""
    return len(os.listdir(directory)) if os.path.isdir(directory) else 0


def _copy_images(image_names, source_dir, target_dir):
    """Copy every file named in `image_names` from `source_dir` into `target_dir`.

    `target_dir` is created first if it is missing. Source files are left in place.
    """
    os.makedirs(target_dir, exist_ok=True)
    for image_name in image_names:
        copyfile(os.path.join(source_dir, image_name),
                 os.path.join(target_dir, image_name))


# Discard the previous split when a fresh randomization is requested
if REGENERATE_IMAGES_EVERY_RUN and os.path.isdir(MALARIA_DATASET_DIR):
    from shutil import rmtree
    rmtree(MALARIA_DATASET_DIR)

if os.path.isdir(MALARIA_DATASET_DIR):
    # Reuse the split already on disk. The sizes are still computed so that the
    # cells below can read target_train_size / target_test_size on a fresh kernel.
    print("test/train set already exists!")
    target_parasitized_train_size = _count_files(TRAIN_PARASITIZED_DIR)
    target_parasitized_test_size = _count_files(TEST_PARASITIZED_DIR)
    target_uninfected_train_size = _count_files(TRAIN_UNINFECTED_DIR)
    target_uninfected_test_size = _count_files(TEST_UNINFECTED_DIR)
else:
    # Extract images if not already extracted
    if not os.path.isdir(CELL_IMAGES_DIR):
        print("Extracting images...")

        with ZipFile(os.path.join(ROOT_DIR, "cell_images.zip"), "r") as zipObj:
            zipObj.extractall(ROOT_DIR)

    # Shuffle once, then slice: the first `train size` names go to the training
    # set and the rest to the testing set.
    parasitized_images = os.listdir(PARASITIZED_DIR)
    uninfected_images = os.listdir(UNINFECTED_DIR)
    random.shuffle(parasitized_images)
    random.shuffle(uninfected_images)

    target_parasitized_train_size = int(len(parasitized_images) * train_test_ratio)
    target_parasitized_test_size = len(parasitized_images) - target_parasitized_train_size
    target_uninfected_train_size = int(len(uninfected_images) * train_test_ratio)
    target_uninfected_test_size = len(uninfected_images) - target_uninfected_train_size

    # Copy the held-out share of parasitized images to the testing set
    print("Copying parisitized images to testing set...")
    _copy_images(parasitized_images[target_parasitized_train_size:],
                 PARASITIZED_DIR, TEST_PARASITIZED_DIR)

    # Copy the remaining parasitized images to the training set
    print("Copying parisitized images to training set...")
    _copy_images(parasitized_images[:target_parasitized_train_size],
                 PARASITIZED_DIR, TRAIN_PARASITIZED_DIR)

    # Copy the held-out share of uninfected images to the testing set
    print("Copying uninfected images to testing set...")
    _copy_images(uninfected_images[target_uninfected_train_size:],
                 UNINFECTED_DIR, TEST_UNINFECTED_DIR)

    # Copy the remaining uninfected images to the training set
    print("Copying uninfected images to training set...")
    _copy_images(uninfected_images[:target_uninfected_train_size],
                 UNINFECTED_DIR, TRAIN_UNINFECTED_DIR)

    print("Done!")

# Totals across both classes, read by the configuration cell
target_train_size = target_parasitized_train_size + target_uninfected_train_size
target_test_size = target_parasitized_test_size + target_uninfected_test_size

In [None]:
# INITIALIZE VARIABLES

# Side length, in pixels, that every image is resized to when loaded
img_width = img_height = 128

# Folders holding the training and testing images
train_data_dir = 'Malaria_Dataset/Training_Set'
validation_data_dir = 'Malaria_Dataset/Testing_Set'

# Number of images in the training set and in the testing set
nb_train_samples = target_train_size
nb_validation_samples = target_test_size

# Training schedule
epochs = 20       # number of epochs to go through
batch_size = 4    # number of images per batch

In [None]:
# BUILDING CONVOLUTIONAL NEURAL NETWORK FROM SCRATCH

# Input layer is an img_height x img_width pixel image in RGB (x3); using the
# shared variables keeps the model in step with the size the generators produce
inp = tf.keras.layers.Input(shape=(img_height, img_width, 3))

# 1st convolutional layer has 32 filters with 3x3 kernels, ReLU activation function, and padding
conv1 = tf.keras.layers.Conv2D(32, kernel_size=(3, 3),
                               activation='relu', padding='same')(inp)

# 1st pooling layer with 2x2 downscaling
pool1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv1)

# 2nd convolutional layer has 64 filters with 3x3 kernels, ReLU activation function, and padding
conv2 = tf.keras.layers.Conv2D(64, kernel_size=(3, 3),
                               activation='relu', padding='same')(pool1)

# 2nd pooling layer with 2x2 downscaling
pool2 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv2)

# 3rd convolutional layer has 128 filters with 3x3 kernels, ReLU activation function, and padding
conv3 = tf.keras.layers.Conv2D(128, kernel_size=(3, 3),
                               activation='relu', padding='same')(pool2)

# 3rd pooling layer with 2x2 downscaling
pool3 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv3)

# flattening layer to convert convolutional neural output into inputs for MLP
flat = tf.keras.layers.Flatten()(pool3)

# 1st multilayer perceptron hidden layer with 256 nodes and ReLU activation functions
hidden1 = tf.keras.layers.Dense(256, activation='relu')(flat)

# 1st dropout layer to drop 30% of input units to reduce overfitting
drop1 = tf.keras.layers.Dropout(rate=0.3)(hidden1)

# 2nd multilayer perceptron hidden layer with 256 nodes and ReLU activation functions
hidden2 = tf.keras.layers.Dense(256, activation='relu')(drop1)

# 2nd dropout layer to drop 30% of input units to reduce overfitting
drop2 = tf.keras.layers.Dropout(rate=0.3)(hidden2)

# Output layer with two nodes giving probabilities of each class
out = tf.keras.layers.Dense(2, activation='softmax')(drop2)

# Creates a model that includes all layers in the computation of "out" given "inp"
model = tf.keras.Model(inputs=inp, outputs=out)

# Configures the model for training
# `sgd` is kept as an alternative optimizer; only `adam` is passed to compile().
# Both use the `learning_rate` keyword (`lr` is the deprecated spelling).
sgd = tf.keras.optimizers.SGD(learning_rate=.0001)
adam = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)

# categorical_crossentropy is the loss that matches a softmax output trained on
# the one-hot labels produced by class_mode='categorical'
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Summarize convolutional neural network architecture
model.summary()

In [None]:
# GENERATE IMAGES AND TRAIN MODEL
#   Here, we generate variants of our original dataset. Examples of variants
#   include images that are flipped, rotated, rescaled in size, and sheared.
#   The purpose of this is to prevent overfitting of the model. It can also
#   better help generalize the identification of different objects. For example,
#   this way, the model can identify a red blood cell that is oriented differently.

# build training image generator (pixel values scaled to [0, 1] plus random augmentation)
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True)

# build testing image generator (rescaling only, no augmentation)
test_datagen = ImageDataGenerator(rescale=1. / 255)

# stream training images from disk, one-hot encoding the two class folders
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# stream testing images from disk
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# fit the model on the generated images
#   steps_per_epoch / validation_steps count BATCHES, not images. len() of a
#   directory iterator is the number of batches needed to cover its images once,
#   so each epoch makes exactly one pass over the data.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

In [None]:
import matplotlib.pyplot as plt
# list all data in history
print(history.history.keys())

# The accuracy metric is recorded as 'acc' by some Keras versions and as
# 'accuracy' by others; use whichever key this run actually produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
val_acc_key = 'val_' + acc_key

# summarize history for accuracy
plt.plot(history.history[acc_key])
plt.plot(history.history[val_acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()