In [None]:
# IMPORT RELEVANT PACKAGES AND LIBRARIES
import keras
import tensorflow as tf
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense

In [None]:
# Download the malaria cell-image archive (cell_images.zip) from the NIH/NLM
# FTP server into the current working directory. wget's -nc (no-clobber) flag
# skips the download when the file already exists locally.
!wget -nc ftp://lhcftp.nlm.nih.gov/Open-Access-Datasets/Malaria/cell_images.zip

In [None]:
# Randomly generate test/train set
import os
import random
from shutil import copyfile
from zipfile import ZipFile

import shutil  # local to this step: rmtree replaces the shell-only `rm -r`

# Set "True" to re-randomize image set on every run
REGENERATE_IMAGES_EVERY_RUN = True

ROOT_DIR = os.getcwd()
CELL_IMAGES_DIR = os.path.join(ROOT_DIR, "cell_images")
PARASITIZED_DIR = os.path.join(CELL_IMAGES_DIR, "Parasitized")
UNINFECTED_DIR = os.path.join(CELL_IMAGES_DIR, "Uninfected")
MALARIA_DATASET_DIR = os.path.join(ROOT_DIR, "Malaria_Dataset")
TRAINING_SET_DIR = os.path.join(MALARIA_DATASET_DIR, "Training_Set")
TRAIN_PARASITIZED_DIR = os.path.join(TRAINING_SET_DIR, "Parasitized")
TRAIN_UNINFECTED_DIR = os.path.join(TRAINING_SET_DIR, "Uninfected")
TESTING_SET_DIR = os.path.join(MALARIA_DATASET_DIR, "Testing_Set")
TEST_PARASITIZED_DIR = os.path.join(TESTING_SET_DIR, "Parasitized")
TEST_UNINFECTED_DIR = os.path.join(TESTING_SET_DIR, "Uninfected")

# Fraction of each class copied to the training set; the rest goes to the testing set
train_test_ratio = 0.8

# Only files with these extensions are treated as images. Anything else found
# in the source folders (e.g. a Thumbs.db left behind by Windows) is skipped,
# so it is neither copied nor counted as a sample when the sets are sized later.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".ppm", ".tif", ".tiff")


def _list_images(directory):
    """Return the sorted names of the image files located directly in `directory`."""
    return sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )


def _copy_images(image_names, source_dir, target_dir):
    """Copy each file in `image_names` from `source_dir` to `target_dir` (created if missing)."""
    os.makedirs(target_dir, exist_ok=True)
    for image_name in image_names:
        copyfile(os.path.join(source_dir, image_name),
                 os.path.join(target_dir, image_name))


# Drop the previous split so a fresh random one is generated below.
# The isdir guard keeps the very first run (no split yet) free of errors.
if REGENERATE_IMAGES_EVERY_RUN and os.path.isdir(MALARIA_DATASET_DIR):
    shutil.rmtree(MALARIA_DATASET_DIR)

if os.path.isdir(MALARIA_DATASET_DIR):
    # Reuse the existing split. Skipping (rather than raising SystemExit)
    # lets the remaining cells keep running under "Run All".
    print("test/train set already exists!")
else:
    # Extract images if not already extracted
    if not os.path.isdir(CELL_IMAGES_DIR):
        print("Extracting images...")

        with ZipFile(os.path.join(ROOT_DIR, "cell_images.zip"), "r") as zipObj:
            zipObj.extractall()

    # (label used in the progress messages, source dir, training dir, testing dir)
    # The "parisitized" spelling is kept so the printed output is unchanged.
    class_layout = (
        ("parisitized", PARASITIZED_DIR, TRAIN_PARASITIZED_DIR, TEST_PARASITIZED_DIR),
        ("uninfected", UNINFECTED_DIR, TRAIN_UNINFECTED_DIR, TEST_UNINFECTED_DIR),
    )

    for label, source_dir, train_dir, test_dir in class_layout:
        # One shuffle followed by a slice yields the same kind of uniformly
        # random split as repeatedly drawing and removing single images,
        # without the quadratic cost of list.remove().
        image_names = _list_images(source_dir)
        random.shuffle(image_names)
        train_count = int(len(image_names) * train_test_ratio)

        # The images beyond the training share (about 20%) form the testing set
        print(f"Copying {label} images to testing set...")
        _copy_images(image_names[train_count:], source_dir, test_dir)

        # The remaining images form the training set
        print(f"Copying {label} images to training set...")
        _copy_images(image_names[:train_count], source_dir, train_dir)

    print("Done!")

In [None]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

# INITIALIZE VARIABLES
train_size = len(os.listdir(TRAIN_PARASITIZED_DIR)) + len(os.listdir(TRAIN_UNINFECTED_DIR))
test_size = len(os.listdir(TEST_PARASITIZED_DIR)) + len(os.listdir(TEST_UNINFECTED_DIR))
img_width, img_height = 128, 128                         # images are resized to 128x128 pixels
train_data_dir = TRAINING_SET_DIR                        # directory of training set folder
validation_data_dir = TESTING_SET_DIR                    # directory of test set folder
nb_train_samples = train_size                            # number of files in training set
nb_validation_samples = test_size                        # number of files in testing set
epochs = 10                                              # number of passes over the training set
batch_size = 10                                          # images per batch (weights updated after each batch)

# VGG19 convolutional base with ImageNet weights and without its original
# classifier head (include_top=False). Keras expects input_shape as
# (height, width, channels), matching the generators' target_size order.
base_model = applications.VGG19(
    weights="imagenet",
    include_top=False,
    input_shape=(img_height, img_width, 3))

# No layers are frozen here, so the whole VGG19 base is fine-tuned together
# with the new head. To freeze the earliest layers instead, set
# `layer.trainable = False` on a slice of `base_model.layers` before compiling.

# Adding custom layers: flatten the feature maps, then two 512-unit dense
# layers (with dropout in between) and a 2-way softmax output.
x = base_model.output
x = Flatten()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(512, activation="relu")(x)
predictions = Dense(2, activation="softmax")(x)

# creating the final model
# (`inputs`/`outputs` are the Keras 2 keyword names; the singular
# `input`/`output` spellings are deprecated and rejected by newer releases)
model = Model(inputs=base_model.input, outputs=predictions)

# compile the model
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.SGD(lr=0.00001, momentum=0.9),
    metrics=["accuracy"])

# output model architecture
model.summary()

In [None]:
# GENERATE IMAGES AND TRAIN MODEL
#   Here, we generate variants of our original training images on the fly:
#   random rotations (up to 20 degrees) and horizontal/vertical flips.
#   The purpose of this is to reduce overfitting and help the model
#   generalize, e.g. recognize a red blood cell that is oriented differently.
#   Pixel values are also rescaled from [0, 255] to [0, 1] for both sets.

# build training image generator (rescaling + augmentation)
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=20,
    horizontal_flip=True,
    vertical_flip=True)

# build testing image generator (rescaling only, no augmentation)
test_datagen = ImageDataGenerator(rescale=1. / 255)

# generate training images
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# generate testing images
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# fit the model on the generated images
# steps_per_epoch / validation_steps are counted in BATCHES, not images.
# len(generator) is the number of batches needed to cover the directory once,
# so each epoch sees every image exactly one time.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

In [None]:
import matplotlib.pyplot as plt

# list all data in history
print(history.history.keys())

# Older Keras releases record the accuracy metric as 'acc'/'val_acc', newer
# ones as 'accuracy'/'val_accuracy'; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# One figure per metric: training curve and validation curve against epoch.
for history_key, label in ((acc_key, 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[history_key])
    plt.plot(history.history['val_' + history_key])
    plt.title('model ' + label)
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()