In [1]:
import os
import numpy as np
import tqdm
from PIL import Image
import matplotlib.pyplot as plt
from keras.layers import Input, Conv2D, MaxPooling2D, Activation, Dense, Dropout, Flatten, BatchNormalization
from keras.models import Model
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from dotenv import load_dotenv

Using TensorFlow backend.


In [None]:
# Load environment variables from the local env file; later cells read
# PATH_TO_TRAIN and PATH_TO_TEST from the environment.
load_dotenv('./config.env')

# Preprocessing Images and Creating Train and Test Data

In [2]:
def load_images(image_files_list, resize_factor=1):
    '''
    Load images from file paths and put together as numpy array.

    Parameters
    ----------
    image_files_list : iterable
        Paths (or file objects) accepted by ``PIL.Image.open``.
    resize_factor : int or float, optional
        When greater than 1, the width and height of every image are divided
        by this factor (truncated to int) before conversion.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(n_images, *first_image_shape)`` holding the
        pixel values divided by 255. An empty array of shape ``(0,)`` is
        returned when no files are given.

    Raises
    ------
    ValueError
        If an image does not have the same array shape as the first image.
    '''
    # Materialise once so len() works for any iterable, not only lists.
    image_files_list = list(image_files_list)
    if not image_files_list:
        # Nothing to load: the per-image shape is unknown, so return an empty
        # array instead of failing on an unassigned result buffer.
        return np.empty((0,), dtype=np.float32)

    ret = None
    for i, image_file in enumerate(tqdm.tqdm(image_files_list)):
        # The context manager closes the underlying file handle once the
        # pixel data has been copied into a numpy array.
        with Image.open(image_file) as img:
            resized = img
            if resize_factor > 1:
                resized = img.resize((int(img.width/resize_factor), int(img.height/resize_factor)))

            # convert the image to numpy array and normalize pixel values
            data = np.asarray(resized, dtype=np.float32)/255

        if ret is None:
            # initialize array to return, sized from the first image
            ret = np.empty((len(image_files_list), *data.shape), dtype=np.float32)
        elif data.shape != ret.shape[1:]:
            raise ValueError(
                f"Image {image_file!r} has shape {data.shape}, expected {ret.shape[1:]}"
            )

        ret[i] = data
    return ret

def get_image_array_and_labels(path_to_types_folders, resize_factor=1):
    '''
    Returns array containing all images under path_to_types_folders and array
    containing corresponding labels.

    Parameters
    ----------
    path_to_types_folders : str
        Directory holding one sub-folder per white blood cell type
        (EOSINOPHIL, LYMPHOCYTE, MONOCYTE, NEUTROPHIL). Entries that are not
        directories or whose name is not one of those types are skipped.
    resize_factor : int or float, optional
        Forwarded unchanged to ``load_images``.

    Returns
    -------
    (images_array, image_labels)
        ``images_array`` is whatever ``load_images`` returns for the collected
        ``.jpeg`` files; ``image_labels`` is a uint8 array with one integer
        class label per file, in the same order.

    Raises
    ------
    ValueError
        If ``path_to_types_folders`` is None (e.g. an unset environment
        variable).
    '''
    if path_to_types_folders is None:
        raise ValueError(
            'path_to_types_folders is None; check that the dataset path is configured'
        )

    #Mapping of white blood cell types to ints
    type_to_int = {'EOSINOPHIL': 0, 'LYMPHOCYTE': 1, 'MONOCYTE': 2, 'NEUTROPHIL': 3}

    image_files = []
    image_labels = []

    #Get files and labels for images under folders for each type of white blood cell
    # sorted() makes the sample order independent of the filesystem's listing order.
    for type_folder in sorted(os.listdir(path_to_types_folders)):
        folder_path = os.path.join(path_to_types_folders, type_folder)
        # Ignore stray files and folders that are not a known cell type.
        if type_folder not in type_to_int or not os.path.isdir(folder_path):
            continue
        files = sorted(
            os.path.join(folder_path, file)
            for file in os.listdir(folder_path)
            if file.lower().endswith('.jpeg')
        )
        image_files.extend(files)
        image_labels.extend([type_to_int[type_folder]] * len(files))

    images_array = load_images(image_files, resize_factor)
    image_labels = np.array(image_labels, dtype=np.uint8)
    return images_array, image_labels

In [None]:
# os.environ[...] raises a clear KeyError when a dataset path is not configured,
# instead of passing None down into the loader.
X_train, y_train = get_image_array_and_labels(os.environ['PATH_TO_TRAIN'])
X_test, y_test = get_image_array_and_labels(os.environ['PATH_TO_TEST'])

# The loader returns samples grouped by class. Keras' `validation_split` takes
# the *last* fraction of the arrays before shuffling, so without this step the
# validation set would contain a single class. Shuffle images and labels in
# place with identically seeded generators so both get the same permutation
# (in place to avoid copying the image array).
SHUFFLE_SEED = 42
np.random.RandomState(SHUFFLE_SEED).shuffle(X_train)
np.random.RandomState(SHUFFLE_SEED).shuffle(y_train)

In [None]:
#Convert labels to one hot encoding
# num_classes is pinned to the 4 cell types so the encoding always matches the
# 4-unit softmax output, even if a split happens to lack the highest label.
y_train_one_hot = to_categorical(y_train, num_classes=4)
y_test_one_hot = to_categorical(y_test, num_classes=4)

# Creating and Training Models

## Model 1

In [3]:
# Input tensor shaped like a single image from the test array
x_input = Input(shape=X_test.shape[1:])

# Convolution block 1: 32 filters, then pooling and dropout
x = Conv2D(32, 3, activation='relu')(x_input)
x = MaxPooling2D(2)(x)
x = Dropout(0.25)(x)

# Convolution block 2: 64 filters, then pooling and dropout
x = Conv2D(64, 3, activation='relu')(x)
x = MaxPooling2D(2)(x)
x = Dropout(0.25)(x)

# Dense head: flatten the feature maps and feed a 128-unit hidden layer
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.25)(x)

# Softmax output, one unit per class
x = Dense(4, activation='softmax')(x)

# Assemble the functional model
model1 = Model(inputs=x_input, outputs=x)

NameError: name 'X_test' is not defined

In [None]:
# Configure the optimizer/loss for Model 1, then fit it while holding out
# 10% of the training arrays for validation.
model1.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
history1 = model1.fit(
    X_train,
    y_train_one_hot,
    batch_size=64,
    epochs=150,
    verbose=2,
    validation_split=0.1,
)

In [None]:
# Evaluate Model 1 on the test set: pick the most probable class per sample
# and compute the fraction of samples predicted correctly.
y1_predict = np.argmax(model1.predict(X_test), axis=1)
model1_test_acc = np.mean(y1_predict == y_test)

## Model 2

In [None]:
# Input tensor shaped like a single image from the test array
x_input = Input(shape=X_test.shape[1:])

# Convolution block 1: 32 filters with 'same' padding, dropout only (no pooling)
x = Conv2D(32, 3, activation='relu', padding='same')(x_input)
x = Dropout(0.25)(x)

# Convolution block 2: 32 filters, then pooling and heavier dropout
x = Conv2D(32, 3, activation='relu', padding='same')(x)
x = MaxPooling2D(2)(x)
x = Dropout(0.40)(x)

# Convolution block 3: 64 filters, then pooling and dropout
x = Conv2D(64, 3, activation='relu', padding='same')(x)
x = MaxPooling2D(2)(x)
x = Dropout(0.40)(x)

# Dense head: flatten the feature maps and feed a 64-unit hidden layer
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.40)(x)

# Softmax output, one unit per class
x = Dense(4, activation='softmax')(x)

# Assemble the functional model
model2 = Model(inputs=x_input, outputs=x)


In [None]:
# Configure the optimizer/loss for Model 2, then fit it while holding out
# 10% of the training arrays for validation.
model2.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
history2 = model2.fit(
    X_train,
    y_train_one_hot,
    batch_size=64,
    epochs=150,
    verbose=2,
    validation_split=0.1,
)

In [None]:
# Test Trained Model on Test Data
# Predictions must come from model2 (not model1) so that the accuracy and the
# confusion matrix reported for Model 2 actually describe Model 2.
y2_predict = model2.predict(X_test).argmax(axis=1)
model2_test_acc = (y2_predict == y_test).sum()/(y2_predict.shape[0])

## Model 3

In [None]:
# Input tensor shaped like a single image from the test array
x_input = Input(shape=X_test.shape[1:])

# Convolution block 1: 64 filters with 'same' padding, then pooling and dropout
x = Conv2D(64, 3, activation='relu', padding='same')(x_input)
x = MaxPooling2D(2)(x)
x = Dropout(0.25)(x)

# Convolution block 2: 32 filters, then pooling and dropout
x = Conv2D(32, 3, activation='relu', padding='same')(x)
x = MaxPooling2D(2)(x)
x = Dropout(0.25)(x)

# Convolution block 3: 32 filters, then pooling and heavier dropout
x = Conv2D(32, 3, activation='relu', padding='same')(x)
x = MaxPooling2D(2)(x)
x = Dropout(0.4)(x)

# Convolution block 4: 32 filters with default padding, dropout only
x = Conv2D(32, 3, activation='relu')(x)
x = Dropout(0.4)(x)

# Dense head: flatten the feature maps and feed a 128-unit hidden layer
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.1)(x)

# Softmax output, one unit per class
x = Dense(4, activation='softmax')(x)

# Assemble the functional model
model3 = Model(inputs=x_input, outputs=x)


In [None]:
# Configure the optimizer/loss for Model 3, then fit it while holding out
# 10% of the training arrays for validation.
model3.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
history3 = model3.fit(
    X_train,
    y_train_one_hot,
    batch_size=64,
    epochs=150,
    verbose=2,
    validation_split=0.1,
)

In [None]:
# Test Trained Model on Test Data
y3_predict = model3.predict(X_test).argmax(axis=1)
# Store the result under its own name; writing to model2_test_acc here would
# silently overwrite Model 2's test accuracy.
model3_test_acc = (y3_predict == y_test).sum()/(y3_predict.shape[0])

## Plot Results

In [None]:
def plot_confusion_matrix(title, class_labels, cm, ax=None):
    '''
    Draw a row-normalised (percentage) confusion matrix as a heat map.

    From https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    Parameters
    ----------
    title : str
        Prefix for the plot title (' Confusion Matrix' is appended).
    class_labels : list or dict
        Tick labels, one per class. A dict is converted to a list of its
        values ordered by sorted key.
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix of counts; each row holds the true-label counts.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted a new figure/axes pair is created and
        ``tight_layout`` is applied to that figure.

    Returns
    -------
    The Axes the matrix was drawn on.
    '''

    if isinstance(class_labels, dict):
        class_labels = [class_labels[k] for k in sorted(class_labels.keys())]
    if isinstance(cm, list):
        cm = np.asarray(cm)

    assert cm.shape == (len(class_labels), len(class_labels))

    # Remember whether the figure is ours: `ax` is reassigned below, so testing
    # `ax is None` again at the end could never be true.
    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots()

    # each row corresponds to total true labels for the class
    class_totals = cm.sum(axis=1, keepdims=True)
    # Rows with no true samples stay at 0% instead of producing 0/0 = NaN.
    cm_pct = np.divide(
        100.0 * cm,
        class_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=class_totals != 0,
    )
    im = ax.imshow(cm_pct, interpolation='nearest', cmap='Blues')
    ax.get_figure().colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]))

    ax.set_title(title + ' Confusion Matrix', fontsize=14)
    ax.set_xlabel('Predicted Label', fontsize=12)
    ax.set_ylabel('True Label', fontsize=12)

    ax.set_xticklabels(class_labels, fontsize=8, rotation=45)
    ax.set_yticklabels(class_labels, fontsize=8)

    # show text annotations; use white text on the darker (above-threshold) cells
    thresh = cm_pct.max()/2
    for i in range(cm_pct.shape[0]):
        for j in range(cm_pct.shape[1]):
            c = 'white' if cm_pct[i, j] > thresh else 'black'
            ax.text(j, i, f"{cm_pct[i, j]:.1f}%", ha='center', va='center', color=c)

    if created_fig:
        fig.tight_layout()
    return ax


def plot_results(model_name, history, y_test, y_predict):
    '''
    Creates and saves two plots:
       1. Training and validation accuracy graph
       2. Confusion matrix

    Parameters
    ----------
    model_name : str
        Used in the plot titles and as the prefix of the saved file name
        (``<model_name>_results.png``).
    history : object with a ``history`` dict
        Training history as returned by ``model.fit``; must contain the
        accuracy series under 'acc'/'val_acc' or 'accuracy'/'val_accuracy'.
    y_test : array-like of int
        True class labels.
    y_predict : array-like of int
        Predicted class labels.
    '''
    class_labels = ['EOSINOPHIL', 'LYMPHOCYTE', 'MONOCYTE', 'NEUTROPHIL']
    fig, axs = plt.subplots(1, 2, figsize=(14,5))

    # Plot training and validation accuracy
    # The metric key depends on the Keras version: older releases record
    # 'acc', newer ones 'accuracy'.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    train_acc = history.history[acc_key]
    val_acc = history.history['val_' + acc_key]
    axs[0].plot(range(1, 1+len(train_acc)), train_acc, label='train_acc')
    axs[0].plot(range(1, 1+len(val_acc)), val_acc, label='val_acc')
    axs[0].set_xlabel('Epoch')
    axs[0].set_ylabel('Accuracy')
    # Thin the ticks for long runs so the epoch labels stay readable.
    tick_step = max(1, len(val_acc)//15)
    axs[0].set_xticks(range(1, 1+len(val_acc), tick_step))
    axs[0].legend()
    axs[0].set_title(model_name + ' Accuracy History')

    # Plot Confusion Matrix
    # Passing `labels` keeps the matrix at one row/column per class even when a
    # class is absent from both y_test and y_predict.
    cm = confusion_matrix(y_test, y_predict, labels=list(range(len(class_labels))))
    plot_confusion_matrix(model_name, class_labels, cm, ax=axs[1])
    png_file = model_name + '_results.png'
    # Save this figure explicitly rather than whichever figure is "current".
    fig.savefig(png_file)


In [None]:
# Render and save the accuracy history and confusion matrix for each model
plot_results(model_name='Model 1', history=history1, y_test=y_test, y_predict=y1_predict)
plot_results(model_name='Model 2', history=history2, y_test=y_test, y_predict=y2_predict)
plot_results(model_name='Model 3', history=history3, y_test=y_test, y_predict=y3_predict)