In [None]:
# Root of the image dataset; the TRAIN/ and TEST/ splits sit directly beneath it.
INPUT_DIR = "../input/blood-cells/dataset2-master/dataset2-master/images/"
train_dir, test_dir = INPUT_DIR + "TRAIN/", INPUT_DIR + "TEST/"

In [None]:
!pip install --upgrade git+https://github.com/broadinstitute/keras-resnet
import keras
import keras_resnet

print("imported")

In [None]:
# resnets_utils.py

import os
import numpy as np
import tensorflow as tf
import h5py
import math

def load_dataset():
    """
    Load the train and test arrays stored in the HDF5 files under ``datasets/``.

    Returns:
    train_set_x_orig -- array read from "train_set_x" in datasets/train_signs.h5
    train_set_y_orig -- "train_set_y" from the same file, reshaped to (1, number of examples)
    test_set_x_orig -- array read from "test_set_x" in datasets/test_signs.h5
    test_set_y_orig -- "test_set_y" from the same file, reshaped to (1, number of examples)
    classes -- array read from "list_classes" in datasets/test_signs.h5
    """
    # The context managers close each HDF5 file as soon as its contents have been
    # copied into in-memory numpy arrays, so no file handle is left open.
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # train set labels

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # test set labels
        classes = np.array(test_dataset["list_classes"][:]) # the list of classes

    # Turn the flat label vectors into row vectors of shape (1, m)
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Shuffle (X, Y) along the first axis and cut the result into consecutive mini-batches.

    Arguments:
    X -- input data, indexed as X[example, :, :, :]
    Y -- labels, indexed as Y[example, :]
    mini_batch_size -- number of examples per mini-batch, integer
    seed -- value handed to np.random.seed so the shuffle is repeatable

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples; the last tuple is
                    shorter when the number of examples is not a multiple of mini_batch_size
    """
    example_count = X.shape[0]
    np.random.seed(seed)

    # One shared permutation keeps every example aligned with its label
    order = list(np.random.permutation(example_count))
    X_shuffled = X[order,:,:,:]
    Y_shuffled = Y[order,:]

    # Slice boundaries: one pair per complete batch ...
    full_batches = math.floor(example_count/mini_batch_size)
    bounds = [(k * mini_batch_size, k * mini_batch_size + mini_batch_size)
              for k in range(0, full_batches)]
    # ... plus a final short slice when some examples are left over
    if example_count % mini_batch_size != 0:
        bounds.append((full_batches * mini_batch_size, example_count))

    return [(X_shuffled[lo:hi,:,:,:], Y_shuffled[lo:hi,:]) for lo, hi in bounds]


def convert_to_one_hot(Y, C):
    """Return a (C, number of labels) one-hot matrix: column k encodes the k-th entry of the flattened Y."""
    identity = np.eye(C)
    flat_labels = Y.reshape(-1)
    # Picking identity rows gives one one-hot row per label; transpose to one column per label
    one_hot_rows = identity[flat_labels]
    return one_hot_rows.T


def forward_propagation_for_predict(X, parameters):
    """
    Forward pass of the three-layer model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Arguments:
    X -- input tensor; it is left-multiplied by W1
    parameters -- python dictionary holding "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    Z3 -- the output of the last LINEAR unit (no softmax is applied here)
    """
    # Look up every parameter first, in the fixed order W1, b1, W2, b2, W3, b3
    W1, b1, W2, b2, W3, b3 = [parameters[key] for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')]

    hidden_1 = tf.nn.relu(tf.add(tf.matmul(W1, X), b1))         # relu(W1 . X + b1)
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(W2, hidden_1), b2))  # relu(W2 . A1 + b2)
    Z3 = tf.add(tf.matmul(W3, hidden_2), b3)                    # W3 . A2 + b3

    return Z3

def predict(X, parameters):
    """
    Run the three-layer model on one input and return the arg-max of its output.

    Arguments:
    X -- data fed to a float placeholder of shape [12288, 1]
    parameters -- python dictionary holding "W1", "b1", "W2", "b2", "W3", "b3";
                  each value is converted with tf.convert_to_tensor

    Returns:
    prediction -- value of tf.argmax applied to the output of forward_propagation_for_predict
    """
    # NOTE(review): tf.placeholder / tf.Session are TensorFlow 1.x graph-mode APIs;
    # confirm the runtime in use still provides them under these names.
    params = {key: tf.convert_to_tensor(parameters[key])
              for key in ("W1", "b1", "W2", "b2", "W3", "b3")}

    x = tf.placeholder("float", [12288, 1])

    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)

    # The context manager closes the session (and frees its resources) even if
    # sess.run raises; previously the session was never closed.
    with tf.Session() as sess:
        prediction = sess.run(p, feed_dict = {x: X})

    return prediction

print("completed")

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os
from urllib.request import urlopen,urlretrieve
from PIL import Image
from tqdm import tqdm_notebook
%matplotlib inline
from sklearn.utils import shuffle
import cv2
# from resnets_utils import *

from keras.models import load_model
from sklearn.datasets import load_files   
from keras.utils import np_utils
from glob import glob
from keras import applications
from keras.preprocessing.image import ImageDataGenerator 
from keras import optimizers
from keras.models import Sequential,Model,load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D,GlobalAveragePooling2D
from keras.callbacks import TensorBoard,ReduceLROnPlateau,ModelCheckpoint

# -----------------------------------------------
import os
import gc
import numpy as np 
import pandas as pd
import time
import cv2
# import tensorflow as tf
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16 as tmodel_vgg16
from keras.utils.np_utils import to_categorical
from tensorflow.keras.applications.resnet50 import ResNet50

from keras.applications.xception import Xception as tmodel_xception
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D,MaxPooling2D
from keras.layers import Input,SeparableConv2D,Conv2D
from keras.models import Sequential, Model 
from keras.losses import categorical_crossentropy as loss
from tensorflow.keras.optimizers import SGD,Adam
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.preprocessing import OneHotEncoder
from keras.callbacks import ModelCheckpoint,LearningRateScheduler
from keras.callbacks import EarlyStopping,CSVLogger,ReduceLROnPlateau,TensorBoard
# from keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import BatchNormalization
from keras.layers import Concatenate
from tensorflow.keras.utils import to_categorical
from keras.layers import Add
import numpy as np
import keras
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Conv2D, BatchNormalization, Dropout, MaxPool2D, Input, Softmax, Activation, Flatten
from keras.models import Model
from keras import optimizers
from keras.layers import SeparableConv2D, Concatenate
from keras.layers import concatenate,AveragePooling2D
# from keras.objectives import sparse_categorical_crossentropy as scc
from tensorflow.keras.metrics import sparse_categorical_crossentropy as scc
from keras.losses import categorical_crossentropy as cc
import os
import cv2
import scipy
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
# print(os.listdir("/content/gdrive/MyDrive/4-2"))
print("Loaded")

In [None]:
# Size every image is resized to by get_data, plus the channel count
height = 128
width = 128
channels = 3

# Width of the one-hot label vectors and of the model's softmax layer
n_classes = 5
input_shape = (height, width, channels)

# NOTE(review): the model.fit call later in this notebook passes epochs=80 literally,
# so this `epochs` value does not appear to be used - confirm which one is intended.
epochs = 50
batch_size = 32

In [None]:
def resize_img(img, shape):
    """Resize ``img`` with bicubic interpolation; ``shape`` is (rows, cols)."""
    # cv2.resize expects its size argument as (width, height), hence the swap
    target_size = (shape[1], shape[0])
    return cv2.resize(img, target_size, interpolation=cv2.INTER_CUBIC)

# Thanks to Paul Mooney for showing how to load the data
def get_data(folder, shape=None):
    """
    Load the images and labels stored under ``folder``.

    ``folder`` must contain one sub-directory per cell type. Hidden entries and
    stray files next to those sub-directories are skipped, as are image files
    that cv2.imread cannot decode.

    Arguments:
    folder -- directory holding the per-class sub-directories (a trailing
              separator is optional)
    shape -- optional (rows, cols) every image is resized to; defaults to the
             notebook-level (height, width)

    Returns:
    X -- array of the resized images
    y -- cell-type label per image: 1 NEUTROPHIL, 2 EOSINOPHIL, 3 MONOCYTE,
         4 LYMPHOCYTE, 5 for any other folder name
    z -- coarse label per image: 1 for NEUTROPHIL/EOSINOPHIL, 0 otherwise
    """
    if shape is None:
        shape = (height, width)

    # (cell-type label, coarse label) for each known class folder
    label_map = {
        'NEUTROPHIL': (1, 1),
        'EOSINOPHIL': (2, 1),
        'MONOCYTE': (3, 0),
        'LYMPHOCYTE': (4, 0),
    }
    # NOTE(review): an unknown folder is labelled 5, which does not fit into the
    # 5-wide one-hot encoding used later (valid indices 0-4) - confirm that only
    # the four known folders exist.
    fallback_labels = (5, 0)

    X, y, z = [], [], []
    # Sorted listings make the sample order independent of the filesystem
    for wbc_type in sorted(os.listdir(folder)):
        class_dir = os.path.join(folder, wbc_type)
        if wbc_type.startswith('.') or not os.path.isdir(class_dir):
            continue
        label, label2 = label_map.get(wbc_type, fallback_labels)
        for image_filename in tqdm(sorted(os.listdir(class_dir))):
            img_file = cv2.imread(os.path.join(class_dir, image_filename))
            if img_file is None:
                # cv2.imread returns None for files it cannot decode
                continue
            X.append(np.asarray(resize_img(img_file, shape)))
            y.append(label)
            z.append(label2)

    return np.asarray(X), np.asarray(y), np.asarray(z)
print("Completed")

In [None]:
# Read both splits from disk; each call returns (images, cell-type labels, coarse labels)
x_trn, y_trn, z_trn = get_data(train_dir)
x_test, y_test, z_test = get_data(test_dir)
print("Completed")

### normal preprocessing

In [None]:
# Quick look at how many samples were loaded
print(x_trn.shape)
print(y_trn.shape)
print(x_test.shape)
print(y_test.shape)
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig = x_trn, y_trn, x_test, y_test

# Whole-array rescaling: every pixel value is divided by 255
X_train = X_train_orig/255.
X_test = X_test_orig/255.

# One-hot encode the cell-type labels (width 5) and the coarse labels (width 2)
Y_train = to_categorical(Y_train_orig,5)
Y_test = to_categorical(Y_test_orig,5)
z_trnHot = to_categorical(z_trn,2)
z_testHot = to_categorical(z_test, num_classes = 2)
# Human-readable names for the integer labels produced by get_data
dict_characters = {1:'NEUTROPHIL',2:'EOSINOPHIL',3:'MONOCYTE',4:'LYMPHOCYTE'}
dict_characters2 = {0:'Mononuclear',1:'Polynuclear'}
print(dict_characters)
# print(dict_characters2)

# print(Y_train.shape)
# print(Y_test.shape)

def do_shuffling(X, y):
  """Return X and y reordered by one shared random permutation of their first axis."""
  sample_count = X.shape[0]
  # Drawing every index exactly once (no replacement) yields a permutation
  shuffled_order = np.random.choice(np.arange(sample_count), sample_count, replace=False)
  return X[shuffled_order, :, :, :], y[shuffled_order, : ]

X_train, Y_train = do_shuffling(X_train, Y_train)

print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

print('Completed')

### batch preprocessing

In [None]:
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig = x_trn, y_trn, x_test, y_test
# Human-readable names for the integer labels produced by get_data
dict_characters = {1:'NEUTROPHIL',2:'EOSINOPHIL',3:'MONOCYTE',4:'LYMPHOCYTE'}

# One-hot encode the cell-type labels
n_classes = 5
y_train = to_categorical(Y_train_orig, n_classes)
y_test = to_categorical(Y_test_orig, n_classes)

# The images are kept unscaled here; DataGenerator divides each batch by 255
X_train = X_train_orig
X_test = X_test_orig


def do_shuffling(X, y):
  """Return X and y reordered by one shared random permutation of their first axis."""
  sample_count = X.shape[0]
  # Drawing every index exactly once (no replacement) yields a permutation
  shuffled_order = np.random.choice(np.arange(sample_count), sample_count, replace=False)
  return X[shuffled_order, :, :, :], y[shuffled_order, : ]

X_train, y_train = do_shuffling(X_train, y_train)

# A stratified split keeps the class proportions the same in the training and
# validation subsets. Exactly one split is needed, so request one and take it
# directly; the previous n_splits=2 loop copied the image array twice and kept
# only the result of its last iteration.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=123)
train_index, val_index = next(sss.split(X_train, y_train))

X_train_data, X_val_data = X_train[train_index], X_train[val_index]
y_train_data, y_val_data = y_train[train_index], y_train[val_index]

print("Number of training samples: ", X_train_data.shape)
print("Number of validation samples: ", X_val_data.shape)

# The test split is used as loaded
X_test_data = X_test
y_test_data = y_test

In [None]:
#resizing the images as per EfficientNetB0 to size (224, 224)
# Input size used by the data generator and the network: 128 x 128 pixels, 3 channels.
# These values repeat the configuration cell near the top of the notebook.
height = 128
width = 128
channels = 3

# Width of the one-hot label vectors and of the model's softmax layer
n_classes = 5
input_shape = (height, width, channels)

# NOTE(review): the model.fit call later in this notebook passes epochs=80 literally,
# so this `epochs` value does not appear to be used - confirm which one is intended.
epochs = 50
batch_size = 32

In [None]:
!pip install albumentations

In [None]:
import tensorflow
import albumentations as albu

class DataGenerator(tensorflow.keras.utils.Sequence):
    """
    Keras Sequence that serves in-memory images as mini-batches.

    Every image is cast to float32 and divided by 255 when its batch is built.
    In 'fit' mode a batch is ``(X, y)``, optionally augmented; in 'predict'
    mode it is ``X`` only. Samples that do not fill a whole batch at the end
    of the index list are not served (``__len__`` rounds down).

    Arguments:
    images -- array of images, indexed along its first axis; each image must
              already have shape ``(*dim, channels)`` (no resizing is done here)
    labels -- array of labels aligned with ``images``; required in 'fit' mode
    mode -- 'fit' or 'predict'
    batch_size -- number of samples per batch
    dim -- (rows, cols) of each image
    channels -- number of channels of each image
    n_classes -- stored on the instance; not used by the generator itself
    shuffle -- reshuffle the sample order at construction and after every epoch
    augment -- apply random flips/distortions to each 'fit' batch

    Raises:
    ValueError -- when mode is 'fit' and no labels are given
    AttributeError -- from __getitem__ when mode is neither 'fit' nor 'predict'
    """

    def __init__(self, images, labels=None, mode='fit', batch_size=batch_size, dim=(height, width), channels=channels, n_classes=n_classes, shuffle=True, augment=False):
        super().__init__()

        # Fail early with a clear message instead of a TypeError on the first batch
        if mode == 'fit' and labels is None:
            raise ValueError("labels are required when mode is 'fit'.")

        # configuration of the generator
        self.images = images
        self.labels = labels
        self.mode = mode
        self.batch_size = batch_size
        self.dim = dim
        self.channels = channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.augment = augment
        # augmentation pipeline, built once on first use rather than per image
        self._augmenter = None
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the sample order; called at construction and after every epoch."""
        self.indexes = np.arange(self.images.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Number of complete batches per epoch (a trailing partial batch is dropped)."""
        return len(self.images) // self.batch_size

    def __getitem__(self, index):
        """Build batch number ``index``: ``(X, y)`` in 'fit' mode, ``X`` in 'predict' mode."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Size the buffer by the indexes actually selected so it can never hold
        # uninitialised rows; float32 matches the dtype of the rescaled images
        # and avoids an upcast to float64.
        X = np.empty((len(batch_indexes), *self.dim, self.channels), dtype=np.float32)

        for i, ID in enumerate(batch_indexes):
            # image rescaling
            X[i] = self.images[ID].astype(np.float32) / 255.

        if self.mode == 'fit':
            y = self.labels[batch_indexes]

            # augmentation on the training dataset
            if self.augment:
                X = self.__augment_batch(X)
            return X, y

        elif self.mode == 'predict':
            return X

        else:
            raise AttributeError("The mode should be set to either 'fit' or 'predict'.")

    def __random_transform(self, img):
        """Apply the random augmentation pipeline to one image."""
        if self._augmenter is None:
            self._augmenter = albu.Compose([albu.HorizontalFlip(p=0.5),
                                            albu.VerticalFlip(p=0.5),
                                            albu.GridDistortion(p=0.2),
                                            albu.ElasticTransform(p=0.2)])
        return self._augmenter(image=img)['image']

    def __augment_batch(self, img_batch):
        """Augment every image of the batch in place and return the batch."""
        for i in range(img_batch.shape[0]):
            img_batch[i] = self.__random_transform(img_batch[i])
        return img_batch
    
print("completed")

In [None]:
# Batch generators for training and validation; augmentation is switched off for both.
# To train with augmentation, use the variant below instead:
# train_data_generator = DataGenerator(X_train_data, y_train_data, augment=True)
train_data_generator = DataGenerator(X_train_data, y_train_data, augment=False)
valid_data_generator = DataGenerator(X_val_data, y_val_data, augment=False)

### Train Model

#### build model

In [None]:
# Alternative backbone (ResNet50), kept for reference:
# res_model = ResNet50(include_top=False,
#                     weights="imagenet",
#                     input_shape=input_shape)


# ResNet101 backbone loaded with ImageNet weights, built without its top
# classifier (include_top=False) and sized for this notebook's input_shape
res_model = tf.keras.applications.ResNet101(include_top=False,
                                            weights="imagenet",
                                            input_shape=input_shape)

In [None]:
base_model = res_model
x = base_model

# Freeze the backbone, leaving only its BatchNormalization layers trainable
for layer in base_model.layers:
    layer.trainable = "BatchNormalization" in layer.__class__.__name__

In [None]:
import tensorflow.keras as K

# Classification head stacked on the backbone: flatten, then three
# BatchNorm -> Dense(relu) -> Dropout(0.5) stages of shrinking width,
# then BatchNorm -> softmax over n_classes.
model = K.models.Sequential()
model.add(x)
model.add(K.layers.Flatten())
for units in (256, 128, 64):
    model.add(K.layers.BatchNormalization())
    model.add(K.layers.Dense(units, activation='relu'))
    model.add(K.layers.Dropout(0.5))
model.add(K.layers.BatchNormalization())
model.add(K.layers.Dense(n_classes, activation='softmax'))

print("added")

In [None]:
# Optimizer alternatives tried earlier:
# keras.optimizers.Adam(learning_rate=0.0001)
# K.optimizers.RMSprop(learning_rate=2e-5),
# Categorical cross-entropy matches the one-hot labels; categorical accuracy is tracked
model.compile(loss='categorical_crossentropy',
                  optimizer=K.optimizers.Adam(learning_rate=0.0001),
                  metrics=['categorical_accuracy'])

#### prepare callbacks

In [None]:
# Keep only the best version of the model, where "best" means lowest validation loss.
# Alternative location of a previously trained model:
# best_model = "../input/bestmodelhdf5/best_model.hdf5"
best_model = "./kaggle/working/best_model.hdf5"
# Create the target folder up front so writing the checkpoint cannot fail on a missing directory.
# NOTE(review): the path is relative ("./kaggle/working"), not "/kaggle/working" - confirm it is intended.
os.makedirs(os.path.dirname(best_model), exist_ok=True)
checkpoint = ModelCheckpoint(
    filepath=best_model,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min')

# Early stopping on the validation loss to avoid overfitting
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights=True)

# Halve the learning rate when the validation loss plateaus
rlrop = ReduceLROnPlateau(monitor='val_loss', mode='min', patience= 5, factor= 0.5, min_lr= 1e-6, verbose=1)

print("completed")

### use saved model

In [None]:
# Resume from a previously saved checkpoint when one exists; otherwise keep the
# freshly built model so the notebook still runs from top to bottom.
if os.path.exists(best_model):
    model = load_model(best_model)
else:
    print("No saved model found at", best_model, "- keeping the newly built model")

### fit model

In [None]:
# Alternative: train on the fully pre-scaled arrays instead of the generators
# history = model.fit(X_train, Y_train, 
#                     batch_size=32, epochs=80,
#                     callbacks=[checkpoint, rlrop],
#                     verbose=1, validation_split=0.2, shuffle=True)

# Train from the batch generators; early stopping is currently left out of the callbacks.
# NOTE(review): epochs is hard-coded to 80 here although an `epochs` variable is
# defined in the configuration cells - confirm which value is intended.
history = model.fit(train_data_generator,
                    validation_data=valid_data_generator,
                    # callbacks=[checkpoint, early_stop, rlrop],
                    callbacks=[checkpoint, rlrop],
                    verbose=1,
                    epochs=80,
                    use_multiprocessing=True)

In [None]:
print(history.history.keys())

In [None]:
# Training curves against the epoch number: loss on the left, accuracy on the right
model_history = history
plt.figure(figsize=(18,8))

plt.suptitle('Loss and Accuracy Plots', fontsize=18)

# (subplot position, train key, train legend, validation key, validation legend, y label, label font size)
curve_panels = (
    (1, 'loss', 'Training Loss', 'val_loss', 'Validation Loss', 'Loss', 15),
    (2, 'categorical_accuracy', 'Train Accuracy', 'val_categorical_accuracy', 'Validation Accuracy', 'Accuracy', 14),
)
for position, train_key, train_label, val_key, val_label, y_label, label_size in curve_panels:
    plt.subplot(1,2,position)
    plt.plot(model_history.history[train_key], label=train_label)
    plt.plot(model_history.history[val_key], label=val_label)
    plt.legend()
    plt.xlabel('Number of epochs', fontsize=label_size)
    plt.ylabel(y_label, fontsize=label_size)
plt.show()

### finding test accuracy

In [None]:
print(Y_test_orig.shape)

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, f1_score

# The data generator feeds the model pixels divided by 255. Depending on which
# preprocessing cell ran last, X_test is either already scaled (float) or still
# the raw integer image data; scale the raw form so inference matches training.
X_eval = X_test
if np.issubdtype(X_eval.dtype, np.integer):
    X_eval = X_eval.astype(np.float32) / 255.

predict_labels = model.predict(X_eval)
# reversing one-hot: one row of class probabilities per sample -> class index
y_pred = np.argmax(predict_labels, axis=1)

test_accuracy = accuracy_score(Y_test_orig, y_pred)
print('Test Accuracy: ', round((test_accuracy * 100), 2), "%")

macro_f1 = f1_score(Y_test_orig, y_pred, average="macro")
print( 'macro-f1: ' + str(macro_f1))

# Pin the reported classes to the four real label ids so the names stay aligned
# even if the model predicts an index outside 1-4
class_ids = [1, 2, 3, 4]
labels = ['1', '2', '3', '4']
print(classification_report(Y_test_orig, y_pred, labels=class_ids, target_names=labels))

In [None]:
# DataGenerator only serves complete batches, so trim the test set to a whole
# number of batches; the labels are trimmed the same way to stay aligned with
# the predictions.
test_factor = int(X_test_data.shape[0] / batch_size)
test_limit = batch_size * test_factor

X_test_data = X_test_data[:test_limit]
# Integer labels (not one-hot), to be compared against the arg-max predictions
y_test_data = Y_test_orig[:test_limit]
print(y_test_data.shape)


# shuffle=False keeps the prediction order identical to the label order
y_pred = model.predict(DataGenerator(X_test_data, mode='predict', augment=False, shuffle=False), verbose=1)
y_pred = np.argmax(y_pred, axis=1)

print(y_pred.shape)

In [None]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix as a coloured grid with the value printed in each cell.

    Arguments:
    cm -- confusion matrix as a 2-D numpy array (rows: true labels, columns: predicted labels)
    classes -- tick labels, one per row/column
    normalize -- when True, every row is divided by its sum before plotting
                 (rows that sum to zero stay all-zero); values are printed with two decimals
    title -- figure title
    cmap -- matplotlib colormap used for the grid
    """
    if normalize:
        # Normalise before drawing so the colours and the printed numbers agree
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.figure(figsize = (5,5))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Imported here so this cell does not depend on another evaluation cell having run first
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score

test_accuracy = accuracy_score(y_test_data, y_pred)

print('Test Accuracy: ', round((test_accuracy * 100), 2), "%")

macro_f1 = f1_score(y_test_data, y_pred, average="macro")
print( 'macro-f1: ' + str(macro_f1))

dict_characters = {1:'NEUTROPHIL',2:'EOSINOPHIL',3:'MONOCYTE',4:'LYMPHOCYTE'}
# Pin the label ids explicitly so every class name lines up with its id even if
# the model predicts an index that has no name
class_ids = list(dict_characters.keys())
labels = list(dict_characters.values())
print(classification_report(y_test_data, y_pred, labels=class_ids, target_names=labels))

confusion_mtx = confusion_matrix(y_test_data, y_pred, labels=class_ids) 
plot_confusion_matrix(confusion_mtx, classes = labels) 
plt.show()