# Libraries & GPU connection test

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from keras import layers
from keras import models
from keras.utils import np_utils
from keras.datasets import cifar10
from keras.datasets import cifar100
from keras.preprocessing.image import ImageDataGenerator
import os, shutil
import datetime
from IPython.display import display
from PIL import Image
import math
from keras import callbacks

# Sanity check: report how many GPU devices TensorFlow can see.
visible_gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

Num GPUs Available:  1


Using TensorFlow backend.


# Data preparation:

In [59]:
# Path to original data:
original_dataset_dir = 'C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells'

# Path to new data store:
base_dir = 'C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells\\DATA'

# Train, validation, test folders:
train_dir = os.path.join(base_dir, 'Train')
validation_dir = os.path.join(base_dir, 'Validation')
test_dir = os.path.join(base_dir, 'Test')

# Train:
train_eosinophil_dir = os.path.join(train_dir, 'EOSINOPHIL')
train_lymphocyte_dir = os.path.join(train_dir, 'LYMPHOCYTE')
train_monocyte_dir = os.path.join(train_dir, 'MONOCYTE')
train_neutrophil_dir = os.path.join(train_dir, 'NEUTROPHIL')

# Validation:
validation_eosinophil_dir = os.path.join(validation_dir, 'EOSINOPHIL')
validation_lymphocyte_dir = os.path.join(validation_dir, 'LYMPHOCYTE')
validation_monocyte_dir = os.path.join(validation_dir, 'MONOCYTE')
validation_neutrophil_dir = os.path.join(validation_dir, 'NEUTROPHIL')

# Test:
test_eosinophil_dir = os.path.join(test_dir, 'EOSINOPHIL')
test_lymphocyte_dir = os.path.join(test_dir, 'LYMPHOCYTE')
test_monocyte_dir = os.path.join(test_dir, 'MONOCYTE')
test_neutrophil_dir = os.path.join(test_dir, 'NEUTROPHIL')

# Create the whole tree in one pass. os.makedirs also creates the missing
# parents (base_dir and the three split folders), and exist_ok = True makes
# re-running this cell safe instead of raising FileExistsError.
for new_dir in (
        train_eosinophil_dir, train_lymphocyte_dir,
        train_monocyte_dir, train_neutrophil_dir,
        validation_eosinophil_dir, validation_lymphocyte_dir,
        validation_monocyte_dir, validation_neutrophil_dir,
        test_eosinophil_dir, test_lymphocyte_dir,
        test_monocyte_dir, test_neutrophil_dir):
    os.makedirs(new_dir, exist_ok = True)

# Rename files to a more structured naming scheme:
def change_files_names(path, class_name):
    """Rename every file in ``path`` to ``<index>_<class_name>.jpg``.

    Files are numbered from 0 in sorted filename order, so the result is
    deterministic. Sub-directories are left untouched.

    The rename is done in two phases (original name -> staging name -> final
    name). A direct rename can hit a target name that already belongs to a
    file not yet processed (for example when the function is run a second
    time); that would overwrite the file on POSIX and raise on Windows.

    Parameters
    ----------
    path : str
        Directory whose files are renamed in place.
    class_name : str
        Label embedded in each new filename.

    Returns
    -------
    None
    """
    entries = os.listdir(path)
    filenames = sorted(
        name for name in entries
        if os.path.isfile(os.path.join(path, name))
    )

    # Pick a staging prefix that no existing entry starts with, so staging
    # names can never collide with anything already in the directory.
    stage_prefix = "__renaming__"
    while any(name.startswith(stage_prefix) for name in entries):
        stage_prefix += "_"

    # Phase 1: move every file out of the way under a unique staging name.
    staged_paths = []
    for index, filename in enumerate(filenames):
        staged = os.path.join(path, "{}{}".format(stage_prefix, index))
        os.rename(os.path.join(path, filename), staged)
        staged_paths.append(staged)

    # Phase 2: all final names are now free, so assign them.
    for index, staged in enumerate(staged_paths):
        dst = os.path.join(path, str(index) + "_" + class_name + ".jpg")
        os.rename(staged, dst)
        
# Train, Validation, Test Sizes:
# NOTE: validation_size and test_size are cumulative END indices, not counts.
# Files [0, train_size) go to train, [train_size, validation_size) to
# validation and [validation_size, test_size) to test (300 / 100 / 100).
train_size = 300
validation_size = train_size + 100
test_size = validation_size + 100

# Folder that holds one sub-folder of raw images per class.
raw_classes_root = 'C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells'

# One row per class:
# (source folder name, label used in filenames, train dir, validation dir, test dir)
class_split_plan = [
    ("EOSINOPHIL", "Eosinophil",
     train_eosinophil_dir, validation_eosinophil_dir, test_eosinophil_dir),
    ("LYMPHOCYTE", "Lymphocyte",
     train_lymphocyte_dir, validation_lymphocyte_dir, test_lymphocyte_dir),
    ("MONOCYTE", "Monocyte",
     train_monocyte_dir, validation_monocyte_dir, test_monocyte_dir),
    ("NEUTROPHIL", "Neutrophil",
     train_neutrophil_dir, validation_neutrophil_dir, test_neutrophil_dir),
]

for class_folder, class_label, class_train_dir, class_validation_dir, class_test_dir in class_split_plan:
    # Normalise the raw filenames to "<index>_<label>.jpg" so the splits can
    # be selected by index range.
    original_dataset_dir = os.path.join(raw_classes_root, class_folder)
    change_files_names(original_dataset_dir, class_label)

    split_ranges = [
        ("train", range(train_size), class_train_dir),
        ("validation", range(train_size, validation_size), class_validation_dir),
        ("test", range(validation_size, test_size), class_test_dir),
    ]
    for split_name, index_range, target_dir in split_ranges:
        fnames = ['{}_{}.jpg'.format(i, class_label) for i in index_range]
        for fname in fnames:
            src = os.path.join(original_dataset_dir, fname)
            dst = os.path.join(target_dir, fname)
            shutil.copyfile(src, dst)
        print(datetime.datetime.now(),
              "{} {} dataset created successfully".format(class_folder, split_name),
              ": number of observations =", len(os.listdir(target_dir)))

2019-11-17 18:45:35.577906 EOSINOPHIL train dataset created successfully : number of observations = 300
2019-11-17 18:45:35.654700 EOSINOPHIL validation dataset created successfully : number of observations = 100
2019-11-17 18:45:35.733490 EOSINOPHIL test dataset created successfully : number of observations = 100
2019-11-17 18:45:36.317926 LYMPHOCYTE train dataset created successfully : number of observations = 300
2019-11-17 18:45:36.394721 LYMPHOCYTE validation dataset created successfully : number of observations = 100
2019-11-17 18:45:36.473510 LYMPHOCYTE test dataset created successfully : number of observations = 100
2019-11-17 18:45:37.040992 MONOCYTE train dataset created successfully : number of observations = 300
2019-11-17 18:45:37.116790 MONOCYTE validation dataset created successfully : number of observations = 100
2019-11-17 18:45:37.189595 MONOCYTE test dataset created successfully : number of observations = 100
2019-11-17 18:45:37.748101 NEUTROPHIL train dataset create

# VGG16 architecture & generators

In [3]:
##########
# Parameters:
train_dir = "C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells\\DATA\\Train"
validation_dir = "C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells\\DATA\\Validation"
test_dir = "C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells\\DATA\\Test"

input_shape = 150 # image side length in pixels (originally 224)
canales = 3       # number of image channels
batch_size = 8
# One class per sub-directory of train_dir. Only directories are counted so
# that a stray file (e.g. Thumbs.db) cannot inflate the size of the softmax layer.
num_classes = len([entry for entry in os.listdir(train_dir)
                   if os.path.isdir(os.path.join(train_dir, entry))])
epochs = 10
optimizer = "adam"
loss = "categorical_crossentropy"
metrics = ["accuracy"]
class_mode = "categorical"

def count_observations(directory):
    """Count the files stored under the sub-folders of ``directory``.

    Each entry of ``directory`` is walked recursively and every file found
    below it is counted. Files that sit directly in ``directory`` itself are
    not counted.

    Parameters
    ----------
    directory : str
        Folder containing one sub-folder per class.

    Returns
    -------
    int
        Total number of files found under all sub-folders.
    """
    total = 0
    for entry in os.listdir(directory):
        # os.path.join keeps this working on any OS; the previous hard-coded
        # "\\" separator only resolved on Windows.
        class_dir = os.path.join(directory, entry)
        total += sum(len(filenames) for _root, _dirs, filenames in os.walk(class_dir))
    return total

# Number of images available in each split.
train_images_count, validation_images_count, test_images_count = [
    count_observations(split_dir)
    for split_dir in (train_dir, validation_dir, test_dir)
]

# Batches needed to see every image once; ceil keeps the last partial batch.
train_steps_per_epoch, validation_steps_per_epoch, test_steps_per_epoch = [
    math.ceil(image_count/batch_size)
    for image_count in (train_images_count, validation_images_count, test_images_count)
]

##########
# VGG16 architecture:
# VGG16-style stack built from scratch. Every Conv2D / Dense layer uses a
# linear activation followed by BatchNormalization and then ReLU.
VGG16 = models.Sequential()

# (filters, number of conv layers) for each convolutional stage:
# Conv 1-2, Conv 3-4, Conv 5-7, Conv 8-10, Conv 11-13.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

is_first_layer = True
for stage_filters, stage_depth in conv_stages:
    for _ in range(stage_depth):
        conv_kwargs = dict(filters = stage_filters, kernel_size = (3, 3), strides = (1, 1),
                           activation = "linear", padding = "same")
        if is_first_layer:
            # Only the very first layer declares the input tensor shape.
            conv_kwargs["input_shape"] = (input_shape, input_shape, canales)
            is_first_layer = False
        VGG16.add(layers.Conv2D(**conv_kwargs))
        VGG16.add(layers.BatchNormalization())
        VGG16.add(layers.Activation("relu"))
    # Each stage ends by halving the spatial resolution.
    VGG16.add(layers.MaxPool2D(pool_size = (2, 2), strides = (2, 2), padding = "valid"))

# Dense: 14-15
VGG16.add(layers.Flatten())
for _ in range(2):
    VGG16.add(layers.Dense(units = 128, activation = "linear"))
    VGG16.add(layers.BatchNormalization())
    VGG16.add(layers.Activation("relu"))
    VGG16.add(layers.Dropout(rate = 0.5))

# Dense: 16
VGG16.add(layers.Dense(units = num_classes, activation = "softmax"))

# Show the summary only once the model is complete, so the output layer is
# included. summary() prints by itself and returns None, so it is not
# wrapped in print().
VGG16.summary()

##########
# Model compilation:
VGG16.compile(optimizer = optimizer,
              loss = loss,
              metrics = metrics)

# Folder where checkpoints are written. It is created when missing so this
# cell also works on a fresh checkout; exist_ok = True keeps re-runs safe.
models_dir = "C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells\\Models"
os.makedirs(models_dir, exist_ok = True)
# The working directory is still switched, as before, in case later cells
# rely on relative paths.
os.chdir(models_dir)

# Save the model whenever validation accuracy reaches a new maximum; the
# epoch number and the accuracy are embedded in the filename.
checkpoint = callbacks.ModelCheckpoint(
    filepath = os.path.join(models_dir, "VGG16-weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"),
    monitor = 'val_accuracy',
    verbose = 1, 
    save_best_only = True,
    mode = 'max')
callbacks_list = [checkpoint]

##########
# Generators:
# Training images are randomly augmented (rotation, shifts, shear, zoom,
# flips) and rescaled to [0, 1].
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 45, 
    width_shift_range = 0.1,
    height_shift_range = 0.1, 
    shear_range = 0.1,
    zoom_range = 0.1,
    horizontal_flip = True, 
    vertical_flip = True,
    fill_mode = "nearest")
train_generator = train_datagen.flow_from_directory(
    directory = train_dir, 
    target_size = (input_shape, input_shape), 
    batch_size = batch_size,
    class_mode = class_mode,
    shuffle = True)

# Validation images are only rescaled. Random augmentation here would make
# val_accuracy noisy and non-reproducible, and that metric decides which
# checkpoint is kept.
validation_datagen = ImageDataGenerator(rescale = 1./255)
validation_generator = validation_datagen.flow_from_directory(
    directory = validation_dir, 
    target_size = (input_shape, input_shape), 
    batch_size = batch_size,
    class_mode = class_mode,
    shuffle = False)

# Test images are only rescaled. shuffle = False keeps the batch order
# aligned with test_generator.classes / .filenames, so predictions can be
# matched to their labels.
test_datagen = ImageDataGenerator(rescale = 1./255)
test_generator = test_datagen.flow_from_directory(
    directory = test_dir, 
    target_size = (input_shape, input_shape), 
    batch_size = batch_size,
    class_mode = class_mode,
    shuffle = False)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 150, 150, 64)      1792      
_________________________________________________________________
batch_normalization_16 (Batc (None, 150, 150, 64)      256       
_________________________________________________________________
activation_16 (Activation)   (None, 150, 150, 64)      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 150, 150, 64)      36928     
_________________________________________________________________
batch_normalization_17 (Batc (None, 150, 150, 64)      256       
_________________________________________________________________
activation_17 (Activation)   (None, 150, 150, 64)      0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 75, 75, 64)       

In [4]:
##########
# Fit model:
# Train on the augmented training generator, evaluate on the validation
# generator after every epoch, and let the checkpoint callback save the model.
history = VGG16.fit_generator(
    train_generator,
    steps_per_epoch=train_steps_per_epoch,
    epochs=epochs,
    callbacks=callbacks_list,
    validation_data=validation_generator,
    validation_steps=validation_steps_per_epoch,
)

Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.25000, saving model to C:\Users\admin\Desktop\GitHub\DNN\Datasets\Blood_cells\Models\VGG16-weights-improvement-01-0.25.hdf5
Epoch 2/10

Epoch 00002: val_accuracy did not improve from 0.25000
Epoch 3/10

Epoch 00003: val_accuracy did not improve from 0.25000
Epoch 4/10

Epoch 00004: val_accuracy did not improve from 0.25000
Epoch 5/10

Epoch 00005: val_accuracy did not improve from 0.25000
Epoch 6/10

Epoch 00006: val_accuracy improved from 0.25000 to 0.30750, saving model to C:\Users\admin\Desktop\GitHub\DNN\Datasets\Blood_cells\Models\VGG16-weights-improvement-06-0.31.hdf5
Epoch 7/10

Epoch 00007: val_accuracy improved from 0.30750 to 0.34250, saving model to C:\Users\admin\Desktop\GitHub\DNN\Datasets\Blood_cells\Models\VGG16-weights-improvement-07-0.34.hdf5
Epoch 8/10

Epoch 00008: val_accuracy improved from 0.34250 to 0.37250, saving model to C:\Users\admin\Desktop\GitHub\DNN\Datasets\Blood_cells\Models\VGG16-weights-impr

'C:\\Users\\admin\\Desktop\\GitHub\\DNN\\Datasets\\Blood_cells\\Models'