## Dataset
1. https://github.com/zxaoyou/segmentation_WBC
2. https://raabindata.com/free-data/

In [11]:
import csv
import sys

import tensorflow as tf
from keras import layers, models

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from PIL import Image
import os
import csv
import numpy as np
# import matplotlib.pyplot as plt
import pandas as pd
import shutil
import logging
import cv2
import matplotlib.pyplot as plt
import skimage.morphology as morph


import multiprocessing as mp
mp.set_start_method('spawn', force=True)

In [12]:
# Configuration
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
force = False # Force the image preprocessing again
types = ['Basophil', 'Eosinophil', 'Lymphocyte', 'Monocyte', 'Neutrophil']
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

raw_train = 'data/raw/train/'
raw_test = 'data/raw/test/'

train_folder = 'data/train'
validation_folder = 'data/validation'
test_folder = 'data/test'

# percentage of each cell present in each dataset

train_size = 0.70
validation_size = 0.15
test_size = 0.15

if tf.test.gpu_device_name():
    print('GPU found')
    tf.config.experimental.set_virtual_device_configuration(
       tf.config.list_physical_devices('GPU')[0],
       [
           tf.config.experimental.VirtualDeviceConfiguration(memory_limit=8192)
        ])
    tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)
else:
    print("No GPU found")


No GPU found


In [13]:
def create_generator(folder, subset):
    df = pd.read_csv(folder + '/class.csv', dtype=str, delimiter=',')
    data = ImageDataGenerator(rescale=1./255.)
    return data.flow_from_dataframe(
        dataframe=df,
        directory=folder,
        x_col='Image',
        y_col='Id',
        subset=subset,
        batch_size=128,
        shuffle=True,
        class_mode='categorical',
        target_size=(128, 128),
        validation_split=0.25
    )

def resize(cell, open_file=False):
    if open_file:
        cell = cv2.imread(cell)
    shape = cell.shape
    # Cropping the picture
    cell = np.delete(cell, np.s_[shape[0] - 150:shape[0]], axis=0)
    cell = np.delete(cell, np.s_[0:150], axis=0)
    cell = np.delete(cell, np.s_[shape[0] - 150:shape[0]], axis=1)
    cell = np.delete(cell, np.s_[0:150], axis=1)

    # resizing
    cell = cv2.resize(cell, (128, 128), interpolation=cv2.INTER_AREA)
    ori = np.copy(cell)
    cell = np.dot(cell[...,:3], [0.2999, 0.587, 0.114])
    cell = cell > 100
    return cell, ori

def get_core(cell, ori=None):
    if ori is None:
        cell, ori = cell
    shape = np.uint8(np.invert(morph.remove_small_holes(cell, 1024)))
    # plt.imshow(shape, cmap='gray')
    zeros = np.zeros((128, 128))
    shape = np.stack((zeros + shape, shape, zeros + shape), axis=2)
    return np.uint8(ori * shape)

In [14]:
logging.info("Starting program...")
train_csv = None

if not os.path.isdir('data/train') or force is True:

    logging.warning("No train folder detected...")
    logging.info("Creating folders...")


    if not os.path.isdir(train_folder):
        os.mkdir(train_folder)
        os.mkdir(test_folder)

    # Load Train
    train_csv = open(train_folder + '/class.csv', 'w', newline='')
    test_csv = open(test_folder + '/class.csv', 'w', newline='')
    train_writer = csv.writer(train_csv)
    test_writer = csv.writer(test_csv)
    train_writer.writerow(['Image', 'Id'])
    test_writer.writerow(['Image', 'Id'])
    i = 1
    for c in types:
        logging.info(c)
        folder_size = len([name for name in os.listdir(raw_train + c) if os.path.isfile(name)])
        count = 0
        for f in os.listdir(raw_train + c):
            # if os.path.isfile('data/' + c):
            p = get_core(resize(raw_train + c + '/' + f, open_file=True))
            cv2.imwrite(train_folder + '/' + f, p)
            # shutil.copy(raw_train + c + '/' + f, train_folder + '/' + f)
            train_writer.writerow([f, i])
            count += 1
            # if train_size >= folder_size * train_size:
            #     break
        count = 0
        for f in os.listdir(raw_test + c):
            # if os.path.isfile('data/' + c):
            p = get_core(resize(raw_test + c + '/' + f, open_file=True))
            cv2.imwrite(test_folder + '/' + f, p)
            # shutil.copy(raw_train + c + '/' + f, train_folder + '/' + f)
            test_writer.writerow([f, i])
            # shutil.copy(raw_test + c + '/' + f, test_folder + '/' + f)
            # test_writer.writerow([f, i])
        i += 1
    train_csv.close()
    test_csv.close()


"""
    with open(dataset_folder + '.csv', 'r') as f:
        reader = csv.reader(f)
        for line in reader:
            classification.append(line[1])
    if not os.path.isdir('data/train'):
        os.mkdir(train_folder)
        os.mkdir(validation_folder)
        os.mkdir(test_folder)

    i = 1
    file = None
    writer = None
    current = train_folder
    for f in os.listdir(dataset_folder):
        if i == 1:
            logging.info("Creating test dataset...")
            file = open(train_folder + '/class.csv', 'w', newline='')
            writer = csv.writer(file)
            writer.writerow(['Image', 'Id'])
        elif i == train_size :
            logging.info("Creating validation dataset...")
            file.close()
            file = open(validation_folder + '/class.csv', 'w', newline='')
            current = validation_folder
            writer = csv.writer(file)
            writer.writerow(['Image', 'Id'])
        elif i == train_size + validation_size:
            logging.info("Creating test dataset...")
            file.close()
            file = open(test_folder + '/class.csv', 'w', newline='')
            current = test_folder
            writer = csv.writer(file)
            writer.writerow(['Image', 'Id'])

        if os.path.isfile(dataset_folder + '/' + f) and 'bmp' in f:
            gray_scale(dataset_folder + '/' + f, current + '/' + f)
            writer.writerow([f, classification[i]])
            i += 1
    file.close()
    logging.info("Creation complete!")
    print(classification)
else:
    logging.info("Train folder detected!")
"""
logging.info("Starting WBC classification...")

INFO:Starting program...
INFO:Starting WBC classification...


In [15]:
logging.info("Creating generator...")
train_generator = create_generator(train_folder, 'training')
# validation_generator = create_generator(validation_folder, 'validation')
test_generator = create_generator(test_folder, 'validation')

INFO:Creating generator...


Found 10175 validated image filenames belonging to 5 classes.
Found 0 validated image filenames belonging to 5 classes.


In [7]:
logging.info("Training...")
model = models.Sequential()
model.add(layers.Conv2D(128, activation='relu', input_shape=(128, 128, 3), kernel_size=(5, 5), strides=1))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(256, (5, 5), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(512, (5, 5), activation='relu'))
model.add(layers.Dropout(0.25))
model.add(Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(5, activation='softmax'))
model.compile(optimizer='adam', metrics=['accuracy'], loss=tf.keras.losses.categorical_crossentropy)

INFO:Training...


In [8]:
model.fit(train_generator, validation_data=train_generator, epochs=10, verbose=2)
model.save('model')

Epoch 1/10
80/80 - 730s - loss: 1.2175 - accuracy: 0.7290 - val_loss: 0.4757 - val_accuracy: 0.8370 - 730s/epoch - 9s/step
Epoch 2/10
80/80 - 721s - loss: 0.3575 - accuracy: 0.8798 - val_loss: 0.2961 - val_accuracy: 0.9017 - 721s/epoch - 9s/step
Epoch 3/10
80/80 - 722s - loss: 0.3067 - accuracy: 0.8957 - val_loss: 0.2770 - val_accuracy: 0.9036 - 722s/epoch - 9s/step
Epoch 4/10
80/80 - 726s - loss: 0.2795 - accuracy: 0.9034 - val_loss: 0.2358 - val_accuracy: 0.9206 - 726s/epoch - 9s/step
Epoch 5/10
80/80 - 801s - loss: 0.2429 - accuracy: 0.9169 - val_loss: 0.2163 - val_accuracy: 0.9265 - 801s/epoch - 10s/step
Epoch 6/10
80/80 - 721s - loss: 0.2144 - accuracy: 0.9225 - val_loss: 0.1509 - val_accuracy: 0.9497 - 721s/epoch - 9s/step
Epoch 7/10
80/80 - 730s - loss: 0.1814 - accuracy: 0.9341 - val_loss: 0.1297 - val_accuracy: 0.9572 - 730s/epoch - 9s/step
Epoch 8/10
80/80 - 740s - loss: 0.1419 - accuracy: 0.9493 - val_loss: 0.0904 - val_accuracy: 0.9701 - 740s/epoch - 9s/step
Epoch 9/10
80/8



INFO:tensorflow:Assets written to: model\assets


INFO:Assets written to: model\assets


In [9]:
print(test_generator.class_indices)

{'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}


In [16]:
score = model.evaluate(train_generator)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

13/80 [===>..........................] - ETA: 2:12 - loss: 0.0372 - accuracy: 0.9886


KeyboardInterrupt



In [5]:
from tensorflow.python.client import device_lib
def get_available_devices():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos]
print(get_available_devices()) 
print("test")

['/device:CPU:0', '/device:GPU:0']
test


In [1]:
import tensorflow as tf
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]