<a href="https://www.kaggle.com/code/george1128/blood-cell-identification-with-images?scriptVersionId=272522325" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# This notebook uses the "Blood Cell Images" dataset to demonstrate reusable code for image augmentation and a deep-learning pipeline. The dataset is from Paul Mooney: https://www.kaggle.com/datasets/paultimothymooney/blood-cells

In [None]:
#import models
import sklearn
import itertools
import cv2
import scipy
import os
import csv
import matplotlib.pyplot as plt
from keras.optimizers import RMSprop
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, MaxPool2D
from keras.layers import Lambda, BatchNormalization
#import numpy as np # linear algebra
#import pandas as pd


In [None]:
#reads a blood cell image and its corresponding annotation XML file
def annotate_image(image_path, xml_path):
    """Draw the named bounding boxes from an annotation XML onto an image.

    Every XML element whose tag contains ``object`` or ``part`` is scanned
    for a ``name`` child and a ``bndbox`` child. Elements that provide both
    are drawn as a rectangle with the name written inside its top-left corner.
    Elements missing either one are skipped.

    Args:
        image_path: Path of the image file, loaded with ``cv2.imread``.
        xml_path: Path of the annotation XML file.

    Returns:
        The image array returned by ``cv2.imread`` with the boxes and names
        drawn on it. The channel order is whatever ``cv2.imread`` produced
        (BGR for colour images), so convert before handing it to an RGB viewer.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
        KeyError: If a ``bndbox`` lacks one of xmin/ymin/xmax/ymax.
    """
    # Imported here so the function works regardless of which other cells
    # have been executed.
    import xml.etree.ElementTree as ET

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    tree = ET.parse(xml_path)

    # Text size scales with the image height so labels stay legible.
    font_scale = 1e-3 * image.shape[0]

    for elem in tree.iter():
        if 'object' not in elem.tag and 'part' not in elem.tag:
            continue

        name, box = None, None
        for attr in elem:
            if 'name' in attr.tag:
                name = attr.text
            if 'bndbox' in attr.tag:
                # Coordinates may be stored as floats; round to whole pixels.
                coords = {dim.tag: int(round(float(dim.text))) for dim in attr}
                box = (coords['xmin'], coords['ymin'], coords['xmax'], coords['ymax'])

        # Without both a name and a box there is nothing sensible to draw.
        if not name or box is None:
            continue

        xmin, ymin, xmax, ymax = box
        # Colour is picked from the first letter of the name; tuples are in
        # the image's own channel order.
        if name[0] == "R":
            color = (0, 255, 0)
        elif name[0] == "W":
            color = (0, 0, 255)
        else:
            color = (255, 0, 0)

        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 1)
        cv2.putText(image, name, (xmin + 10, ymin + 15),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 1)
    return image

In [None]:
import xml.etree.ElementTree as ET
image_path = "/kaggle/input/blood-cells/dataset-master/dataset-master/JPEGImages/BloodImage_00010.jpg"
xml_path = "/kaggle/input/blood-cells/dataset-master/dataset-master/Annotations/BloodImage_00010.xml"
annotated_image = annotate_image(image_path, xml_path)

plt.figure(figsize=(16, 16))
# cv2.imread yields BGR channel order while matplotlib interprets arrays as
# RGB, so convert for display only (annotated_image itself is left untouched).
plt.imshow(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

In [None]:
# Load the label table shipped with dataset2 into a DataFrame.
# `labels` is only previewed in the next cell; the training labels used by the
# model are derived from the image folder names instead.
labels_path ="/kaggle/input/blood-cells/dataset2-master/dataset2-master/labels.csv"
labels = pd.read_csv(labels_path)

In [None]:
# Preview the first rows of the label table.
labels.head()

In [None]:
# Define augment images
import albumentations as A

# Random augmentation pipeline: flip, small rotation, mild blur and
# brightness/contrast jitter.
# Pixel scaling is deliberately NOT part of this pipeline: the loaded arrays
# are divided by 255 later in the notebook, and an additional A.Normalize here
# would scale the data twice and squash the inputs to roughly +/- 1/255.
augmenter = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.GaussianBlur(blur_limit=3, p=0.3),
    A.RandomBrightnessContrast(p=0.5),
])

def augment_image(image):
    """Run ``image`` through the module-level ``augmenter`` pipeline.

    Args:
        image: Image array forwarded to ``augmenter`` as its ``image`` keyword.

    Returns:
        The ``'image'`` entry of the pipeline's result.
    """
    return augmenter(image=image)['image']

In [None]:
# Read the images and their labels
from tqdm import tqdm
def get_images_labels(folder, augment=True):
    """Load every image below ``folder`` together with an integer class id.

    ``folder`` is expected to contain one sub-directory per cell type. Each
    readable image is resized to 80x60 (width x height), converted from BGR
    to RGB and, when ``augment`` is true, passed through ``augment_image``.

    Class ids are zero-based so they are valid targets for a softmax layer
    trained with sparse categorical cross-entropy, and so that id ``i`` lines
    up with position ``i`` of the ``label_names`` list used for the confusion
    matrix: NEUTROPHIL=0, EOSINOPHIL=1, MONOCYTE=2, LYMPHOCYTE=3, any other
    directory name=4.

    Args:
        folder: Directory holding one sub-directory per class.
        augment: Apply ``augment_image`` to each image (default, matching the
            previous behaviour). Pass ``False`` for evaluation data that
            should not be randomly perturbed.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected images and
        their class ids.
    """
    wbc_label_map = {
        'NEUTROPHIL': 0,
        'EOSINOPHIL': 1,
        'MONOCYTE': 2,
        'LYMPHOCYTE': 3,
    }
    # Unknown directory names share one catch-all id right after the known ones.
    other_label = len(wbc_label_map)

    X = []
    y = []
    for wbc_type in os.listdir(folder):
        class_dir = os.path.join(folder, wbc_type)
        # Skip hidden entries and stray files that are not class directories.
        if wbc_type.startswith('.') or not os.path.isdir(class_dir):
            continue

        label = wbc_label_map.get(wbc_type, other_label)
        for image_filename in tqdm(os.listdir(class_dir)):
            img_file = cv2.imread(os.path.join(class_dir, image_filename))
            if img_file is None:
                # Not an image OpenCV can decode; ignore it.
                continue

            resized_img = cv2.resize(img_file, (80, 60))  # (width, height)
            rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
            X.append(augment_image(rgb_img) if augment else rgb_img)
            y.append(label)

    return np.asarray(X), np.asarray(y)
        


In [None]:
# Locations of the per-class image folders for the two splits.
train_folder ="/kaggle/input/blood-cells/dataset2-master/dataset2-master/images/TRAIN/"
test_folder = "/kaggle/input/blood-cells/dataset2-master/dataset2-master/images/TEST/"
# Load both splits as (images, labels) arrays.
# NOTE(review): get_images_labels runs augment_image on every image it loads,
# so the TEST split is randomly augmented as well -- confirm this is intended.
X_train, y_train = get_images_labels(train_folder)
X_test, y_test = get_images_labels(test_folder)


In [None]:
# Scale the pixel values of both image arrays by 1/255.
X_train, X_test = (image_array / 255.0 for image_array in (X_train, X_test))

In [None]:
# Report the array shapes of both splits as a quick sanity check.
for caption, array in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(caption, np.shape(array))

In [None]:
#CNN model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
# Size of the softmax output layer.
num_classes = 5

# Three Conv -> BatchNorm -> MaxPool stages (32, 64, 128 filters) followed by
# a dense classifier head with dropout. Input is 60x80 RGB (height, width, 3).
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(60, 80, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

In [None]:
#define Confusion Matrix Callback
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

class ConfusionMatrixLogger(Callback):
    """Keras callback that prints and plots a confusion matrix periodically.

    Args:
        X_val: Inputs handed to ``self.model.predict``.
        y_val: Ground-truth labels, either integer class ids (1-D) or
            per-class rows (2-D), which are reduced with ``argmax``.
        label_names: Display names of the classes; position ``i`` is used as
            the tick label for class id ``i``.
        interval: Log after every ``interval``-th epoch; must be at least 1.

    Raises:
        ValueError: If ``interval`` is smaller than 1.
    """

    def __init__(self, X_val, y_val, label_names, interval=5):
        super().__init__()
        if interval < 1:
            # A zero interval would otherwise surface as a ZeroDivisionError
            # in the middle of training.
            raise ValueError(f"interval must be >= 1, got {interval}")
        self.X_val = X_val
        self.y_val = y_val
        self.label_names = label_names
        self.interval = interval

    def on_epoch_end(self, epoch, logs=None):
        """Log the confusion matrix when ``epoch + 1`` is a multiple of ``interval``."""
        if (epoch + 1) % self.interval != 0:
            return

        y_pred = self.model.predict(self.X_val)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_val = np.asarray(self.y_val)
        y_true = np.argmax(y_val, axis=1) if y_val.ndim > 1 else y_val

        # Pin the matrix to one row/column per entry of label_names. Without
        # this, sklearn sizes the matrix from the classes that happen to occur,
        # which can disagree with the fixed-length tick labels below.
        class_ids = np.arange(len(self.label_names))
        cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
        print(f"\nConfusion Matrix at Epoch {epoch+1}:\n{cm}")

        # Heatmap view of the same matrix.
        plt.figure(figsize=(6, 5))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=self.label_names,
                    yticklabels=self.label_names)
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title(f'Confusion Matrix at Epoch {epoch+1}')
        plt.show()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# Tick labels for the confusion-matrix heatmap.
# NOTE(review): these are applied positionally -- confirm the order matches the
# integer class ids produced by get_images_labels.
label_names = ['NEUTROPHIL', 'EOSINOPHIL', 'MONOCYTE', 'LYMPHOCYTE', 'OTHER']
# Training callbacks; the first three all react to the validation loss.
callbacks = [
    # Stop after 5 epochs without val_loss improvement and roll back to the best weights.
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    # Keep only the best-scoring model on disk.
    ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True),
    # Halve the learning rate after 3 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
    ConfusionMatrixLogger(X_test, y_test, label_names, interval=5)  # confusion matrix on the test split every 5 epochs
]

In [None]:
# The sparse loss takes integer class ids directly, so y is not one-hot encoded.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train for up to 50 epochs; `history` feeds the plots in the following cells.
# NOTE(review): the test split is used as validation data, so it also drives
# the val_loss-based callbacks (early stopping, checkpointing, LR schedule).
# Consider a separate validation split if an unbiased test score is needed.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=50,
                    batch_size=32,
                    callbacks=callbacks)

In [None]:
# Plot training and validation accuracy per epoch
import matplotlib.pyplot as plt

accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Val Accuracy'),
)
for history_key, legend_text in accuracy_curves:
    plt.plot(history.history[history_key], label=legend_text)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Plot training and validation loss per epoch
loss_curves = (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
)
for history_key, legend_text in loss_curves:
    plt.plot(history.history[history_key], label=legend_text)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Model Loss Over Epochs')
plt.legend()
plt.grid(True)
plt.show()