# WBC Object Detection

In this notebook I build a YOLO11 model that detects white blood cells (WBCs), identifies their types, and draws bounding boxes around them.

* **My linkedin id** : [saurabh-maulekhi](https://www.linkedin.com/in/saurabh-maulekhi-326584241/)

* [**YOLO V1 Paper Reimplementation**](https://www.kaggle.com/code/saurabhmaulekhi/yolo-2017-v1-paper-reimplementation)

* [**Model Web Deployment**](https://huggingface.co/spaces/saurabh091/WBC_Detection_Using_Yolo11)

* [**Github Repo**](https://github.com/Saurabh-Maulekhi/WBCs-s-Object-Detection)

* [**Get my WBCs Object Detection Model**](https://www.kaggle.com/models/saurabhmaulekhi/wbc_detection_with_yolo11)

## Importing Libraries

In [None]:
# Install the ultralytics package (provides the YOLO class imported below);
# -q keeps pip's output quiet
!pip -q install  ultralytics

In [None]:
# Import libraries
import pandas as pd
import os
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import cv2
import yaml
import matplotlib.pyplot as plt
from ultralytics import YOLO
import multiprocessing

## Data Collection and Directories Organisation for training

In [None]:
# Root directory of the competition data (Kaggle input mount)
DATA_DIR = Path('/kaggle/input/cytologia-data-challenge')

# Preview the files and folders available directly under DATA_DIR
os.listdir(DATA_DIR)

In [None]:
# Directory layout used for training a YOLO model:
#   <DATASET_DIR>/<split>/images and <DATASET_DIR>/<split>/labels
# for the three splits train, val and test.

DATASET_DIR = Path('/kaggle/working')

# Split roots
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    DATASET_DIR / split for split in ("train", "val", "test")
)

# Image directories, one per split
TRAIN_IMAGES_DIR, VAL_IMAGES_DIR, TEST_IMAGES_DIR = (
    split_dir / "images" for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR)
)

# Label directories, one per split
TRAIN_LABELS_DIR, VAL_LABELS_DIR, TEST_LABELS_DIR = (
    split_dir / "labels" for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR)
)

In [None]:
# Creating directories
#
# Path.mkdir(parents=True, exist_ok=True) is used instead of os.mkdir so the
# cell can be re-run without raising FileExistsError when the folders are
# already there, and so missing parent folders are created as needed.
for directory in (
    TRAIN_DIR, VAL_DIR, TEST_DIR,
    TRAIN_IMAGES_DIR, VAL_IMAGES_DIR, TEST_IMAGES_DIR,
    TRAIN_LABELS_DIR, VAL_LABELS_DIR, TEST_LABELS_DIR,
):
    directory.mkdir(parents=True, exist_ok=True)

## Data Overview

In [None]:
# Read the train and test annotation tables
train = pd.read_csv('/kaggle/input/cytologia-data-challenge/cytologia-data.csv')
test = pd.read_csv('/kaggle/input/cytologia-data-challenge/cytologia-data Test .csv')

# Attach the full path of every image (image folder + file name from NAME)
OG_IMAGES_DIR  = Path("/kaggle/input/cytologia-data-challenge/images/dataset_cytologia")
train['image_path'] = [OG_IMAGES_DIR / name for name in train.NAME]
test['image_path'] = [OG_IMAGES_DIR / name for name in test.NAME]

# Build the class_name -> integer id mapping, numbering the classes in their
# order of first appearance in the train table
class_list = train["class"].unique()
class_len = len(class_list)
CLASS_DICT = {class_name: class_idx for class_idx, class_name in enumerate(class_list)}

# Label-encode the targets: string class -> integer class_id
train['class_id'] = train['class'].map(CLASS_DICT)

# Preview the head of the train set
train.head()

In [None]:
# Preview the first rows of the test table
test.head()

## Data Splitting and Data Transformation

In [None]:
# Split at image level: keep one row per image (NAME), then draw a 75/25
# split stratified on the class of that kept row
train_unique_imgs_df = train.drop_duplicates(subset=['NAME'], ignore_index=True)
train_imgs, val_imgs = train_test_split(
    train_unique_imgs_df,
    test_size=0.25,
    stratify=train_unique_imgs_df['class'],
    random_state=42,
)

# Bring back every annotation row belonging to the images of each split
in_train = train.NAME.isin(train_imgs.NAME)
in_val = train.NAME.isin(val_imgs.NAME)
X_train = train[in_train]
X_val = train[in_val]

# Check shapes of training and validation data
X_train.shape, X_val.shape

In [None]:
# Preview the first 10 rows of the training split
X_train.head(10)

In [None]:
# Preview the normalized class distribution of the training and validation
# splits; there seems to be a class imbalance that needs to be handled
X_train['class'].value_counts(normalize = True), X_val['class'].value_counts(normalize = True)

In [None]:
# Copy the train, val and test images from the input folder into their
# respective working directories, keeping the original file names

for split_df, images_dir in (
    (X_train, TRAIN_IMAGES_DIR),
    (X_val, VAL_IMAGES_DIR),
    (test, TEST_IMAGES_DIR),
):
    # unique(): an image with several annotation rows is copied only once
    for src in tqdm(split_df.image_path.unique()):
        shutil.copy(src, images_dir / src.name)

In [None]:
# Report how many files ended up in each split's images directory
for dir_label, images_dir in (
    ("TRAIN_IMAGES_DIR", TRAIN_IMAGES_DIR),
    ("VAL_IMAGES_DIR", VAL_IMAGES_DIR),
    ("TEST_IMAGES_DIR", TEST_IMAGES_DIR),
):
    print(f"{dir_label}: ", len(os.listdir(images_dir)))

## Creating yaml file

In [None]:
# Function to convert the bboxes to yolo format and save them
def save_yolo_annotation(row):
    """Append one bounding box, in YOLO text format, to its image's label file.

    The label file is ``<output_dir>/<image stem>.txt``. It is opened in
    append mode, so each call adds one line
    ``class_id x_center y_center width height`` with the four box values
    divided by the image width/height.

    Args:
        row: Mapping with the keys ``image_path``, ``class_id``,
            ``output_dir`` and the pixel-space box corners ``x1``, ``y1``,
            ``x2``, ``y2``.

    Raises:
        ValueError: If OpenCV cannot read the image at ``image_path``.
    """
    image_path, class_id, output_dir = row['image_path'], row['class_id'], row['output_dir']

    # Hand OpenCV a plain string, as plot_image_with_boxes does: image_path
    # is a pathlib.Path here and not every OpenCV build accepts path objects.
    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Could not read image from path: {image_path}")

    # The image is only needed for its size, used to normalize the box
    height, width = img.shape[:2]

    label_file = Path(output_dir) / f"{Path(image_path).stem}.txt"

    ymin, xmin, ymax, xmax = row['y1'], row['x1'], row['y2'], row['x2']

    # Normalize the coordinates: box centre and size as fractions of the image
    x_center = (xmin + xmax) / 2 / width
    y_center = (ymin + ymax) / 2 / height
    bbox_width = (xmax - xmin) / width
    bbox_height = (ymax - ymin) / height

    # Append mode: an image with several boxes gets one line per call.
    # NOTE(review): re-running the labelling step appends duplicate lines to
    # existing files; clear the labels directory before re-running.
    with open(label_file, 'a') as f:
        f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n")

# Parallelize the annotation saving process
def process_dataset(dataframe, output_dir):
    """Write YOLO label files for every row of ``dataframe`` in parallel.

    Each row is turned into a dict, tagged with ``output_dir`` and handed to
    ``save_yolo_annotation`` through a ``multiprocessing.Pool``.

    Args:
        dataframe: DataFrame whose rows provide the fields read by
            ``save_yolo_annotation`` (``image_path``, ``class_id``, ``x1``,
            ``y1``, ``x2``, ``y2``).
        output_dir: Directory that receives the label files.
    """
    # Tag each record with its destination instead of adding a column to the
    # caller's frame: the frames passed in are slices of `train`, and
    # assigning a column on a slice can trigger pandas' SettingWithCopyWarning
    # and silently alters the caller's data.
    records = [
        {**record, 'output_dir': output_dir}
        for record in dataframe.to_dict('records')
    ]

    with multiprocessing.Pool() as pool:
        # imap is lazy: list() drains it so every record is processed before
        # the pool is closed, while tqdm shows the progress.
        list(tqdm(pool.imap(save_yolo_annotation, records), total=len(records)))

# Write the label files of the training and validation splits into their
# respective labels directories
for split_df, labels_dir in ((X_train, TRAIN_LABELS_DIR), (X_val, VAL_LABELS_DIR)):
    process_dataset(split_df, labels_dir)

In [None]:
# Show the raw contents of one generated label file (the entry at index 10
# of the directory listing) as a sanity check of the YOLO format
label = os.listdir('/kaggle/working/train/labels')[10]

# Use a context manager so the file handle is closed after reading
with open('/kaggle/working/train/labels/'+ label) as label_file:
    label_contents = label_file.read()

label_contents

In [None]:
# Preview the first rows of the training split
X_train.head()

In [None]:
# Display the training images directory and training labels directory
TRAIN_IMAGES_DIR, TRAIN_LABELS_DIR

In [None]:
# Build the data.yaml dataset description required by yolo.
# class_names keeps the order of first appearance in the train table, the
# same order used to number the classes in CLASS_DICT.
class_names = train['class'].unique().tolist()
num_classes = len(class_names)

data_yaml = dict(
    train=str(TRAIN_DIR),
    val=str(VAL_DIR),
    test=str(TEST_DIR),
    nc=num_classes,
    names=class_names,
)

# Serialize to block-style YAML and write it next to the notebook
yaml_path = 'data.yaml'
yaml_text = yaml.dump(data_yaml, default_flow_style=False)
with open(yaml_path, 'w') as file:
    file.write(yaml_text)

# Preview data yaml file
data_yaml

In [None]:
# Plot some images and their bboxes to ensure the conversion was done correctly
def load_annotations(label_path):
    """Read a YOLO label file into a list of bounding-box tuples.

    Each non-blank line must hold five whitespace-separated numbers:
    ``class_id x_center y_center width height``.

    Args:
        label_path: Path of the label ``.txt`` file.

    Returns:
        A list of ``(class_id, x_center, y_center, width, height)`` tuples,
        every value converted to ``float``. Empty for an empty file.

    Raises:
        ValueError: If a non-blank line does not contain exactly five
            numeric fields.
    """
    boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or whitespace-only lines (e.g. a trailing empty line)
            # carry no box; skip them instead of failing on unpacking.
            if not fields:
                continue
            class_id, x_center, y_center, width, height = map(float, fields)
            boxes.append((class_id, x_center, y_center, width, height))
    return boxes

# Function to plot an image with its bounding boxes
def plot_image_with_boxes(image_path, boxes):
    """Display an image with its YOLO-format boxes drawn on top.

    Args:
        image_path: Path of the image file to show.
        boxes: Iterable of ``(class_id, x_center, y_center, width, height)``
            tuples whose four box values are fractions of the image size, as
            returned by ``load_annotations``.

    Raises:
        ValueError: If OpenCV cannot read the image at ``image_path``.
    """
    # Load the image; cv2.imread returns None instead of raising when the
    # file cannot be read, so fail here with a clear message (same error as
    # save_yolo_annotation) rather than inside cvtColor.
    image = cv2.imread(str(image_path))
    if image is None:
        raise ValueError(f"Could not read image from path: {image_path}")
    # OpenCV loads BGR; convert to RGB for matplotlib
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Image dimensions, used to scale the normalized boxes back to pixels
    h, w, _ = image.shape

    # Plot the image
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    axes = plt.gca()

    # Plot each bounding box
    for class_id, x_center, y_center, width, height in boxes:
        # Convert YOLO format (centre + size) to pixel corner coordinates
        xmin = int((x_center - width / 2) * w)
        ymin = int((y_center - height / 2) * h)
        xmax = int((x_center + width / 2) * w)
        ymax = int((y_center + height / 2) * h)

        # Draw the bounding box and its class label just above it
        axes.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                     edgecolor='red', facecolor='none', linewidth=2))
        plt.text(xmin, ymin - 10, f'Class {int(class_id)}', color='red', fontsize=12, weight='bold')

    plt.axis('off')
    plt.show()

# Directories for images and labels
IMAGE_DIR = TRAIN_IMAGES_DIR
LABEL_DIR = TRAIN_LABELS_DIR

# Plot a few images with their annotations
for image_name in os.listdir(IMAGE_DIR)[:3]:
    image_path = IMAGE_DIR / image_name
    # Same naming rule as save_yolo_annotation (<image stem>.txt), so the
    # label is found whatever the image extension is.
    label_path = LABEL_DIR / f"{Path(image_name).stem}.txt"

    # An image without a label file is reported and skipped instead of
    # crashing the preview.
    if not label_path.exists():
        print(f"No annotations found for {image_name}.")
        continue

    boxes = load_annotations(label_path)
    print(f"Plotting {image_name} with {len(boxes)} bounding boxes.")
    plot_image_with_boxes(image_path, boxes)


## Model Training

In [None]:
# Load a yolo pretrained model (YOLO11 medium checkpoint)
model = YOLO('yolo11m.pt')

# Fine tune model to our data
# NOTE(review): device=(0,1) assumes two GPUs are available in the session;
# adjust it when running on a single-GPU or CPU machine.
model.train(
    data='data.yaml',          # Path to the dataset configuration written earlier in this notebook
    epochs=30,                 # Number of epochs
    imgsz=640,                # Image size, given as a single integer
    batch=8,                   # Batch size
    device=(0,1),                  # Device to use (0 for the first GPU, 1 for the second GPU)
    patience=5)                # NOTE(review): presumably the early-stopping patience in epochs; confirm in the Ultralytics docs

## Testing Model

In [None]:
## List the weight files found in the training run's weights directory

os.listdir('/kaggle/working/runs/detect/train/weights')

In [None]:
# Validate the model on the validation set
# NOTE(review): val() is called without arguments; presumably it reuses the
# dataset configuration stored with the checkpoint; confirm.
model = YOLO('/kaggle/working/runs/detect/train/weights/best.pt') # model path
results = model.val()

## Predicting Detections for All Test Images and Saving Them to a CSV File

In [None]:
# Load the trained YOLO model
model = YOLO('/kaggle/working/runs/detect/train/weights/best.pt')

# Directory holding the test images, and the file names inside it
test_dir_path = TEST_IMAGES_DIR
image_files = os.listdir(test_dir_path)

# One dict per output row, accumulated over all images
all_data = []

for image_file in tqdm(image_files):
    # Run the model on one image; only the first result is used
    results = model(os.path.join(test_dir_path, image_file))
    prediction = results[0]

    # Bounding boxes in xyxy format
    boxes = prediction.boxes.xyxy.tolist()

    if not boxes:
        # No detection: emit a single placeholder row with class NEG,
        # confidence 1.0 and an all-zero box
        all_data.append({
            'Image_ID': image_file,
            'class': 'NEG',
            'confidence': 1.0,
            'ymin': 0,
            'xmin': 0,
            'ymax': 0,
            'xmax': 0,
        })
        continue

    classes = prediction.boxes.cls.tolist()      # Class indices
    confidences = prediction.boxes.conf.tolist()  # Confidence scores
    names = prediction.names                      # Class index -> class name

    # One row per detected box
    all_data.extend(
        {
            'Image_ID': image_file,
            'class': names[int(cls)],
            'confidence': conf,
            'ymin': y1,
            'xmin': x1,
            'ymax': y2,
            'xmax': x2,
        }
        for (x1, y1, x2, y2), cls, conf in zip(boxes, classes, confidences)
    )

# Convert the accumulated rows to a DataFrame covering all images
detection = pd.DataFrame(all_data)

In [None]:
# Preview the first rows of the detections table
detection.head()

In [None]:
# Count how many rows were produced per class (including the NEG placeholder)
detection['class'].value_counts()

In [None]:
# Save the detections table to Detection.csv, without the DataFrame index
detection.to_csv('Detection.csv', index = False)