<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classficiation-CNN/Generating_Annotations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Required Libraries

In [1]:
# Install necessary libraries
!pip install tensorflow tensorflow-hub tensorflow-addons opencv-python

# Import libraries
import os
import random
import shutil
import uuid
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from google.colab import drive
from PIL import Image
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl.metadata (3.6 kB)
Downloading tensorflow_addons-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
  Attempting uninstall: typeguard
    Found existing installation: typeguard 4.4.2
    Uninstalling typeguard-4.4.2:
      Successfully uninstalled typeguard-4.4.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
inflect 7.5.0 requires typeguard>=4.0.1, b

Mount Google Drive and Define paths

In [2]:
# Mount Google Drive
drive.mount('/content/drive')

# Define dataset paths
RAW_DATASET_PATH = "/content/drive/MyDrive/DSGP/DSGP_dataset"
OUTPUT_PATH = "/content/drive/MyDrive/DSGP/CNN_Dataset"

# Detect tumor classes
tumor_classes = [folder for folder in os.listdir(RAW_DATASET_PATH) if os.path.isdir(os.path.join(RAW_DATASET_PATH, folder))]
print("Detected tumor classes:", tumor_classes)

# Create output folders
for split in ["Train", "Val", "Test"]:
    for subdir in ["Images", "Annotations"]:
        for tumor_class in tumor_classes:
            os.makedirs(os.path.join(OUTPUT_PATH, split, subdir, tumor_class), exist_ok=True)

Mounted at /content/drive
Detected tumor classes: ['tuberculoma', 'granuloma', 'no_tumour', 'papiloma', 'schwannoma', 'meduloblastoma', 'pituitary', 'neurocitoma', 'oligodendroglioma', 'meningioma', 'germinoma', 'astrocitoma', 'glioblastoma', 'ependimoma', 'ganglioglioma', 'carcinoma']


Build & Compile U-Net Model for Segmentation

In [3]:
def build_unet(input_size=(256, 256, 3)):
    """Build a U-Net model for RGB image segmentation."""
    inputs = keras.Input(input_size)

    # Encoder
    c1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(c3)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    # Bottleneck
    c4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(p3)
    c4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(c4)

    # Decoder
    u5 = layers.Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(c4)
    u5 = layers.concatenate([u5, c3])
    c5 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(u5)

    u6 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = layers.concatenate([u6, c2])
    c6 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u6)

    u7 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = layers.concatenate([u7, c1])
    c7 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u7)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c7)
    return keras.Model(inputs=inputs, outputs=outputs)

# Compile U-Net
unet_model = build_unet()
unet_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print("U-Net model built and compiled.")

U-Net model built and compiled.


Image Preprocessing & Segmentation Functions

In [4]:
TARGET_SIZE = (256, 256)

def preprocess_image(image_path):
    """Load and resize RGB MRI image."""
    image = load_img(image_path, color_mode="rgb")
    image = image.resize(TARGET_SIZE)
    return img_to_array(image) / 255.0

def get_segmentation_mask(image):
    """Generate tumor segmentation mask using U-Net."""
    img_resized = tf.image.resize(image, TARGET_SIZE)
    img_resized = tf.expand_dims(img_resized, 0)
    mask = unet_model.predict(img_resized)[0]
    return (mask > 0.5).astype(np.uint8)

def mask_to_bbox(mask, is_no_tumor=False):
    """Generate bounding box from mask."""
    if is_no_tumor:
        return [0, 0, TARGET_SIZE[0], TARGET_SIZE[1]]
    y_indices, x_indices = np.where(mask > 0)
    if y_indices.size == 0:
        return [0, 0, TARGET_SIZE[0], TARGET_SIZE[1]]
    return [np.min(x_indices), np.min(y_indices), np.max(x_indices), np.max(y_indices)]

Data Augmentation & Balancing

In [5]:
augmentor = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

def augment_image(image, count, save_dir, tumor_class):
    """Augment image and save with unique names."""
    image = np.expand_dims(image, axis=0)
    for i in range(count):
        aug_img = next(augmentor.flow(image, batch_size=1))[0]
        unique_id = str(uuid.uuid4())
        save_path = os.path.join(save_dir, f"{tumor_class}_aug_{unique_id}.png")
        Image.fromarray((aug_img * 255).astype(np.uint8)).save(save_path)

Data Splitting

In [6]:
def split_and_balance(images, max_count, save_dir, tumor_class):
    """Split and balance images across Train, Val, Test."""
    train, temp = train_test_split(images, test_size=0.3, random_state=42)
    val, test = train_test_split(temp, test_size=0.5, random_state=42)

    # Balance training set
    current_count = len(train)
    if current_count < max_count:
        augment_image(train[0], max_count - current_count, save_dir, tumor_class)

    return train, val, test

Generating Annotations

In [7]:
def create_pascal_voc_xml(image_path, bbox, label, save_dir):
    """Generate PASCAL VOC XML annotations."""
    image_name = os.path.basename(image_path)
    xml_filename = os.path.splitext(image_name)[0] + ".xml"

    root = ET.Element("annotation")
    ET.SubElement(root, "filename").text = image_name
    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(TARGET_SIZE[0])
    ET.SubElement(size, "height").text = str(TARGET_SIZE[1])

    obj = ET.SubElement(root, "object")
    ET.SubElement(obj, "name").text = label
    bbox_elem = ET.SubElement(obj, "bndbox")
    for tag, value in zip(["xmin", "ymin", "xmax", "ymax"], bbox):
        ET.SubElement(bbox_elem, tag).text = str(value)

    with open(os.path.join(save_dir, xml_filename), "w") as f:
        f.write(parseString(ET.tostring(root)).toprettyxml())

Dataset Preprocessing

In [8]:
def process_dataset():
    """Preprocess images, generate masks, and save with annotations."""
    max_count = max(len(os.listdir(os.path.join(RAW_DATASET_PATH, cls))) for cls in tumor_classes)

    for tumor_class in tumor_classes:
        class_path = os.path.join(RAW_DATASET_PATH, tumor_class)
        images = [os.path.join(class_path, img) for img in os.listdir(class_path)]
        train, val, test = split_and_balance(images, max_count, class_path, tumor_class)

        for split, dataset in zip(["Train", "Val", "Test"], [train, val, test]):
            img_dest = os.path.join(OUTPUT_PATH, split, "Images", tumor_class)
            ann_dest = os.path.join(OUTPUT_PATH, split, "Annotations", tumor_class)
            os.makedirs(img_dest, exist_ok=True)
            os.makedirs(ann_dest, exist_ok=True)

            for img_path in dataset:
                img = preprocess_image(img_path)
                mask = get_segmentation_mask(img) if tumor_class != "no_tumour" else np.zeros(TARGET_SIZE[:2])
                bbox = mask_to_bbox(mask)
                unique_id = str(uuid.uuid4())
                save_name = f"{os.path.splitext(os.path.basename(img_path))[0]}_{unique_id}.png"
                save_path = os.path.join(img_dest, save_name)
                shutil.copy(img_path, save_path)
                create_pascal_voc_xml(save_path, bbox, tumor_class, ann_dest)

                print(f"{split}: {tumor_class} - {save_name}")

Class Distribution Visualization

In [9]:
def count_images_per_class(base_path):
    """Count images per tumor class."""
    class_counts = {}
    for tumor_class in tumor_classes:
        class_path = os.path.join(base_path, tumor_class)
        num_images = len([img for img in os.listdir(class_path) if img.lower().endswith(('.png', '.jpg', '.jpeg'))])
        class_counts[tumor_class] = num_images
    return class_counts


def plot_class_distributions(before_counts, after_counts):
    """Plot side-by-side bar charts of class distributions before and after balancing."""
    fig, axes = plt.subplots(1, 2, figsize=(18, 6))

    # Before Balancing
    axes[0].bar(before_counts.keys(), before_counts.values(), color='skyblue')
    axes[0].set_title("Class Distribution Before Balancing")
    axes[0].set_ylabel("Number of Images")
    axes[0].set_xlabel("Tumor Classes")
    axes[0].tick_params(axis='x', rotation=45)

    # After Balancing
    axes[1].bar(after_counts.keys(), after_counts.values(), color='salmon')
    axes[1].set_title("Class Distribution After Balancing")
    axes[1].set_ylabel("Number of Images")
    axes[1].set_xlabel("Tumor Classes")
    axes[1].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

Main Function

In [10]:
def main_pipeline():
    print("Starting preprocessing pipeline...")

    # Count classes before balancing
    before_balancing_counts = count_images_per_class(RAW_DATASET_PATH)
    print("Class counts before balancing:", before_balancing_counts)

    # Process dataset with U-Net & Balancing
    process_dataset()

    # Count classes after balancing
    after_balancing_counts = count_images_per_class(os.path.join(OUTPUT_PATH, "Train/Images"))
    print("Class counts after balancing:", after_balancing_counts)

    # Plot class distribution comparison
    plot_class_distributions(before_balancing_counts, after_balancing_counts)

    print("Dataset preprocessing complete and saved at:", OUTPUT_PATH)


# Execute the final pipeline
main_pipeline()

Starting preprocessing pipeline...
Class counts before balancing: {'tuberculoma': 138, 'granuloma': 78, 'no_tumour': 2000, 'papiloma': 227, 'schwannoma': 453, 'meduloblastoma': 126, 'pituitary': 1757, 'neurocitoma': 457, 'oligodendroglioma': 220, 'meningioma': 1645, 'germinoma': 97, 'astrocitoma': 574, 'glioblastoma': 197, 'ependimoma': 150, 'ganglioglioma': 59, 'carcinoma': 186}


ValueError: could not convert string to float: '/content/drive/MyDrive/DSGP/DSGP_dataset/tuberculoma/7b1e6ea5eb22b9e55a58357dd2ee04_big_gallery.jpeg'

Visualize Tumor with Bounding Box

In [None]:
def visualize_tumor_with_bbox(image_path):
    """Visualize segmented tumor with bounding box."""
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    mask = get_segmentation_mask(image)
    bbox = mask_to_bbox(mask)
    cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
    plt.imshow(image)
    plt.title("Tumor Segmentation with Bounding Box")
    plt.axis('off')
    plt.show()

visualize_tumor_with_bbox()