<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classficiation-CNN/Generating_Annotations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Required Libraries

In [15]:
# Install TensorFlow and TensorFlow Hub
!pip install tensorflow tensorflow-hub tensorflow-addons opencv-python

# Import libraries
import os
import random
import shutil
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.image import resize
from google.colab import drive
from PIL import Image



Mount Google Drive and Define paths

In [16]:
# Mount Google Drive
drive.mount('/content/drive')

# Define paths
RAW_DATASET_PATH = "/content/drive/MyDrive/DSGP/DSGP_dataset"  # Your raw MRI dataset
OUTPUT_PATH = "/content/drive/MyDrive/DSGP/CNN_Dataset"

# Define class names (tumor types)
tumor_classes = [folder for folder in os.listdir(RAW_DATASET_PATH) if os.path.isdir(os.path.join(RAW_DATASET_PATH, folder))]
print("Detected classes:", tumor_classes)

# Create output directories for Train, Val, Test
for split in ["Train", "Val", "Test"]:
    for subdir in ["Images", "Annotations"]:
        for tumor_class in tumor_classes:
            os.makedirs(os.path.join(OUTPUT_PATH, split, subdir, tumor_class), exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Detected classes: ['tuberculoma', 'granuloma', 'no_tumour', 'papiloma', 'schwannoma', 'meduloblastoma', 'pituitary', 'neurocitoma', 'oligodendroglioma', 'meningioma', 'germinoma', 'astrocitoma', 'glioblastoma', 'ependimoma', 'ganglioglioma', 'carcinoma']


Load U-Net Model

In [17]:
def unet_model(input_size=(256, 256, 3)):
    inputs = keras.Input(input_size)

    # Encoder (downsampling)
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)

    # Bottleneck
    conv3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv3)

    # Decoder (upsampling)
    up4 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv3)
    merge4 = layers.Concatenate()([conv2, up4])
    conv4 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(merge4)
    conv4 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv4)

    up5 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv4)
    merge5 = layers.Concatenate()([conv1, up5])
    conv5 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(merge5)
    conv5 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv5)

    # Output layer
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(conv5)

    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

# Initialize and compile the U-Net model
unet_model = unet_model()
unet_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print("Custom U-Net model built and compiled!")

Custom U-Net model built and compiled!


Resize and Preprocess Images

In [18]:
# Define target size for resizing images (adjust based on your model's input size)
TARGET_SIZE = (256, 256)

def preprocess_image(image_path):
    """Load and resize the MRI image."""
    image = load_img(image_path)
    image = image.resize(TARGET_SIZE)
    image = img_to_array(image) / 255.0
    return image

Generate Segmentation Masks

In [19]:
def get_segmentation_mask(image):
    """Generate segmentation mask using U-Net."""
    img_resized = tf.image.resize(image, TARGET_SIZE)
    img_resized = tf.expand_dims(img_resized, 0)  # Add batch dimension
    mask = unet_model(img_resized)
    mask = tf.squeeze(mask, axis=0)  # Remove batch dimension
    mask = np.array(mask > 0.5, dtype=np.uint8)  # Apply threshold
    return mask

Convert Masks to Bounding Boxes

In [20]:
def mask_to_bbox(mask):
    """Convert a binary mask (2D) to bounding box coordinates (xmin, ymin, xmax, ymax)."""
    if len(mask.shape) == 3:  # If mask has channels, convert to grayscale
        mask = np.max(mask, axis=-1)  # Collapse channels to a single mask

    y_indices, x_indices = np.where(mask > 0)

    if y_indices.size == 0 or x_indices.size == 0:  # No tumor detected
        return None

    xmin, xmax = np.min(x_indices), np.max(x_indices)
    ymin, ymax = np.min(y_indices), np.max(y_indices)

    return [int(xmin), int(ymin), int(xmax), int(ymax)]

Generate Annotations in PASCAL VOC Format

In [21]:
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString

def create_pascal_voc_xml(image_path, bbox, label, save_dir):
    """Generate PASCAL VOC XML annotations."""
    image_name = os.path.basename(image_path)
    xml_filename = os.path.splitext(image_name)[0] + ".xml"
    img = cv2.imread(image_path)
    height, width, _ = img.shape

    # Root element
    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = "Dataset"
    ET.SubElement(root, "filename").text = image_name
    ET.SubElement(root, "path").text = image_path

    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = str(3)

    obj = ET.SubElement(root, "object")
    ET.SubElement(obj, "name").text = label
    bbox_elem = ET.SubElement(obj, "bndbox")
    ET.SubElement(bbox_elem, "xmin").text = str(bbox[0])
    ET.SubElement(bbox_elem, "ymin").text = str(bbox[1])
    ET.SubElement(bbox_elem, "xmax").text = str(bbox[2])
    ET.SubElement(bbox_elem, "ymax").text = str(bbox[3])

    xml_str = ET.tostring(root)
    xml_pretty = parseString(xml_str).toprettyxml()

    with open(os.path.join(save_dir, xml_filename), "w") as xml_file:
        xml_file.write(xml_pretty)

Split Dataset & Handle Class Imbalance

In [22]:
def split_and_balance_dataset(images):
    """Split dataset into Train, Val, Test with class balancing."""
    train, temp = train_test_split(images, test_size=0.3, random_state=42)
    val, test = train_test_split(temp, test_size=0.5, random_state=42)
    return train, val, test

Preprocess the Dataset

In [23]:
def process_dataset():
    """Preprocess images, generate masks, and save with bounding boxes."""
    for tumor_class in tumor_classes:
        class_path = os.path.join(RAW_DATASET_PATH, tumor_class)
        images = [os.path.join(class_path, img) for img in os.listdir(class_path) if img.lower().endswith(('.png', '.jpg', '.jpeg'))]

        # Split into Train, Val, Test
        train, val, test = split_and_balance_dataset(images)

        for split, split_data in zip(["Train", "Val", "Test"], [train, val, test]):
            for image_path in split_data:
                image = preprocess_image(image_path)
                mask = get_segmentation_mask(image)
                bbox = mask_to_bbox(mask)

                if bbox:
                    img_dest = os.path.join(OUTPUT_PATH, split, "Images", tumor_class)
                    ann_dest = os.path.join(OUTPUT_PATH, split, "Annotations", tumor_class)

                    shutil.copy(image_path, img_dest)
                    create_pascal_voc_xml(image_path, bbox, tumor_class, ann_dest)

                    print(f"{split}: {os.path.basename(image_path)} -> {tumor_class}")

process_dataset()
print("Dataset preprocessing complete!")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Train: 1728.jpg -> pituitary
Train: 0444.jpg -> pituitary
Train: 0796.jpg -> pituitary
Train: 0207.jpg -> pituitary
Train: 1722.jpg -> pituitary
Train: 0475.jpg -> pituitary
Train: 1351.jpg -> pituitary
Train: 0406.jpg -> pituitary
Train: 0903.jpg -> pituitary
Train: 1378.jpg -> pituitary
Train: 0394.jpg -> pituitary
Train: 1510.jpg -> pituitary
Train: 1050.jpg -> pituitary
Train: 0323.jpg -> pituitary
Train: 0670.jpg -> pituitary
Train: 1058.jpg -> pituitary
Train: 1399.jpg -> pituitary
Train: 0044.jpg -> pituitary
Train: 1043.jpg -> pituitary
Train: 1708.jpg -> pituitary
Train: 0665.jpg -> pituitary
Train: 0795.jpg -> pituitary
Train: 1349.jpg -> pituitary
Train: 0157.jpg -> pituitary
Train: 1021.jpg -> pituitary
Train: 0392.jpg -> pituitary
Train: 0658.jpg -> pituitary
Train: 1267.jpg -> pituitary
Train: 1744.jpg -> pituitary
Train: 0051.jpg -> pituitary
Train: 1079.jpg -> pituitary
Train: 0552.jpg -> pituitary
Train: 

verify Dataset Structure

In [24]:
import pprint

def check_folder_structure(base_path):
    """Print the folder structure after preprocessing."""
    for root, dirs, files in os.walk(base_path):
        level = root.replace(base_path, "").count(os.sep)
        indent = " " * 2 * level
        print(f"{indent}{os.path.basename(root)}/")
        sub_indent = " " * 2 * (level + 1)
        for file in files:
            print(f"{sub_indent}{file}")

check_folder_structure(OUTPUT_PATH)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        1703.jpg
        1899.jpg
        0081.jpg
        0836.jpg
        1855.jpg
        0666.jpg
        1432.jpg
        1134.jpg
        0249.jpg
        1032.jpg
        0079.jpg
        0498.jpg
        1318.jpg
        1017.jpg
        0518.jpg
        1626.jpg
        0621.jpg
        1803.jpg
        1403.jpg
        1196.jpg
        1267.jpg
        0405.jpg
        1119.jpg
        0716.jpg
        1468.jpg
        0456.jpg
        1532.jpg
        0069.jpg
        0014.jpg
        1578.jpg
        0037.jpg
        0685.jpg
        1080.jpg
        0751.jpg
        1376.jpg
        0773.jpg
        1770.jpg
        1905.jpg
        1845.jpg
        0947.jpg
        1004.jpg
        1264.jpg
        1362.jpg
        1904.jpg
        0009.jpg
        1954.jpg
        1801.jpg
        1333.jpg
        1554.jpg
        0235.jpg
        1779.jpg
        0916.jpg
        0483.jpg
        0045.jpg
        1507.jpg
