<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classficiation-CNN/Generating_Annotations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import libraries & Mount Google Drive

In [None]:
import os
import random
import shutil
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString
import cv2
from google.colab import drive

# Mount Google Drive at /content/drive (Colab only); the dataset and output
# paths used in this notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


Define Paths

In [None]:
# Paths
# DATASET_PATH: source folder, listed for per-class sub-folders of images.
# OUTPUT_PATH: destination root that receives the Train/Val/Test folders.
DATASET_PATH = "/content/drive/MyDrive/DSGP/Preprocessed_Dataset"
OUTPUT_PATH = "/content/drive/MyDrive/DSGP/CNN_dataset"

# Ensure the output root exists (no error if it is already there)
os.makedirs(OUTPUT_PATH, exist_ok=True)

Define split ratios and create the Train/Val/Test folders

In [None]:
# Fraction of each class assigned to the training, validation and test sets
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

# Build <OUTPUT_PATH>/<split>/Images and <OUTPUT_PATH>/<split>/Annotations
# for every split; existing folders are left untouched.
for split in ("Train", "Val", "Test"):
    for subdir in ("Images", "Annotations"):
        os.makedirs(os.path.join(OUTPUT_PATH, split, subdir), exist_ok=True)

Generate Annotations

In [None]:
def create_pascal_voc_xml(image_path, bbox, label, save_dir):
    """Write a PASCAL VOC XML annotation with a single object for an image.

    The image is read with OpenCV to obtain its dimensions. The annotation is
    saved in ``save_dir`` under the image's base name with an ``.xml``
    extension.

    Args:
        image_path: Path to the image file being annotated.
        bbox: Sequence ``[xmin, ymin, xmax, ymax]``; the values are written
            as given (they are not validated against the image size).
        label: Class name stored in the object's ``name`` element.
        save_dir: Directory in which the XML file is written. It is not
            created here, so it must already exist.

    Returns:
        The path of the XML file that was written.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image_name = os.path.basename(image_path)
    xml_filename = os.path.splitext(image_name)[0] + ".xml"

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/corrupt file by returning None rather
        # than raising, so fail here with a message that names the file.
        raise ValueError(f"Could not read image: {image_path}")

    # Slice instead of 3-way unpacking so a 2-D (single-channel) array works.
    height, width = img.shape[:2]
    depth = img.shape[2] if img.ndim == 3 else 1

    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = "Dataset"
    ET.SubElement(root, "filename").text = image_name
    ET.SubElement(root, "path").text = image_path

    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = str(depth)

    obj = ET.SubElement(root, "object")
    ET.SubElement(obj, "name").text = label
    bbox_elem = ET.SubElement(obj, "bndbox")
    for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), bbox[:4]):
        ET.SubElement(bbox_elem, tag).text = str(value)

    xml_pretty = parseString(ET.tostring(root)).toprettyxml()

    xml_path = os.path.join(save_dir, xml_filename)
    # The generated XML declaration names no encoding, which XML parsers
    # treat as UTF-8; write UTF-8 explicitly instead of the platform default
    # so non-ASCII labels or paths stay valid.
    with open(xml_path, "w", encoding="utf-8") as xml_file:
        xml_file.write(xml_pretty)

    return xml_path

Main function: generate annotations and split the dataset

In [None]:
def process_and_split_data(seed=None):
    """Split each class folder into Train/Val/Test and write annotations.

    Every sub-folder of ``DATASET_PATH`` is treated as one class. Its images
    (``.png``, ``.jpg``, ``.jpeg``) are shuffled and divided using
    ``TRAIN_RATIO`` and ``VAL_RATIO``; whatever remains goes to the test
    split. Each image is copied to
    ``OUTPUT_PATH/<split>/Images/<class>`` and a PASCAL VOC XML file is
    written to ``OUTPUT_PATH/<split>/Annotations/<class>``.

    Existing output is not removed. Re-running without a fixed seed into a
    populated ``OUTPUT_PATH`` can therefore leave the same image in more
    than one split.

    Args:
        seed: Optional seed for the shuffle. With a seed, repeated runs over
            the same dataset yield the same split. With ``None`` (default)
            the module-level ``random.shuffle`` is used.
    """
    # A dedicated generator keeps a seeded run independent of global random
    # state; without a seed the module-level shuffle is used.
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    # os.listdir returns entries in arbitrary order; sorting makes a seeded
    # run repeatable.
    for tumor_class in sorted(os.listdir(DATASET_PATH)):
        class_path = os.path.join(DATASET_PATH, tumor_class)

        if not os.path.isdir(class_path):
            continue  # Skip non-folder files

        images = sorted(
            name for name in os.listdir(class_path)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        shuffle(images)

        # Compute split boundaries; the test split takes the remainder.
        total_images = len(images)
        train_size = int(total_images * TRAIN_RATIO)
        val_end = train_size + int(total_images * VAL_RATIO)

        splits = {
            "Train": images[:train_size],
            "Val": images[train_size:val_end],
            "Test": images[val_end:],
        }

        # Label and box depend only on the class, so compute them once here.
        if tumor_class.lower() == "no tumour":
            # Intended as a full-image box; assumes images are 224x224
            # -- TODO confirm against the preprocessing step.
            bbox = [0, 0, 224, 224]
            label = "No_Tumor"
        else:
            bbox = [50, 50, 200, 200]  # Placeholder bounding box
            label = tumor_class

        for split, split_images in splits.items():
            image_dest = os.path.join(OUTPUT_PATH, split, "Images", tumor_class)
            annotation_dest = os.path.join(OUTPUT_PATH, split, "Annotations", tumor_class)

            os.makedirs(image_dest, exist_ok=True)
            os.makedirs(annotation_dest, exist_ok=True)

            for image_file in split_images:
                image_path = os.path.join(class_path, image_file)

                # Copy image
                shutil.copy(image_path, os.path.join(image_dest, image_file))

                # Generate and save annotation
                create_pascal_voc_xml(image_path, bbox, label, annotation_dest)

                print(f"{split}: {image_file} -> {label}")

# Run the annotation + split pipeline, then report that it finished.
process_and_split_data()
print("Dataset processing & splitting completed successfully")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Train: image_5747.png -> Ependimoma
Train: image_4108.png -> Ependimoma
Train: image_5612.png -> Ependimoma
Train: image_4791.png -> Ependimoma
Train: image_5638.png -> Ependimoma
Train: image_5923.png -> Ependimoma
Train: image_5683.png -> Ependimoma
Train: image_4382.png -> Ependimoma
Train: image_4187.png -> Ependimoma
Train: image_4007.png -> Ependimoma
Train: image_4664.png -> Ependimoma
Train: image_4230.png -> Ependimoma
Train: image_4280.png -> Ependimoma
Train: image_4851.png -> Ependimoma
Train: image_5351.png -> Ependimoma
Train: image_5547.png -> Ependimoma
Train: image_4626.png -> Ependimoma
Train: image_4437.png -> Ependimoma
Train: image_4605.png -> Ependimoma
Train: image_5332.png -> Ependimoma
Train: image_5897.png -> Ependimoma
Train: image_5077.png -> Ependimoma
Train: image_5353.png -> Ependimoma
Train: image_5090.png -> Ependimoma
Train: image_4050.png -> Ependimoma
Train: image_5427.png -> Ependimoma