<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classficiation-CNN/Preprocessing_for_Faster_R_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries, Mount Google Drive and Extract Dataset

In [14]:
# Import libraries
import os
import cv2
import numpy as np
import random
from sklearn.model_selection import train_test_split
from google.colab import drive
import zipfile
from lxml import etree

# Make Google Drive available inside the Colab runtime
drive.mount('/content/drive')

# Locations used throughout the notebook:
#   drive_path              - zipped raw dataset stored on Drive
#   original_dataset_dir    - local folder the archive is extracted into
#   preprocessed_images_dir - Drive folder that receives the processed images
#   annotations_dir         - Drive folder that receives the XML annotations
drive_path = "/content/drive/My Drive/DSGP/Original Dataset.zip"
original_dataset_dir = "/content/original_dataset"
preprocessed_images_dir = "/content/drive/My Drive/DSGP/Preprocessed Dataset/Preprocessed Images"
annotations_dir = "/content/drive/My Drive/DSGP/Preprocessed Dataset/Annotations"

# Extract the archive only when the target folder is not there yet,
# so re-running this cell does not unzip the dataset again.
if not os.path.exists(original_dataset_dir):
    with zipfile.ZipFile(drive_path, 'r') as archive:
        archive.extractall(original_dataset_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Define Image Enhancements

CLAHE for Contrast Enhancement

In [15]:
def apply_clahe(image):
    """Enhance local contrast with CLAHE and return a 3-channel image.

    The input is converted from BGR to grayscale, equalized with a CLAHE
    operator (clip limit 2.0, 8x8 tile grid), and the equalized channel is
    replicated three times so the result keeps a 3-channel layout.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luminance = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    boosted = equalizer.apply(luminance)
    # Same equalized plane in every channel
    return cv2.merge([boosted] * 3)

Gaussian Blur to Reduce Noise

In [16]:
def apply_gaussian_blur(image):
    """Return the image smoothed with a 5x5 Gaussian kernel."""
    kernel_size = (5, 5)
    sigma_x = 0  # 0 asks OpenCV to derive sigma from the kernel size
    return cv2.GaussianBlur(image, kernel_size, sigma_x)

# Skull Stripping

In [17]:
def skull_strip(image):
    """Zero out near-black pixels using an intensity-threshold mask.

    A binary mask is built from the grayscale version of the image
    (pixels brighter than 10 become 255), dilated twice with the default
    kernel, and then applied to the original image with a bitwise AND.
    """
    intensity = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_value, mask); only the mask is needed
    foreground = cv2.threshold(intensity, 10, 255, cv2.THRESH_BINARY)[1]
    foreground = cv2.dilate(foreground, None, iterations=2)
    return cv2.bitwise_and(image, image, mask=foreground)

# Resize, Normalize and Padding

Resize and Normalize

In [18]:
def resize_and_normalize(image):
    """Resize the image to 256x256 and scale its values by 1/255.

    Returns a floating-point array; for 8-bit input the values fall in [0, 1].
    """
    target_size = (256, 256)
    return cv2.resize(image, target_size) / 255.0

Pad to Maintain Aspect Ratio

In [19]:
def pad_to_square(image):
    """Pad a 3-channel image to a square with zeros, then resize to 256x256.

    The image is placed in the top-left corner of a black uint8 canvas whose
    side equals the longer image dimension, so the padding is added on the
    right and/or bottom and the content is not stretched before the resize.
    """
    height, width, _ = image.shape
    side = max(height, width)
    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[:height, :width, :] = image
    return cv2.resize(canvas, (256, 256))

# Generate Bounding Boxes

In [20]:
def generate_bounding_box(image):
    """Return a placeholder bounding box covering the central region of the image.

    This does not detect anything: the box always spans from 30% to 70% of
    the width and height.

    Args:
        image: Array whose first two dimensions are (height, width). Both
            2-D grayscale and 3-D multi-channel arrays are accepted.

    Returns:
        A single-element list ``[(x_min, y_min, x_max, y_max)]`` in pixels.
    """
    # Only height and width are needed; slicing keeps grayscale inputs working.
    h, w = image.shape[:2]
    x_min, y_min = int(w * 0.3), int(h * 0.3)
    x_max, y_max = int(w * 0.7), int(h * 0.7)
    return [(x_min, y_min, x_max, y_max)]

# Saving Bounding Boxes as Annotations

In [21]:
def save_annotations(image_path, bboxes, label, output_folder):
    """Write the bounding boxes of one image to an XML annotation file.

    The XML has an ``annotation`` root with a ``filename`` element and one
    ``object`` element per box, each holding ``name`` (the label) and a
    ``bndbox`` with ``xmin``/``ymin``/``xmax``/``ymax``.

    Args:
        image_path: Path of the image; only its base name is used.
        bboxes: Iterable of boxes indexed as (xmin, ymin, xmax, ymax).
        label: Text stored as the name of every object.
        output_folder: Existing folder that receives ``<image stem>.xml``.
    """
    file_name = os.path.basename(image_path)
    annotation = etree.Element("annotation")
    etree.SubElement(annotation, "filename").text = file_name

    coord_tags = ("xmin", "ymin", "xmax", "ymax")
    for box in bboxes:
        object_node = etree.SubElement(annotation, "object")
        etree.SubElement(object_node, "name").text = label
        box_node = etree.SubElement(object_node, "bndbox")
        for position, tag in enumerate(coord_tags):
            etree.SubElement(box_node, tag).text = str(box[position])

    stem = os.path.splitext(file_name)[0]
    xml_path = os.path.join(output_folder, f"{stem}.xml")
    with open(xml_path, "wb") as handle:
        handle.write(etree.tostring(annotation, pretty_print=True))

# Data Augmentation

In [22]:
def augment_image(image):
    """Create two augmented variants of an image: a mirror and a small rotation.

    Args:
        image: Image array whose first two dimensions are (height, width).

    Returns:
        A list ``[flipped, rotated]`` where ``flipped`` is the horizontal
        mirror and ``rotated`` is the image rotated by a random whole number
        of degrees in [-15, 15] about its centre, at the input's size.
    """
    # Use the actual image size instead of assuming 256x256, so the rotation
    # pivots on the true centre and the output is neither cropped nor padded.
    height, width = image.shape[:2]

    flipped = cv2.flip(image, 1)  # flip code 1 = horizontal flip

    angle = random.randint(-15, 15)
    center = (width // 2, height // 2)  # OpenCV points are (x, y)
    rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, rotation, (width, height))  # dsize is (width, height)

    return [flipped, rotated]

# Handle Class Imbalance

In [23]:
def handle_class_imbalance(images, target_count=100):
    """Oversample a class up to ``target_count`` items by random duplication.

    Args:
        images: Sequence of items (e.g. image paths) belonging to one class.
        target_count: Minimum number of items wanted in the result.

    Returns:
        A new list starting with the original items in order, followed by
        randomly chosen duplicates (sampled with replacement) when the input
        is shorter than ``target_count``. The input sequence is never
        modified. An empty input yields an empty list, since there is
        nothing to duplicate.
    """
    balanced = list(images)  # copy so the caller's list is not mutated
    shortfall = target_count - len(balanced)
    # random.choices raises IndexError on an empty population, so skip it.
    if shortfall > 0 and balanced:
        balanced += random.choices(balanced, k=shortfall)
    return balanced

# Split Dataset

In [24]:
def split_dataset(images):
    """Split ``images`` into train and test parts (80% / 20%).

    Delegates to ``train_test_split`` with ``test_size=0.2`` and a fixed
    ``random_state`` of 42, so the same input yields the same split.
    Returns whatever ``train_test_split`` returns for a single input,
    which the caller unpacks as ``(train, test)``.
    """
    return train_test_split(images, test_size=0.2, random_state=42)

# Preprocess and Save Dataset

In [25]:
def _run_preprocessing_pipeline(img):
    """Apply the image steps in order: skull strip, blur, CLAHE, pad, resize/normalize."""
    img = skull_strip(img)
    img = apply_gaussian_blur(img)
    img = apply_clahe(img)
    img = pad_to_square(img)
    return resize_and_normalize(img)


def preprocess_and_save_dataset():
    """Preprocess every image under ``original_dataset_dir`` and save the results.

    For each folder that contains images, the image paths are oversampled to
    at least 100 entries, split into train/test, run through the
    preprocessing pipeline, and written to ``preprocessed_images_dir`` with a
    matching XML annotation in ``annotations_dir``. The folder structure
    relative to ``original_dataset_dir`` is mirrored under ``train``/``test``.

    Folders without matching images, unreadable images, and failed writes
    are reported with a message and skipped.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    for root, _, files in os.walk(original_dataset_dir):
        # Compare extensions case-insensitively so files such as "scan.JPG"
        # are not silently ignored.
        images = [os.path.join(root, f) for f in files if f.lower().endswith(image_extensions)]

        if len(images) == 0:
            print(f"No images found in folder: {root}")
            continue

        # Handle class imbalance
        images = handle_class_imbalance(images, target_count=100)

        # Split into train/test
        # NOTE(review): oversampling happens before the split, so a duplicated
        # path can land in both train and test, and duplicates within a split
        # overwrite the same output file. Consider splitting the unique paths
        # first and oversampling only the training part.
        train_images, test_images = split_dataset(images)

        for split, split_images in [("train", train_images), ("test", test_images)]:
            for img_path in split_images:
                # Determine relative folder structure
                relative_path = os.path.relpath(img_path, original_dataset_dir)
                subfolder = os.path.dirname(relative_path)

                # Create output directories
                image_output_folder = os.path.join(preprocessed_images_dir, split, subfolder)
                annotation_output_folder = os.path.join(annotations_dir, split, subfolder)
                os.makedirs(image_output_folder, exist_ok=True)
                os.makedirs(annotation_output_folder, exist_ok=True)

                # Load image; cv2.imread returns None instead of raising when
                # the file is missing, corrupt, or in an unsupported format.
                img = cv2.imread(img_path)
                if img is None:
                    print(f"Skipping unreadable image: {img_path}")
                    continue

                img = _run_preprocessing_pipeline(img)

                # Save preprocessed image. The pipeline output is float in
                # [0, 1]; the round trip through /255.0 and *255 is not
                # guaranteed to be exact and astype truncates, so round first.
                output_image_path = os.path.join(image_output_folder, os.path.basename(img_path))
                img_uint8 = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
                if not cv2.imwrite(output_image_path, img_uint8):
                    print(f"Failed to write image: {output_image_path}")
                    continue

                # Generate and save bounding box annotations
                bboxes = generate_bounding_box(img)
                save_annotations(output_image_path, bboxes, "tumor", annotation_output_folder)

# Run Preprocessing

In [26]:
# Run preprocessing
preprocess_and_save_dataset()

# Confirm completion only when output actually exists. Folders without
# matching images are skipped with just a message, so an unconditional
# success line can be printed even though nothing was saved.
saved_image_count = sum(len(files) for _, _, files in os.walk(preprocessed_images_dir))
if saved_image_count > 0:
    print("Preprocessing complete! Images and annotations saved.")
else:
    print(f"Preprocessing finished, but no images were found under: {preprocessed_images_dir}")

No images found in folder: /content/original_dataset
No images found in folder: /content/original_dataset/Original Dataset
No images found in folder: /content/original_dataset/Original Dataset/Tumor
No images found in folder: /content/original_dataset/Original Dataset/Tumor/glioma
Preprocessing complete! Images and annotations saved.
