<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classficiation-CNN/Preprocessing_for_Faster_R_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries, Mount Google Drive, Extract Dataset

In [1]:
# Import libraries
import os
import cv2
import numpy as np
import random
from sklearn.model_selection import train_test_split
from google.colab import drive
import zipfile

# Mount Google Drive so the dataset archive and the output folder are reachable
# under /content/drive.
drive.mount('/content/drive')

# Paths
drive_path = "/content/drive/My Drive/DSGP/Original Dataset.zip"  # Path to your zip file
original_dataset_dir = "/content/original_dataset"  # Local directory the archive is extracted into
preprocessed_dataset_dir = "/content/drive/My Drive/DSGP/Preprocessed Dataset"  # Save preprocessed data here

# Unzip dataset. Extraction is skipped whenever the target directory already
# exists, so re-running this cell does not unpack the archive again.
if not os.path.exists(original_dataset_dir):
    with zipfile.ZipFile(drive_path, 'r') as zip_ref:
        zip_ref.extractall(original_dataset_dir)

Mounted at /content/drive


# Define Image Enhancements

CLAHE for Contrast Enhancement

In [2]:
def apply_clahe(image):
    """Enhance local contrast of a BGR image with CLAHE.

    The image is converted to grayscale, equalized with a CLAHE operator
    (clip limit 2.0, 8x8 tile grid), and the single equalized channel is
    replicated three times so the result is again a 3-channel image.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    # Stack the same plane three times to restore a 3-channel layout.
    return cv2.merge([equalized] * 3)

Gaussian Blur to Reduce Noise

In [3]:
def apply_gaussian_blur(image):
    """Smooth *image* with a 5x5 Gaussian kernel and return the blurred copy."""
    kernel_size = (5, 5)
    sigma_x = 0  # passed through to cv2.GaussianBlur unchanged
    blurred = cv2.GaussianBlur(image, kernel_size, sigma_x)
    return blurred

# Skull Stripping

In [4]:
def skull_strip(image):
    """Mask out the dark background of a BGR image.

    A binary mask is built by thresholding the grayscale version of the
    image at intensity 10, the mask is dilated twice with OpenCV's default
    structuring element, and the input is AND-ed with itself under that mask.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, mask); only the mask is needed.
    foreground = cv2.threshold(grayscale, 10, 255, cv2.THRESH_BINARY)[1]
    foreground = cv2.dilate(foreground, None, iterations=2)
    stripped = cv2.bitwise_and(image, image, mask=foreground)
    return stripped

# Resize, Normalize and Padding

Resize and Normalize

In [5]:
def resize_and_normalize(image):
    """Resize *image* to 256x256 and divide every value by 255.0.

    Returns the scaled array produced by the division (floating point).
    """
    target_size = (256, 256)
    return cv2.resize(image, target_size) / 255.0

Pad to Maintain Aspect Ratio

In [6]:
def pad_to_square(image):
    """Zero-pad a 3-channel image to a square canvas, then resize to 256x256.

    The image is placed in the top-left corner of a black uint8 canvas whose
    side equals the longer image dimension, so the content keeps its aspect
    ratio when the canvas is resized.
    """
    height, width, _channels = image.shape
    side = height if height >= width else width
    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    # Copy the image into the top-left corner; the remainder stays black.
    canvas[0:height, 0:width] = image
    return cv2.resize(canvas, (256, 256))

# Data Augmentation

In [7]:
def augment_image(image):
    """Create two augmented variants of *image*.

    Args:
        image: Image array whose first two dimensions are (height, width).

    Returns:
        A list ``[flipped, rotated]`` where ``flipped`` is the horizontal
        mirror of the input and ``rotated`` is the input rotated about its
        centre by a random whole-degree angle in [-15, 15]. Both have the
        same spatial size as the input.
    """
    # Derive the geometry from the input instead of assuming 256x256, so the
    # rotation is centred and un-cropped for any image size. For 256x256
    # inputs this is identical to the previous hard-coded values.
    height, width = image.shape[:2]

    flipped = cv2.flip(image, 1)  # flip code 1 = horizontal flip

    angle = random.randint(-15, 15)  # random rotation in degrees
    center = (width // 2, height // 2)  # OpenCV points are (x, y)
    rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, rotation, (width, height))  # dsize is (w, h)

    return [flipped, rotated]

# Handle Class Imbalance

In [8]:
def handle_class_imbalance(images, target_count=100):
    """Oversample *images* with replacement up to ``target_count`` items.

    Args:
        images: Sequence of items belonging to one class (image paths in
            this notebook).
        target_count: Minimum number of items the result should contain.

    Returns:
        A new list. When the input has fewer than ``target_count`` items it
        holds the original items followed by randomly chosen duplicates;
        otherwise it is a plain copy. An empty input yields an empty list,
        since there is nothing to sample from.
    """
    # Work on a copy so the caller's list is never mutated.
    balanced = list(images)
    shortfall = target_count - len(balanced)
    # random.choices raises IndexError on an empty population, so guard it.
    if balanced and shortfall > 0:
        balanced.extend(random.choices(balanced, k=shortfall))
    return balanced

# Split Dataset

In [9]:
def split_dataset(images):
    """Split *images* into train and test parts (20% test, fixed seed 42).

    Returns a two-element list ``[train, test]``.
    """
    train_part, test_part = train_test_split(
        images,
        test_size=0.2,
        random_state=42,
    )
    return [train_part, test_part]

# Preprocess and Save Dataset

In [10]:
def _preprocess_image(img):
    """Run the fixed enhancement pipeline on one BGR image and return the normalized result."""
    img = skull_strip(img)
    img = apply_gaussian_blur(img)
    img = apply_clahe(img)
    img = pad_to_square(img)
    return resize_and_normalize(img)


def preprocess_and_save_dataset():
    """Preprocess every image folder under ``original_dataset_dir``.

    For each folder that directly contains images, the images are split into
    train/test sets, the training set is oversampled so the folder contributes
    at least 100 items in total, every image is run through the preprocessing
    pipeline, and the results are written below
    ``preprocessed_dataset_dir/<split>/<relative folder>``. Training images
    additionally get augmented copies saved as ``<name>_aug<i>.jpg``.

    Notes:
        * The split happens BEFORE oversampling so a duplicated image can
          never end up in both the training and the test set.
        * Oversampled duplicates are written as ``<name>_dup<k>`` so they do
          not overwrite the first copy (or its augmentations) on disk.
        * Files that OpenCV cannot read are reported and skipped.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    target_per_class = 100

    for root, _, files in os.walk(original_dataset_dir):
        # Collect image paths. Matching is case-insensitive (".JPG" counts) and
        # the list is sorted so the seeded split is reproducible between runs.
        images = sorted(
            os.path.join(root, f)
            for f in files
            if f.lower().endswith(image_extensions)
        )

        if not images:
            print(f"No images found in folder: {root}")
            continue

        print(f"Processing folder: {root}")
        print(f"Number of images: {len(images)}")

        # Split first; a single image cannot be split, so it goes to train.
        if len(images) > 1:
            train_images, test_images = split_dataset(images)
        else:
            train_images, test_images = list(images), []

        # Oversample the training part only, keeping the per-folder total at
        # the same minimum as before (train + test >= target_per_class).
        train_images = handle_class_imbalance(
            list(train_images),
            target_count=target_per_class - len(test_images),
        )

        # Every image in this folder shares the same relative output folder.
        subfolder = os.path.relpath(root, original_dataset_dir)

        for split, split_images in (("train", train_images), ("test", test_images)):
            if not split_images:
                continue

            output_folder = os.path.normpath(
                os.path.join(preprocessed_dataset_dir, split, subfolder)
            )
            os.makedirs(output_folder, exist_ok=True)

            copies_written = {}  # source path -> number of copies seen so far
            for img_path in split_images:
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread signals an unreadable/corrupt file with None.
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                img = _preprocess_image(img)

                # The first copy keeps the original file name; later
                # (oversampled) copies get a unique suffix.
                copy_index = copies_written.get(img_path, 0)
                copies_written[img_path] = copy_index + 1
                stem, ext = os.path.splitext(os.path.basename(img_path))
                if copy_index:
                    stem = f"{stem}_dup{copy_index}"

                # Save preprocessed image (scaled back from [0, 1] to 8-bit).
                output_path = os.path.join(output_folder, stem + ext)
                cv2.imwrite(output_path, (img * 255).astype(np.uint8))

                # Augment training images only.
                if split == "train":
                    for i, aug_img in enumerate(augment_image(img)):
                        aug_output_path = os.path.join(output_folder, f"{stem}_aug{i}.jpg")
                        cv2.imwrite(aug_output_path, (aug_img * 255).astype(np.uint8))

# Run Preprocessing

In [11]:
# Run the whole pipeline: walk the extracted dataset, preprocess each image,
# and write the train/test output folders.
preprocess_and_save_dataset()

# Confirm completion and report where the preprocessed data was written
print("Preprocessing complete! Preprocessed data is saved in:", preprocessed_dataset_dir)

No images found in folder: /content/original_dataset
No images found in folder: /content/original_dataset/Original Dataset
No images found in folder: /content/original_dataset/Original Dataset/Tumor
Processing folder: /content/original_dataset/Original Dataset/Tumor/meningioma
Number of images: 1645
No images found in folder: /content/original_dataset/Original Dataset/Tumor/glioma
Processing folder: /content/original_dataset/Original Dataset/Tumor/glioma/Meningioma T2
Number of images: 233
Processing folder: /content/original_dataset/Original Dataset/Tumor/glioma/Ganglioglioma T1
Number of images: 20
Processing folder: /content/original_dataset/Original Dataset/Tumor/glioma/Carcinoma T1
Number of images: 65
Processing folder: /content/original_dataset/Original Dataset/Tumor/glioma/Glioblastoma T2
Number of images: 55
Processing folder: /content/original_dataset/Original Dataset/Tumor/glioma/Meningioma T1C+
Number of images: 369
Processing folder: /content/original_dataset/Original Data