<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Image-Preprocesing/Grp_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup cell: pins package versions with pip, sets two environment
# variables, imports the libraries used by the later cells, and mounts Google Drive.

# Install TensorFlow 2.18.0 and segmentation-models 1.0.1
!pip install --upgrade tensorflow==2.18.0 segmentation-models==1.0.1

# Uninstall and reinstall protobuf to force version 3.20.3
!pip uninstall -y protobuf
!pip install protobuf==3.20.3

# Install kagglehub (if not already installed)
!pip install kagglehub

# Set environment variables BEFORE importing other packages
# (both assignments run before tensorflow and segmentation_models are imported below).
# NOTE(review): presumably segmentation_models reads SM_FRAMEWORK at import time to
# select the tf.keras backend, and PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION selects the
# pure-Python protobuf implementation -- confirm against each library's documentation.
import os
os.environ["SM_FRAMEWORK"] = "tf.keras"
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Import the libraries used by the rest of the notebook, then mount Google Drive.
# The two print calls are a quick sanity check that the TensorFlow import succeeded.
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
import tensorflow.keras as keras
print("tf.keras is available.")

# NOTE(review): cv2 and nibabel are imported here but are not installed by the pip
# commands above; this assumes the runtime already provides them.
import segmentation_models as sm
import glob
import cv2
import nibabel as nib
import kagglehub
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# NOTE(review): none of the cells visible in this section read from /content/drive;
# the mount may only be needed by cells outside this section.
from google.colab import drive
drive.mount('/content/drive')

Download Dataset From Kaggle

In [None]:
# Download the BraTS2020 dataset (training+validation) from Kaggle
# kagglehub.dataset_download returns the local path of the download, which is
# stored in dataset_path and reused by the shell commands below.
DATASET_NAME = "awsaf49/brats20-dataset-training-validation"
dataset_path = kagglehub.dataset_download(DATASET_NAME)
print("Dataset downloaded to:", dataset_path)

# Inspect the downloaded folder structure
# ({dataset_path} inside a "!" shell line is filled in by IPython from the Python variable)
!ls -lh "{dataset_path}"

# Copy the extracted TrainingData folder (which contains the patient subdirectories)
# into /content/brats_data so later cells can use a fixed path.
!mkdir -p /content/brats_data
!cp -r "{dataset_path}/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData" /content/brats_data/

# Set the training data directory to the copied folder
# (this is the directory later passed to process_brats_data)
BRATS_DATA_DIR = "/content/brats_data/MICCAI_BraTS2020_TrainingData"
!ls -lh "{BRATS_DATA_DIR}"

Preprocessing Functions

In [None]:
def extract_slices(nifti_path, target_size=(256, 256)):
    """Load a 3D .nii volume and return its 2D slices, normalized and resized.

    Args:
        nifti_path: Path to the NIfTI file; passed directly to ``nib.load``.
        target_size: ``(width, height)`` handed to ``cv2.resize`` for every
            slice. Defaults to ``(256, 256)``, the size that used to be
            hard-coded, so existing callers get the same output.

    Returns:
        A list with one 2D array per index along the volume's third axis.
        Each slice is min-max scaled on its own (not across the whole volume)
        to roughly [0, 1] and then resized to ``target_size``.
    """
    volume = nib.load(nifti_path).get_fdata()  # indexed below as volume[:, :, i]
    out_size = tuple(target_size)  # cv2.resize requires a tuple, not a list

    slices = []
    for i in range(volume.shape[2]):  # iterate over the third (slice) axis
        slice_data = volume[:, :, i]
        lo = np.min(slice_data)
        hi = np.max(slice_data)
        # The epsilon keeps a constant slice (hi == lo) from dividing by zero;
        # such a slice comes out as all zeros.
        slice_norm = (slice_data - lo) / (hi - lo + 1e-8)
        slices.append(cv2.resize(slice_norm, out_size))
    return slices

def extract_mask_slices(nifti_path, target_size=(256, 256)):
    """Load a 3D mask (.nii) and return 2D binary slices.

    Every label greater than zero is treated as tumor (1.0); everything else
    is background (0.0).

    Args:
        nifti_path: Path to the segmentation NIfTI file; passed to ``nib.load``.
        target_size: ``(width, height)`` handed to ``cv2.resize`` for every
            slice. Defaults to ``(256, 256)``, the size that used to be
            hard-coded.

    Returns:
        A list with one float32 2D array per index along the volume's third
        axis, each containing only the values 0.0 and 1.0.
    """
    labels = nib.load(nifti_path).get_fdata()
    out_size = tuple(target_size)  # cv2.resize requires a tuple, not a list

    slices = []
    for i in range(labels.shape[2]):
        # Convert multi-class labels to binary: tumor vs. background
        binary_slice = (labels[:, :, i] > 0).astype(np.float32)
        # Nearest-neighbour keeps the mask strictly {0, 1}. cv2.resize defaults
        # to bilinear interpolation, which blends border pixels into fractional
        # values and so yields non-binary masks after resizing.
        slice_resized = cv2.resize(
            binary_slice, out_size, interpolation=cv2.INTER_NEAREST
        )
        slices.append(slice_resized)
    return slices

def process_brats_data(brats_dir, save_dir="brats_slices"):
    """
    Convert BraTS patient volumes into paired 2D PNG slices on disk.

    For each ``BraTS20_Training_*`` subdirectory of ``brats_dir`` the first
    ``*_flair.nii`` and ``*_seg.nii`` files (in sorted order) are loaded with
    ``extract_slices`` / ``extract_mask_slices``. Each slice pair is written as
    ``<save_dir>/images/<case>_slice_<i>.png`` (3-channel, 8-bit) and
    ``<save_dir>/masks/<case>_slice_<i>.png`` (single-channel, 8-bit).

    Patient directories missing either file are reported and skipped.

    Args:
        brats_dir: Directory containing the patient subdirectories.
        save_dir: Folder where 2D slices will be stored (subfolders "images"
            and "masks" are created if they do not exist).

    Returns:
        None. Progress and the total number of slice pairs actually written
        are printed.
    """
    images_save_dir = os.path.join(save_dir, "images")
    masks_save_dir = os.path.join(save_dir, "masks")
    os.makedirs(images_save_dir, exist_ok=True)
    os.makedirs(masks_save_dir, exist_ok=True)

    # glob returns entries in arbitrary filesystem order; sorting makes the
    # processing order (and the printed log) reproducible between runs.
    patient_dirs = sorted(glob.glob(os.path.join(brats_dir, "BraTS20_Training_*")))
    if not patient_dirs:
        print("No patient directories matching BraTS20_Training_* found in", brats_dir)

    slice_count = 0

    for patient_dir in patient_dirs:
        print("Processing patient:", patient_dir)
        # Look for flair and seg files within each patient directory
        flair_files = sorted(glob.glob(os.path.join(patient_dir, "*_flair.nii")))
        seg_files = sorted(glob.glob(os.path.join(patient_dir, "*_seg.nii")))
        if not flair_files or not seg_files:
            print("No flair or seg file found in", patient_dir)
            continue

        flair_file = flair_files[0]
        seg_file = seg_files[0]

        flair_slices = extract_slices(flair_file)
        seg_slices = extract_mask_slices(seg_file)

        n_slices = min(len(flair_slices), len(seg_slices))
        print("Found", n_slices, "slices in", flair_file)

        # The case identifier is the same for every slice of this patient,
        # so derive it once instead of once per slice.
        base_name = os.path.basename(flair_file).replace("_flair.nii", "")

        # zip stops at the shorter list, i.e. after n_slices pairs.
        for i, (flair_slice, seg_slice) in enumerate(zip(flair_slices, seg_slices)):
            # Scale to 8-bit and replicate the grayscale image into 3 channels
            # (all channels are identical, so channel order is irrelevant).
            img_gray = (flair_slice * 255).astype(np.uint8)
            img_rgb = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB)
            msk = (seg_slice * 255).astype(np.uint8)

            img_filename = os.path.join(images_save_dir, f"{base_name}_slice_{i}.png")
            msk_filename = os.path.join(masks_save_dir, f"{base_name}_slice_{i}.png")

            # cv2.imwrite reports failure through its return value rather than
            # by raising, so check it and only count pairs that were written.
            img_ok = cv2.imwrite(img_filename, img_rgb)
            msk_ok = cv2.imwrite(msk_filename, msk)
            if img_ok and msk_ok:
                slice_count += 1
            else:
                print("Failed to write slice pair:", img_filename, msk_filename)

    print(f"Total 2D slices saved: {slice_count}")

# Run the processing function
# (reads patient folders from BRATS_DATA_DIR and writes PNGs under ./brats_slices,
# a path relative to the notebook's current working directory)
process_brats_data(BRATS_DATA_DIR, save_dir="brats_slices")

# Verify that PNG files were created
# NOTE(review): ls prints one line per saved slice, so this output can be very long.
!ls -lh brats_slices/images
!ls -lh brats_slices/masks