In [34]:
import numpy as np
import nibabel as nib
import glob
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from tiffile import imsave

from sklearn.preprocessing import MinMaxScaler

In [35]:
scaler = MinMaxScaler()

In [73]:
TRAIN_DATASET_PATH = ".\BraTS20\BraTS2020_TrainingData\MICCAI_BraTS2020_TrainingData"
OUTPUT_IMAGE_PATH = '.\BraTS20\BraTS2020_TrainingData\input_data_4channels\images\\'
OUTPUT_MASK_PATH = '.\BraTS20\BraTS2020_TrainingData\input_data_4channels\masks\\'

In [82]:
import os
# Ensure output directories exist
os.makedirs(OUTPUT_IMAGE_PATH, exist_ok=True)
os.makedirs(OUTPUT_MASK_PATH, exist_ok=True)

In [74]:
# Define lists of files for each modality and the segmentation mask
t1_list = sorted(glob.glob(TRAIN_DATASET_PATH+'/*/*t1.nii'))
t2_list = sorted(glob.glob(TRAIN_DATASET_PATH+'/*/*t2.nii'))
t1ce_list = sorted(glob.glob(TRAIN_DATASET_PATH+'/*/*t1ce.nii'))
flair_list = sorted(glob.glob(TRAIN_DATASET_PATH+'/*/*flair.nii'))
mask_list = sorted(glob.glob(TRAIN_DATASET_PATH+'/*/*seg.nii'))  # remane the mask in  the training dataset 355 (Seg -> seg)


In [75]:
print(len(t1_list ),len(t1ce_list), len(t2_list ), len(flair_list ), len(mask_list ))

369 369 369 369 369


In [76]:
# Define crop boundaries
CROP_BOUNDARIES = (slice(56, 184), slice(56, 184), slice(13, 141))

**Why Crop?**
- **Focus on Relevant Areas:** By cropping, you ensure that the model processes only the most relevant portions of the image, which can improve training efficiency.
- **Memory and Computation:** 3D medical images are large and require significant computational resources. Cropping reduces the size of the data, making it more manageable for GPU memory and speeding up training.
- **Patch Size Compatibility:** Many deep learning architectures, especially those dealing with 3D data, require input sizes divisible by a specific number (like 64 in this code). Cropping ensures the volume fits this requirement.

**Drawbacks of Cropping:**
- **Risk of Losing Important Information:** If the cropping is too aggressive or poorly designed, important parts of the image (like a tumor) could be excluded, which could negatively impact the model's performance.
- **Assumption of Consistency:** Cropping assumes that the region of interest (ROI) is consistently located within the same region across all images. This might not hold true for all patients or images, potentially leading to information loss.


In [77]:
# Function to load and scale a NIfTI image
def load_and_scale_image(filepath):
    image = nib.load(filepath).get_fdata()
    scaled_image = scaler.fit_transform(image.reshape(-1, image.shape[-1])).reshape(image.shape)
    return scaled_image

In [83]:
# Process and save each image-mask pair in to a numpy array

for img_idx in range(len(t2_list)):
    print(f"Processing image and mask number: {img_idx}")

    # Load and scale images
    image_t1 = load_and_scale_image(t1_list[img_idx])
    image_t2 = load_and_scale_image(t2_list[img_idx])
    image_t1ce = load_and_scale_image(t1ce_list[img_idx])
    image_flair = load_and_scale_image(flair_list[img_idx])

    # Load and process the mask
    mask = nib.load(mask_list[img_idx]).get_fdata().astype(np.uint8)
    mask[mask == 4] = 3  # Reassign mask values 4 to 3

    # Combine the images into a multi-channel array
    combined_image = np.stack([image_t1, image_flair, image_t1ce, image_t2], axis=-1)

    # Crop the combined image and mask
    cropped_image = combined_image[CROP_BOUNDARIES]
    cropped_mask = mask[CROP_BOUNDARIES]

    # Check if the mask contains at least 1% non-zero values
    _, counts = np.unique(cropped_mask, return_counts=True)
    non_zero_ratio = 1 - (counts[0] / counts.sum())
    
    if non_zero_ratio > 0.01:  # At least 1% useful volume
        # One-hot encode the mask
        categorical_mask = to_categorical(cropped_mask, num_classes=4)

        # Save the processed image and mask as NumPy arrays
        np.save(f'{OUTPUT_IMAGE_PATH}image_{img_idx}.npy', cropped_image)
        np.save(f'{OUTPUT_MASK_PATH}mask_{img_idx}.npy', categorical_mask)
        print("Saved image and mask.")
    else:
        print("Skipped due to low non-zero ratio.")

print("Preprocessing complete.")

Processing image and mask number: 0
Saved image and mask.
Processing image and mask number: 1
Saved image and mask.
Processing image and mask number: 2
Saved image and mask.
Processing image and mask number: 3
Saved image and mask.
Processing image and mask number: 4
Skipped due to low non-zero ratio.
Processing image and mask number: 5
Saved image and mask.
Processing image and mask number: 6
Saved image and mask.
Processing image and mask number: 7
Saved image and mask.
Processing image and mask number: 8
Saved image and mask.
Processing image and mask number: 9
Saved image and mask.
Processing image and mask number: 10
Saved image and mask.
Processing image and mask number: 11
Saved image and mask.
Processing image and mask number: 12
Saved image and mask.
Processing image and mask number: 13
Saved image and mask.
Processing image and mask number: 14
Saved image and mask.
Processing image and mask number: 15
Saved image and mask.
Processing image and mask number: 16
Saved image and 

OSError: problem writing element 257024 to file