Dependencies

In [1]:
# Dependencies 
import albumentations as A
import SimpleITK as sitk
import numpy as np
import pathlib 
import os 
import sys
from natsort import natsorted
#import matplotlib.pyplot as plt
import shutil
from tqdm import tqdm

  check_for_updates()


Paths

In [6]:
path_prefix = "/home/kanthoulis/spider/dataset/"

train_img_slice_dir = pathlib.Path(path_prefix + "train_image_slices")
train_label_slice_dir = pathlib.Path(path_prefix + "train_label_slices")

# Test directory to write in 
#train_img_augmented_slice_dir = pathlib.Path(r"D:/Spider Data/train_image_augmented_slices")
#train_label_augmented_slice_dir = pathlib.Path(r"D:/Spider Data/train_label_augmented_slices")

train_img_augmented_slice_dir = pathlib.Path(path_prefix + "train_augmented_image_slices")
train_label_augmented_slice_dir = pathlib.Path(path_prefix + "train_augmented_label_slices")

# Get lists of the files in the directories 
image_train_dir_list = os.listdir(train_img_slice_dir) 
label_train_dir_list = os.listdir(train_label_slice_dir)

# Sort lists just to be safe
image_train_dir_list = natsorted(image_train_dir_list)
label_train_dir_list = natsorted(label_train_dir_list)

train_len = len(image_train_dir_list)
test_len = len(label_train_dir_list)

'''
if (train_len != test_len):
    print("Error: Directories aren't of equal size")
    sys.exit()
'''
    
dirlen = train_len

Augmentation Parameters

In [7]:
# Set number of augmented images to generate per image 
# also applies for flipped so in total you get 2 x augmented_no instances per image 
augmented_no = 4

# Define the augmentation pipeline for the image
noise_transform = A.Compose([
    A.GaussNoise(var_limit=(10, 30), mean=0, p=0.8)  # Gaussian Noise  
])

# Define the elastic transform pipeline
elastic_transform = A.ElasticTransform(alpha=50, sigma=10, p=0.8)
"""

# Define the augmentation pipeline for the image
noise_transform = A.Compose([
    A.GaussNoise(var_limit=(0.002, 0.05), mean=0, p=1)  # Gaussian Noise  
])

# Define the elastic transform pipeline
elastic_transform = A.ElasticTransform(alpha=25, sigma=4, p=1)
"""

'\n\n# Define the augmentation pipeline for the image\nnoise_transform = A.Compose([\n    A.GaussNoise(var_limit=(0.002, 0.05), mean=0, p=1)  # Gaussian Noise  \n])\n\n# Define the elastic transform pipeline\nelastic_transform = A.ElasticTransform(alpha=25, sigma=4, p=1)\n'

Augmentation of slices and saving to new directories 

In [8]:
# Loop through each image in the training set
for idx in tqdm(range(0, dirlen), desc="Augmenting training set", unit="image"):
    # Paths to the current image and label
    image_path = train_img_slice_dir.joinpath(image_train_dir_list[idx])
    label_path = train_label_slice_dir.joinpath(label_train_dir_list[idx])

    # Read the image and label using SimpleITK
    image_sitk = sitk.ReadImage(image_path)
    label_sitk = sitk.ReadImage(label_path)

    # Convert the images to NumPy arrays for processing
    image_np = sitk.GetArrayFromImage(image_sitk).astype(np.float32)
    label_np = sitk.GetArrayFromImage(label_sitk)

    # Apply augmentations and generate the augmented images
    for aug_idx in range(augmented_no):
        # Normalize the image before applying Gaussian noise
        image_min = np.min(image_np)
        image_max = np.max(image_np)

        # Normalize the image, handling edge cases where image_max == image_min
        if image_max != image_min:
            image_normalised_np = (image_np - image_min) / (image_max - image_min)
        else:
            image_normalised_np = np.zeros_like(image_np)  # Default to zero array

        # Apply elastic deformation to both the image and the label
        # This ensures that both the image and its corresponding label are deformed similarly
        elastic_result = elastic_transform(image=image_np, mask=label_np)
        image_deformed_np = elastic_result["image"]
        label_deformed_np = elastic_result["mask"]

        # Apply Gaussian noise only to the image after deformation
        noise_result = noise_transform(image=image_deformed_np)
        image_augmented_np = noise_result["image"]

        # Revert normalization to restore the original intensity range of the image
        image_augmented_np = np.clip(image_augmented_np, 0, 1)  # Ensure values are within [0, 1]
        image_augmented_np = image_augmented_np * (image_max - image_min) + image_min

        # Convert the augmented image and label back to SimpleITK format
        image_augmented_sitk = sitk.GetImageFromArray(image_augmented_np)
        label_deformed_sitk = sitk.GetImageFromArray(label_deformed_np)

        # Prepare filenames for saving augmented images and labels
        input_path_split = image_train_dir_list[idx].split(".")
        pre = input_path_split[0]  # '1_t1_0'
        post = input_path_split[1]  # File extension (e.g., '.mha')

        # Create a unique identifier for the augmented files
        pre_img = pre + "_" + str(aug_idx)
        augmented_filename = pre_img + "." + post

        # Paths for saving augmented images and labels
        image_augmented_path = train_img_augmented_slice_dir.joinpath(augmented_filename)
        label_augmented_path = train_label_augmented_slice_dir.joinpath(augmented_filename)

        # Save the augmented image and label
        sitk.WriteImage(image_augmented_sitk, image_augmented_path)
        sitk.WriteImage(label_deformed_sitk, label_augmented_path)


Augmenting training set: 100%|██████████| 11484/11484 [06:37<00:00, 28.87image/s]
