## Create augmented data

In [1]:
from dltoolkit.utils.generic import list_images

from keras.preprocessing.image import ImageDataGenerator

import numpy as np
import os, time, cv2, shutil, glob

TRAINING_PATH = "../data/MSC8002/training"              # base folder holding the images and ground truths subfolders
FLDR_GROUND_TRUTH = "groundtruths"                      # folder with the ground truths
FLDR_IMAGES = "images"                                  # folder with the images
RANDOM_STATE = 122177                                   # base seed handed to the augmentation calls below

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def create_random_patient(source_patient_ID, target_patient_ID,
                          base_path, imgs_subfolder, msks_subfolder,
                          img_exts, data_gen_args, seed, verbose=False):
    """
    Takes an existing patient's volume and creates a new, slightly augmented one, transforming
    both the images as well as ground truths. Every slice in a volume is transformed in exactly
    the same way. To ensure patient to patient variability it is important to use a different seed
    for every new patient.
    :param source_patient_ID: name of the folder with the source volume
    :param target_patient_ID: name of the folder where the augmented volume will be created
    :param base_path: path where the images and ground truths subfolders are located
    :param imgs_subfolder: name of the subbolder containing images
    :param msks_subfolder: name of the subbolder containing ground truths
    :param img_exts: extension of the slice images 
    :param data_gen_args: ImageDataGenerator parameters
    :param seed: ImageDataGenerator seed 
    :return: N/A
    """
    start_time = time.time()
    
    print("Augmenting {} to {}".format(source_patient_ID, target_patient_ID))
    
    # Create the two identical data generators
    img_datagen = ImageDataGenerator(**data_gen_args)
    msk_datagen = ImageDataGenerator(**data_gen_args)

    # Construct the path to the source patient
    source_img_path = os.path.join(base_path, imgs_subfolder + "/" + source_patient_ID)
    source_msk_path = os.path.join(base_path, msks_subfolder + "/" + source_patient_ID)

    if verbose:
        print("Source:")
        print(source_img_path)
        print(source_msk_path)
    
    # Construct the path to save the new patient to
    target_img_path = os.path.join(base_path, imgs_subfolder + "/" + target_patient_ID)
    target_msk_path = os.path.join(base_path, msks_subfolder + "/" + target_patient_ID)

    if verbose:
        print("Target:")
        print(target_img_path)
        print(target_msk_path)
    
    # Create target folders, delete any existing folders
    if os.path.exists(target_img_path):
        shutil.rmtree(target_img_path)
    os.makedirs(target_img_path)

    if os.path.exists(target_msk_path):
        shutil.rmtree(target_msk_path)
    os.makedirs(target_msk_path)

    img_list = sorted(list(list_images(basePath=source_img_path, validExts=img_exts)))
    msk_list = sorted(list(list_images(basePath=source_msk_path, validExts=img_exts)))
    
    if verbose:
        print("       # of images: {}".format(len(img_list)))
        print("# of ground truths: {}".format(len(msk_list)))

    # Augment the files
    for i, (tmp_img, tmp_msk) in enumerate(zip(img_list, msk_list)):
        # Read the image
        img = cv2.imread(tmp_img, cv2.IMREAD_GRAYSCALE)
        img = np.reshape(img, (img.shape[0], img.shape[1], 1))

        # Create the generator
        img_gen = img_datagen.flow(np.asarray([img]),
                                              seed=seed,
                                              batch_size=1,
                                              save_to_dir=target_img_path,
                                              save_prefix=target_patient_ID + "_slice" + str(i+1) + "_",
                                              save_format="jpeg",
                                            )
        # Create the new image
        next(img_gen)

        # Read the ground truth
        msk = cv2.imread(tmp_msk, cv2.IMREAD_GRAYSCALE)
        msk = np.reshape(msk, (msk.shape[0], msk.shape[1], 1))

        msk_gen = msk_datagen.flow(np.asarray([msk]),
                                              seed=seed,
                                              batch_size=1,
                                              save_to_dir=target_msk_path,
                                              save_prefix=target_patient_ID + "_slice" + str(i+1) + "_",
                                              save_format="jpeg",
                                             )
        # Create the new ground truth
        next(msk_gen)
        
    def rename_jpeg_jpg(rename_path):
        # Rename from *.jpeg to *.jpg
        files = glob.glob(rename_path)

        for file in files:
            new_name = file.replace('.jpeg', '.jpg')
            os.rename(file, new_name)

    rename_jpeg_jpg(target_img_path + "/*.jpeg")
    rename_jpeg_jpg(target_msk_path + "/*.jpeg")
            
    print("Elapsed augmentation time: {:.2f} min\n".format(int((time.time() - start_time))/60))
    
    return target_img_path, target_msk_path

In [3]:
# ImageDataGenerator settings shared by every augmented copy
data_gen_args = dict(
    rotation_range=2.,
    width_shift_range=0.01,
    height_shift_range=0.01,
    shear_range=1.2,
    zoom_range=0.01,
    fill_mode='nearest'
)

src_patient_ID = "patient_1"

# One (target patient, seed) pair per augmented copy. Every copy uses a different seed so
# that the new patients differ from each other.
augmentation_plan = [
    ("patient_2", RANDOM_STATE),
    ("patient_3", RANDOM_STATE * 3),
]

for tgt_patient_ID, patient_seed in augmentation_plan:
    # The returned target paths are not needed here, so the result is not bound to a name
    create_random_patient(source_patient_ID=src_patient_ID,
                          target_patient_ID=tgt_patient_ID,
                          base_path=TRAINING_PATH,
                          imgs_subfolder=FLDR_IMAGES,
                          msks_subfolder=FLDR_GROUND_TRUTH,
                          img_exts=".jpg",
                          data_gen_args=data_gen_args,
                          seed=patient_seed,
                          verbose=True)

print("Data augmentation complete.")

Augmenting patient_1 to patient_2
Source:
../data/MSC8002/training/images/patient_1
../data/MSC8002/training/groundtruths/patient_1
Target:
../data/MSC8002/training/images/patient_2
../data/MSC8002/training/groundtruths/patient_2
       # of images: 247
# of ground truths: 247
Elapsed augmentation time: 0.08 min

Augmenting patient_1 to patient_3
Source:
../data/MSC8002/training/images/patient_1
../data/MSC8002/training/groundtruths/patient_1
Target:
../data/MSC8002/training/images/patient_3
../data/MSC8002/training/groundtruths/patient_3
       # of images: 247
# of ground truths: 247
Elapsed augmentation time: 0.08 min

Data augmentation complete.
