
# Data Augmentation



Source and ideas for other augmentation methods:

- https://www.kaggle.com/corochann/bengali-albumentations-data-augmentation-tutorial

- https://albumentations.readthedocs.io/en/latest/

- https://albumentations-demo.herokuapp.com

In [1]:
import os
import glob
from glob import glob  # rebinds ``glob`` to the function; the cells below call glob(...) directly

import cv2
from albumentations import (
    CenterCrop,
    Crop,
    ElasticTransform,
    GridDistortion,
    HorizontalFlip,
    RandomBrightnessContrast,
    RandomRotate90,
    Resize,
    Rotate,
    ShiftScaleRotate,
    VerticalFlip,
)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

Set the path to the directory where the images and masks are saved.

In [2]:
# Define paths relative to the current working directory.
import sys

dir = os.getcwd()                       # directory the notebook is run from
parent_dir = os.path.split(dir)[0]      # one level above the working directory
PATH = parent_dir + '/data/fat_muscle'  # dataset root used by the cells below

# Make modules that live in the parent directory importable.
sys.path.append(parent_dir)

The following code deletes all files currently in the folders where we want to save our augmented images and masks. Run these cells before creating new augmented images.

### Delete Images

In [3]:
# Remove every entry currently matched inside the augmented-images folder
# so that a fresh augmentation run starts from an empty directory.
files = glob(PATH + '/augmented_data/images/*')
for f in files:
    os.remove(f)

### Delete Masks

In [4]:
# Remove every entry currently matched inside the augmented-masks folder
# so that a fresh augmentation run starts from an empty directory.
files = glob(PATH + '/augmented_data/masks/*')
for f in files:
    os.remove(f)

In [5]:
def load_data(path, split=0.1):
    """Collect image/mask file paths under *path* and split them three ways.

    Images are listed from ``<path>/images/*`` and masks from
    ``<path>/masks/*``. Both listings are sorted, and the i-th image is paired
    with the i-th mask (this presumes matching file names in the two folders
    -- verify when adding new data).

    Args:
        path: Dataset root containing the ``images`` and ``masks`` folders.
        split: Fraction of the *whole* dataset that goes to the validation
            set and, separately, to the test set. Each of the two receives
            ``int(split * total)`` samples; the rest is the training set.

    Returns:
        ``(train_x, train_y), (valid_x, valid_y), (test_x, test_y)`` where
        every ``*_x`` is a list of image paths and every ``*_y`` the list of
        the corresponding mask paths.

    Raises:
        ValueError: If the number of images differs from the number of masks.
    """
    images = sorted(glob(os.path.join(path, "images/*")))  # ultrasound images
    masks = sorted(glob(os.path.join(path, "masks/*")))    # segmentation masks

    if len(images) != len(masks):
        raise ValueError(
            f"Found {len(images)} images but {len(masks)} masks under {path!r}; "
            "every image needs exactly one mask."
        )

    # Validation and test sets have the same absolute size.
    # NOTE(review): if this evaluates to 0 it is passed on unchanged to
    # train_test_split, which is expected to reject it.
    holdout_size = int(split * len(images))

    # Split images and masks in ONE call so both are indexed with the same
    # shuffled indices; pairing no longer depends on two independent calls
    # happening to shuffle identically.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=holdout_size, random_state=42
    )
    train_x, test_x, train_y, test_y = train_test_split(
        train_x, train_y, test_size=holdout_size, random_state=42
    )

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)

The following code creates the folders in which the augmented images and masks are saved. If the folders already exist, do not run this code.

In [12]:
#def create_dir(path):
#    if not os.path.exists(path):
#        os.makedirs(path)

In [13]:
#create_dir('/gdrive/My Drive/Caressoma/Data/fat_muscle/augmented_data/test_images') # folder for images
#create_dir('/gdrive/My Drive/Caressoma/Data/fat_muscle/augmented_data/test_masks') # folder for masks

The following function includes all the data augmentation techniques we apply to the images.

In [None]:
def augment_data_composite(images, masks, save_path, augment=True, nr_of_aug_images=6):
    """Save each image/mask pair plus randomly augmented copies of it.

    For every pair the unmodified original is written first. When *augment*
    is true, ``nr_of_aug_images`` additional versions follow; each one is the
    original passed through a chain of random transforms:

    1. ``Rotate(limit=10, p=0.5)`` with reflected borders (image and mask),
    2. ``HorizontalFlip(p=0.5)`` (image and mask),
    3. ``RandomBrightnessContrast(p=0.5)`` (image only, the mask is untouched).

    Files are written to ``<save_path>/images`` and ``<save_path>/masks``
    (created if missing) as ``<name>_<idx><ext>``, where ``idx`` 0 is the
    original. The plain ``<name><ext>`` is used only when a single pair is
    processed and it yields a single output.

    Args:
        images: Sequence of image file paths.
        masks: Sequence of mask file paths, aligned with *images*.
        save_path: Output root directory.
        augment: Whether to create augmented copies in addition to the
            original.
        nr_of_aug_images: Number of augmented copies per input pair.

    Returns:
        None. The results are written to disk.

    Raises:
        OSError: If an input file cannot be read or an output file cannot be
            written.
    """
    image_dir = os.path.join(save_path, "images")
    mask_dir = os.path.join(save_path, "masks")
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(mask_dir, exist_ok=True)

    # The transform objects do not depend on the input, so build them once.
    rotate = Rotate(limit=10, p=0.5, border_mode=cv2.BORDER_REFLECT_101)
    flip = HorizontalFlip(p=0.5)
    brightness_contrast = RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.1, p=0.5)

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        # splitext/basename keep names that contain extra dots intact and do
        # not depend on "/" being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(image_file))
        mask_name, mask_extn = os.path.splitext(os.path.basename(mask_file))

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if image is None:
            raise OSError(f"Could not read image file: {image_file}")
        mask = cv2.imread(mask_file, cv2.IMREAD_COLOR)
        if mask is None:
            raise OSError(f"Could not read mask file: {mask_file}")

        save_images = [image]
        save_masks = [mask]

        if augment:
            for _ in range(nr_of_aug_images):
                # Geometric transforms must hit image and mask identically.
                augmented = rotate(image=image, mask=mask)
                augmented = flip(image=augmented['image'], mask=augmented['mask'])
                x_aug = augmented['image']
                y_aug = augmented['mask']

                # Disabled experiments, kept for reference. Unless noted they
                # were applied to x_aug / y_aug at this point of the chain
                # (ORIG_HEIGHT, ORIG_WIDTH = image.shape[0], image.shape[1]):
                #   - vertical stretch in 50% of cases:
                #       Crop(x_min=0, y_min=round(ORIG_HEIGHT/13), x_max=ORIG_WIDTH,
                #            y_max=round(ORIG_HEIGHT/3), p=1)
                #       followed by Resize(height=ORIG_HEIGHT, width=ORIG_WIDTH, p=1)
                #   - GridDistortion(num_steps=2, distort_limit=0.2,
                #                    border_mode=cv2.BORDER_REFLECT_101, value=None,
                #                    mask_value=None, p=0.25)
                #   - GridDistortion(num_steps=5, distort_limit=0.2,
                #                    border_mode=cv2.BORDER_REFLECT_101, value=None,
                #                    mask_value=None, p=0.25)
                #   - ShiftScaleRotate(border_mode=cv2.BORDER_CONSTANT, value=0,
                #                      mask_value=0, shift_limit_x=0,
                #                      shift_limit_y=[-0.1, 0.4], scale_limit=0,
                #                      rotate_limit=0, p=0.25)
                #   - ElasticTransform(alpha=1, sigma=50, alpha_affine=50,
                #                      border_mode=cv2.BORDER_REFLECT_101, value=None,
                #                      mask_value=None, p=1)
                #       (this one was applied to the un-augmented image/mask)

                # Brightness and contrast transformation NOT applied to mask.
                x_aug = brightness_contrast(image=x_aug)['image']

                save_images.append(x_aug)
                save_masks.append(y_aug)

        # Drop the index suffix only when nothing can collide: one input pair
        # AND one output. Otherwise the augmented copies of a single input
        # would all be written to the same file and overwrite each other.
        keep_plain_name = len(images) == 1 and len(save_images) == 1

        for idx, (out_image, out_mask) in enumerate(zip(save_images, save_masks)):
            suffix = "" if keep_plain_name else f"_{idx}"
            image_path = os.path.join(image_dir, f"{image_name}{suffix}{image_extn}")
            mask_path = os.path.join(mask_dir, f"{mask_name}{suffix}{mask_extn}")

            # cv2.imwrite reports failure through its boolean return value.
            if not cv2.imwrite(image_path, out_image):
                raise OSError(f"Could not write image file: {image_path}")
            if not cv2.imwrite(mask_path, out_mask):
                raise OSError(f"Could not write mask file: {mask_path}")


In [8]:
def augment_data(images, masks, save_path, augment=True, height=256, width=256):
    """Save each image/mask pair, optionally with six augmented variants.

    When *augment* is true, six variants are derived from the ORIGINAL pair
    (the transforms are not chained), in this order:

    1. ``RandomRotate90``
    2. ``Rotate(limit=45)``
    3. ``GridDistortion(num_steps=7, distort_limit=0.5)``
    4. ``ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45)``
    5. ``RandomBrightnessContrast(brightness_limit=(-0.5, 1.5), contrast_limit=0.2)``
    6. ``Crop`` of the top-left window of at most 529 x 259 pixels

    Every output is resized to ``width`` x ``height`` and written to
    ``<save_path>/images`` and ``<save_path>/masks`` (created if missing) as
    ``<name>_<idx><ext>``, where ``idx`` 0 is the original. The plain
    ``<name><ext>`` is used only when a single pair is processed and it
    yields a single output.

    Args:
        images: Sequence of image file paths.
        masks: Sequence of mask file paths, aligned with *images*.
        save_path: Output root directory.
        augment: Whether to create the augmented variants in addition to the
            original.
        height: Height in pixels of the saved images and masks.
        width: Width in pixels of the saved images and masks.

    Returns:
        None. The results are written to disk.

    Raises:
        OSError: If an input file cannot be read or an output file cannot be
            written.
    """
    image_dir = os.path.join(save_path, "images")
    mask_dir = os.path.join(save_path, "masks")
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(mask_dir, exist_ok=True)

    # Transforms that do not depend on the input size are built once.
    # p = probability that the transform is applied; 1.0 = always.
    fixed_transforms = (
        RandomRotate90(p=1.0),
        Rotate(limit=45, p=1.0),
        GridDistortion(num_steps=7, distort_limit=0.5, p=1.0),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, p=1.0),
        RandomBrightnessContrast(brightness_limit=(-0.5, 1.5), contrast_limit=0.2, p=1.0),
    )

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        # splitext/basename keep names that contain extra dots intact and do
        # not depend on "/" being the path separator.
        image_name, image_extn = os.path.splitext(os.path.basename(image_file))
        mask_name, mask_extn = os.path.splitext(os.path.basename(mask_file))

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_file, cv2.IMREAD_COLOR)
        if image is None:
            raise OSError(f"Could not read image file: {image_file}")
        mask = cv2.imread(mask_file, cv2.IMREAD_COLOR)
        if mask is None:
            raise OSError(f"Could not read mask file: {mask_file}")

        save_images = [image]
        save_masks = [mask]

        if augment:
            # The crop window (529 x 259) is presumably tuned to this dataset's
            # frame size -- confirm. It is clamped to the actual image so that
            # smaller inputs are cropped as far as possible instead of failing.
            img_height, img_width = image.shape[:2]
            crop = Crop(
                p=1.0,
                x_min=0,
                y_min=0,
                x_max=min(529, img_width),
                y_max=min(259, img_height),
            )

            for transform in (*fixed_transforms, crop):
                augmented = transform(image=image, mask=mask)
                save_images.append(augmented['image'])
                save_masks.append(augmented['mask'])

        # Drop the index suffix only when nothing can collide: one input pair
        # AND one output. Otherwise the variants of a single input would all
        # be written to the same file and overwrite each other.
        keep_plain_name = len(images) == 1 and len(save_images) == 1

        for idx, (out_image, out_mask) in enumerate(zip(save_images, save_masks)):
            out_image = cv2.resize(out_image, (width, height))
            # Nearest-neighbour keeps mask values exactly as they are; the
            # default bilinear interpolation would blend neighbouring label
            # values along region borders.
            out_mask = cv2.resize(out_mask, (width, height), interpolation=cv2.INTER_NEAREST)

            suffix = "" if keep_plain_name else f"_{idx}"
            image_path = os.path.join(image_dir, f"{image_name}{suffix}{image_extn}")
            mask_path = os.path.join(mask_dir, f"{mask_name}{suffix}{mask_extn}")

            # cv2.imwrite reports failure through its boolean return value.
            if not cv2.imwrite(image_path, out_image):
                raise OSError(f"Could not write image file: {image_path}")
            if not cv2.imwrite(mask_path, out_mask):
                raise OSError(f"Could not write mask file: {mask_path}")

We can now apply the function to our data. *IMPORTANT*: apply it to the training data only, never to the validation or test data!

In [9]:
# Load the file lists and unpack the three (images, masks) splits.
train_split, valid_split, test_split = load_data(PATH)
train_x, train_y = train_split
valid_x, valid_y = valid_split
test_x, test_y = test_split

# Report how many images and masks ended up in each split.
print(f"Train Images: {len(train_x)} - Train Masks: {len(train_y)}")
print(f"Valid Images: {len(valid_x)} - Valid Masks: {len(valid_y)}")
print(f"Test Images: {len(test_x)} - Test Masks: {len(test_y)}")

Train Images: 14 - Train Masks: 14
Valid Images: 1 - Valid Masks: 1
Test Images: 1 - Test Masks: 1


In [11]:
# Augment the training split only (train_x / train_y); validation and test
# data are left untouched.
augment_data(
    train_x,
    train_y,
    PATH + '/augmented_data/',
    augment=True,
)

100%|██████████| 14/14 [00:01<00:00, 10.79it/s]
