This notebook does the following (as adapted):

1. Shows a simple U-Net architecture
2. Creates augmented images
3. Divides the images into training, validation, and testing datasets
4. Visualizes the original image and binary mask side by side

### Import

In [1]:
import os
import numpy as np
import cv2            #read and write images
from glob import glob #extract file paths
from tqdm import tqdm #track the iterations and remaining time
from sklearn.model_selection import train_test_split
from albumentations import HorizontalFlip, CoarseDropout, RandomBrightness, RandomContrast


### Loading dataset: images and masks

In [3]:
def load_dataset(path):
    """Collect the image and mask file paths stored under ``path``.

    Every entry of ``<path>/images`` and ``<path>/masks`` matched by the
    ``*`` glob pattern is returned; no filtering by extension is done.

    Args:
        path: Folder containing the ``images`` and ``masks`` sub-folders.

    Returns:
        A tuple ``(images, masks)`` of two lists of path strings, each sorted
        in ascending string order.
    """
    def _sorted_entries(subfolder):
        # Sorting makes the listing order deterministic across runs.
        pattern = os.path.join(path, subfolder, "*")
        return sorted(glob(pattern))

    return _sorted_entries("images"), _sorted_entries("masks")


### Dataset split

In [4]:
def split_dataset(images, masks, split=0.2, random_state=42):
    """Split paired image/mask lists into train, validation and test subsets.

    The validation and the test subset each receive
    ``int(len(images) * split)`` samples; the training subset keeps the rest.

    Args:
        images: Sequence of image paths.
        masks: Sequence of mask paths, positionally matched to ``images``.
        split: Fraction of the full dataset assigned to the validation subset
            and, separately, to the test subset.
        random_state: Seed passed to ``train_test_split`` so that the split is
            reproducible.

    Returns:
        ``(train_x, train_y), (valid_x, valid_y), (test_x, test_y)``.

    Raises:
        ValueError: If ``images`` and ``masks`` differ in length.
    """
    if len(images) != len(masks):
        raise ValueError(
            f"images and masks must have the same length, "
            f"got {len(images)} and {len(masks)}"
        )

    split_size = int(len(images) * split)

    # Splitting both lists in a single call applies the same shuffled indices
    # to images and masks, so each image stays aligned with its mask by
    # construction instead of relying on two separate calls sharing a seed.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=split_size, random_state=random_state
    )
    # The test subset is carved out of what remains after validation.
    train_x, test_x, train_y, test_y = train_test_split(
        train_x, train_y, test_size=split_size, random_state=random_state
    )

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)

### Saving the dataset

#### Creating a folder

In [5]:
def create_dir(path):
    """Create the directory ``path``, including missing parents, if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first.
    os.makedirs(path, exist_ok=True)

#### Saving the dataset

##### 1. Creating the folders

In [13]:
# Output root: one sub-folder per split, each with "images" and "masks" inside.
save_dir = os.path.join("dataset")
for item in ("train", "valid", "test"):
    for kind in ("images", "masks"):
        create_dir(os.path.join(save_dir, item, kind))

### Executing the program

#### Loading dataset

In [7]:
def save_dataset(images, masks, save_dir, augment=False):
    """Write image/mask pairs, optionally with augmented copies, to ``save_dir``.

    Each pair is read with OpenCV and written to ``<save_dir>/images`` and
    ``<save_dir>/masks`` as ``<name>_<idx>.TIF``. ``<name>`` is the source
    image's file name without its final extension and ``<idx>`` numbers the
    variants of that pair (0 is the unmodified pair). This function does not
    create the two target folders.

    Args:
        images: Sequence of image file paths.
        masks: Sequence of mask file paths, positionally matched to ``images``.
        save_dir: Folder that holds the ``images`` and ``masks`` sub-folders.
        augment: When truthy, four extra variants are saved per pair, each
            produced by applying one transform to the original pair:
            horizontal flip (idx 1), coarse dropout (idx 2), random brightness
            (idx 3) and random contrast (idx 4).

    Raises:
        ValueError: If ``images`` and ``masks`` differ in length.
        OSError: If an input cannot be read or an output cannot be written.
    """
    if len(images) != len(masks):
        # zip() would otherwise silently drop the unmatched tail.
        raise ValueError(
            f"images and masks must have the same length, "
            f"got {len(images)} and {len(masks)}"
        )

    # The transforms do not depend on the current pair, so build them once.
    # NOTE(review): RandomBrightness / RandomContrast appear to be deprecated
    # in newer albumentations releases in favour of RandomBrightnessContrast;
    # confirm against the installed version.
    transforms = []
    if augment:
        transforms = [
            HorizontalFlip(p=1),
            CoarseDropout(p=1, max_holes=10, max_height=32, max_width=32),
            RandomBrightness(p=1.0),
            RandomContrast(p=1.0),
        ]

    for image_path, mask_path in tqdm(zip(images, masks), total=len(images)):
        # basename/splitext handles any path separator and strips only the
        # final extension, so "a.b.tif" and "a.c.tif" get distinct names.
        name = os.path.splitext(os.path.basename(image_path))[0]

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise OSError(f"Could not read image: {image_path}")
        if mask is None:
            raise OSError(f"Could not read mask: {mask_path}")

        # Variant 0 is the untouched pair; every transform is applied to the
        # original pair, not chained onto the previous variant.
        variants = [(image, mask)]
        for transform in transforms:
            augmented = transform(image=image, mask=mask)
            variants.append((augmented["image"], augmented["mask"]))

        for idx, (aug_image, aug_mask) in enumerate(variants):
            aug_name = f"{name}_{idx}.TIF"

            save_image_path = os.path.join(save_dir, "images", aug_name)
            save_mask_path = os.path.join(save_dir, "masks", aug_name)

            for out_path, array in (
                (save_image_path, aug_image),
                (save_mask_path, aug_mask),
            ):
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(out_path, array):
                    raise OSError(f"Could not write file: {out_path}")

In [None]:
# The "images" and "masks" folders are looked up in the current working directory.
dataset_path = os.getcwd()
images, masks = load_dataset(dataset_path)
print(f"Images: {len(images)} - Masks: {len(masks)}")

# Fail fast here rather than deep inside the split/save steps.
assert len(images) > 0, f"No files found in {os.path.join(dataset_path, 'images')}"
assert len(images) == len(masks), "Each image must have exactly one mask"

Images: 64 - Masks: 64


#### Dataset split

In [9]:
# Validation and test each get 20% of the full dataset; training keeps the rest.
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = split_dataset(images, masks, split=0.2)

# Sanity checks: pairs stay aligned in count, no sample is lost, and no image
# path appears in more than one split.
assert len(train_x) == len(train_y)
assert len(valid_x) == len(valid_y)
assert len(test_x) == len(test_y)
assert len(train_x) + len(valid_x) + len(test_x) == len(images)
assert not (set(train_x) & set(valid_x))
assert not (set(train_x) & set(test_x))
assert not (set(valid_x) & set(test_x))

##### 2. Saving the training dataset

In [10]:
# Only the training split is written with augmented variants.
save_dataset(
    images=train_x,
    masks=train_y,
    save_dir=os.path.join(save_dir, "train"),
    augment=True,
)

100%|██████████| 40/40 [07:51<00:00, 11.78s/it]


##### 3. Saving the validation dataset

In [11]:
# The validation split is written as-is, without augmentation.
save_dataset(
    images=valid_x,
    masks=valid_y,
    save_dir=os.path.join(save_dir, "valid"),
    augment=False,
)

100%|██████████| 12/12 [00:37<00:00,  3.09s/it]


##### 4. Saving the testing dataset

In [12]:
# The test split is written as-is, without augmentation.
save_dataset(
    images=test_x,
    masks=test_y,
    save_dir=os.path.join(save_dir, "test"),
    augment=False,
)

100%|██████████| 12/12 [00:11<00:00,  1.03it/s]


In [42]:
# Number of source paths per split (image count - mask count); augmented
# copies written by save_dataset are not included in these lists.
split_summary = [
    f"Train: {len(train_x)} - {len(train_y)}",
    f"Valid: {len(valid_x)} - {len(valid_y)}",
    f"Test: {len(test_x)} - {len(test_y)}",
]
print("\n".join(split_summary))

Train: 116 - 116
Valid: 28 - 28
