# Data preprocessing

In [None]:
import os
import shutil
from pathlib import Path
import imgaug as ia
import imgaug.augmenters as iaa
import imageio
from imgaug.augmenters.size import Resize

## initial directory structure

- the folders 'images' and 'labels' contain the images and their associated annotations used for training, validation and testing, respectively
- at the beginning, these correspond to the unaugmented training, validation or test data sets
- if an augmented dataset is to be used for training/validation/testing, its folder names must be changed to 'images' and 'labels'

## data distribution

In [None]:
# Path of the combined dataset
SOURCE_PATH_IMG = Path("/Users/paulawi/Downloads/dataset/combined_dataset/images")
SOURCE_PATH_LBL = Path("/Users/paulawi/Downloads/dataset/combined_dataset/labels")


def _copy_listed_files(filenames_file, source_dir, destination_dir, suffix):
    """Copy the files named in a split list from one directory into another.

    Args:
        filenames_file: Text file holding one file stem (name without
            extension) per line.
        source_dir: Directory that contains the files to copy.
        destination_dir: Directory to copy into. It is created when missing,
            because ``shutil.copy`` would otherwise write every file to a
            single regular file named like the directory.
        suffix: Extension appended to each stem, e.g. ``".jpg"``.

    Returns:
        The number of files that were copied.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)
    copied = 0
    missing = []
    with filenames_file.open(encoding="utf-8") as lines:
        for line in lines:
            stem = line.strip()
            if not stem:
                # Blank lines carry no file name.
                continue
            source_file = source_dir / (stem + suffix)
            if source_file.is_file():
                shutil.copy(source_file, destination_dir)
                copied += 1
            else:
                missing.append(source_file.name)
    if missing:
        # Listed-but-absent files are still skipped, but no longer silently.
        print(f"{len(missing)} file(s) listed in {filenames_file} "
              f"not found in {source_dir}")
    return copied


# NOTE(review): the split lists live under ".../Downloads/combined_dataset/"
# while the data lives under ".../Downloads/dataset/combined_dataset/" --
# confirm that both locations are intended.

# Training dataset (70%)

FILENAMES_FILE_PATH = Path("/Users/paulawi/Downloads/combined_dataset/txt_of_ECU+SFA+SCH+HGR/train.txt")

DESTINATION_PATH_TRIMG = Path("/Users/paulawi/Downloads/dataset/train/images_orig")
DESTINATION_PATH_TRLBL = Path("/Users/paulawi/Downloads/dataset/train/labels_orig")

_copy_listed_files(FILENAMES_FILE_PATH, SOURCE_PATH_IMG, DESTINATION_PATH_TRIMG, ".jpg")
_copy_listed_files(FILENAMES_FILE_PATH, SOURCE_PATH_LBL, DESTINATION_PATH_TRLBL, ".png")

# Validation dataset (15%)

FILENAMES_FILE_PATH = Path("/Users/paulawi/Downloads/combined_dataset/txt_of_ECU+SFA+SCH+HGR/val.txt")

DESTINATION_PATH_VIMG = Path("/Users/paulawi/Downloads/dataset/valid/images_orig")
DESTINATION_PATH_VLBL = Path("/Users/paulawi/Downloads/dataset/valid/labels_orig")

_copy_listed_files(FILENAMES_FILE_PATH, SOURCE_PATH_IMG, DESTINATION_PATH_VIMG, ".jpg")
_copy_listed_files(FILENAMES_FILE_PATH, SOURCE_PATH_LBL, DESTINATION_PATH_VLBL, ".png")

# Test dataset (15%)

FILENAMES_FILE_PATH = Path("/Users/paulawi/Downloads/combined_dataset/txt_of_ECU+SFA+SCH+HGR/test.txt")

DESTINATION_PATH_TIMG = Path("/Users/paulawi/Downloads/dataset/test/images_orig")
DESTINATION_PATH_TLBL = Path("/Users/paulawi/Downloads/dataset/test/labels_orig")

_copy_listed_files(FILENAMES_FILE_PATH, SOURCE_PATH_IMG, DESTINATION_PATH_TIMG, ".jpg")
_copy_listed_files(FILENAMES_FILE_PATH, SOURCE_PATH_LBL, DESTINATION_PATH_TLBL, ".png")

## data augmentation

In [None]:
ia.seed(1)

# All pipelines below are fixed preprocessing steps, so they run their children
# in the listed order (Sequential's default, random_order=False).
# With random_order=True the order was re-sampled on every call; an image and
# its label are augmented in separate calls and could therefore be processed
# in different orders.

# rotation 90° and resize
# Resize runs last so the final size is 512x512 regardless of how fit_output
# rounds the rotated canvas.
seq1 = iaa.Sequential([
    iaa.Rotate((90, 90), fit_output=True),
    iaa.Resize(512),
])

# rotation 180° and resize
seq2 = iaa.Sequential([
    iaa.Rotate((180, 180), fit_output=True),
    iaa.Resize(512),
])

# rotation 270° and resize
seq3 = iaa.Sequential([
    iaa.Rotate((270, 270), fit_output=True),
    iaa.Resize(512),
])

# additive gaussian noise (no resize; for inputs that already have the final size)
seq4 = iaa.Sequential([
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.04*255, 0.04*255), per_channel=1),
])

# resize, then additive gaussian noise at the final resolution
seq5 = iaa.Sequential([
    iaa.Resize(512),
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.04*255, 0.04*255), per_channel=1),
])

# darkening
seq6 = iaa.Sequential([
    iaa.Resize(512),
    iaa.MultiplyBrightness(mul=(0.9, 0.9)),
])

# brightening
seq7 = iaa.Sequential([
    iaa.Resize(512),
    iaa.MultiplyBrightness(mul=(1.1, 1.1)),
])

# only resize
# NOTE(review): this pipeline is also applied to the label images; Resize uses
# its default interpolation here -- confirm that this is acceptable for masks.
seq8 = iaa.Sequential([
    iaa.Resize(512),
])

# identity pipeline (no augmentation)
seq9 = iaa.Sequential([])

Training dataset

In [None]:
# Source
PATH_IMG = '/Users/paulawi/Downloads/dataset/train/images_orig'
PATH_LBL = '/Users/paulawi/Downloads/dataset/train/labels_orig'

# Destinations
PATH_T_ROT_IMG = '/Users/paulawi/Downloads/dataset/train/images'
PATH_T_ROT_LBL = '/Users/paulawi/Downloads/dataset/train/labels'

PATH_T_ROT_GAUSS_IMG = '/Users/paulawi/Downloads/dataset/train/images_rot_gauss'
PATH_T_ROT_GAUSS_LBL = '/Users/paulawi/Downloads/dataset/train/labels_rot_gauss'


# Defined inside this cell so that the cell can be run on its own.
def _augment_dir(augmenter, source_dir, target_dir, prefix, extension):
    """Augment every file of source_dir and write the results to target_dir.

    Output files are named ``<prefix>_<running index>.<extension>``. Entries
    are processed sorted by file stem, and hidden files such as ``.DS_Store``
    are skipped, so an image directory and a label directory that hold the
    same stems get the same index for matching files. ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        augmenter: Object providing ``augment_image(array)``.
        source_dir: Directory to read from.
        target_dir: Directory to write to; created if missing.
        prefix: File name prefix of the written files.
        extension: File extension of the written files, without the dot.
    """
    os.makedirs(target_dir, exist_ok=True)
    names = sorted(
        (name for name in os.listdir(source_dir) if not name.startswith('.')),
        key=lambda name: os.path.splitext(name)[0],
    )
    for index, name in enumerate(names):
        data = imageio.imread(os.path.join(source_dir, name))
        augmented = augmenter.augment_image(data)
        target_name = '%s_%04d.%s' % (prefix, index, extension)
        imageio.imwrite(os.path.join(target_dir, target_name), augmented)


# Rotation 90°
_augment_dir(seq1, PATH_IMG, PATH_T_ROT_IMG, 'rot90', 'jpg')
_augment_dir(seq1, PATH_LBL, PATH_T_ROT_LBL, 'rot90', 'png')

# Rotation 180°
_augment_dir(seq2, PATH_IMG, PATH_T_ROT_IMG, 'rot180', 'jpg')
_augment_dir(seq2, PATH_LBL, PATH_T_ROT_LBL, 'rot180', 'png')

# Rotation 270°
_augment_dir(seq3, PATH_IMG, PATH_T_ROT_IMG, 'rot270', 'jpg')
_augment_dir(seq3, PATH_LBL, PATH_T_ROT_LBL, 'rot270', 'png')

# Resize original images (512x512) and merging with rotated images
# Written with their own 'resized' prefix: reusing 'rot270' here overwrote the
# 270° rotations produced just above.
_augment_dir(seq8, PATH_IMG, PATH_T_ROT_IMG, 'resized', 'jpg')
_augment_dir(seq8, PATH_LBL, PATH_T_ROT_LBL, 'resized', 'png')

# additive gaussian noise
# Reads the rotated + resized set written above; labels only pass through the
# resize pipeline so that they stay noise-free.
_augment_dir(seq4, PATH_T_ROT_IMG, PATH_T_ROT_GAUSS_IMG, 'gauss', 'jpg')
_augment_dir(seq8, PATH_T_ROT_LBL, PATH_T_ROT_GAUSS_LBL, 'gauss', 'png')

# Merging T_rot_gauss
# Adds the noise-free rotated/resized files to the noisy set (hidden files skipped).
rot_images = [name for name in os.listdir(PATH_T_ROT_IMG) if not name.startswith('.')]
for r_img in rot_images:
    shutil.copy(os.path.join(PATH_T_ROT_IMG, r_img), PATH_T_ROT_GAUSS_IMG)

rot_labels = [name for name in os.listdir(PATH_T_ROT_LBL) if not name.startswith('.')]
for r_lbl in rot_labels:
    shutil.copy(os.path.join(PATH_T_ROT_LBL, r_lbl), PATH_T_ROT_GAUSS_LBL)

Validation dataset

In [None]:
# Source
PATH_VAL_IMG = '/Users/paulawi/Downloads/dataset/valid/images_orig'
PATH_VAL_LBL = '/Users/paulawi/Downloads/dataset/valid/labels_orig'

# Destinations
PATH_VAL_IMG_512 = '/Users/paulawi/Downloads/dataset/valid/images'
PATH_VAL_LBL_512 = '/Users/paulawi/Downloads/dataset/valid/labels'

PATH_VAL_IMG_GAUSS ='/Users/paulawi/Downloads/dataset/valid/augmented/gauss/images_gauss'
PATH_VAL_LBL_GAUSS = '/Users/paulawi/Downloads/dataset/valid/augmented/gauss/labels_gauss'

PATH_VAL_IMG_DUNKEL = '/Users/paulawi/Downloads/dataset/valid/augmented/dark/images_dark'
PATH_VAL_LBL_DUNKEL = '/Users/paulawi/Downloads/dataset/valid/augmented/dark/labels_dark'

PATH_VAL_IMG_HELL = '/Users/paulawi/Downloads/dataset/valid/augmented/bright/images_bright'
PATH_VAL_LBL_HELL = '/Users/paulawi/Downloads/dataset/valid/augmented/bright/labels_bright'

PATH_VAL_IMG_GH = '/Users/paulawi/Downloads/dataset/valid/augmented/gauss_bright/images_gauss_bright'
PATH_VAL_LBL_GH = '/Users/paulawi/Downloads/dataset/valid/augmented/gauss_bright/labels_gauss_bright'

PATH_VAL_IMG_GD = '/Users/paulawi/Downloads/dataset/valid/augmented/gauss_dark/images_gauss_dark'
PATH_VAL_LBL_GD = '/Users/paulawi/Downloads/dataset/valid/augmented/gauss_dark/labels_gauss_dark'


# Defined inside this cell so that the cell can be run on its own.
def _augment_dir(augmenter, source_dir, target_dir, prefix, extension):
    """Augment every file of source_dir and write the results to target_dir.

    Output files are named ``<prefix>_<running index>.<extension>``. Entries
    are processed sorted by file stem, and hidden files such as ``.DS_Store``
    are skipped, so an image directory and a label directory that hold the
    same stems get the same index for matching files. ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        augmenter: Object providing ``augment_image(array)``.
        source_dir: Directory to read from.
        target_dir: Directory to write to; created if missing.
        prefix: File name prefix of the written files.
        extension: File extension of the written files, without the dot.
    """
    os.makedirs(target_dir, exist_ok=True)
    names = sorted(
        (name for name in os.listdir(source_dir) if not name.startswith('.')),
        key=lambda name: os.path.splitext(name)[0],
    )
    for index, name in enumerate(names):
        data = imageio.imread(os.path.join(source_dir, name))
        augmented = augmenter.augment_image(data)
        target_name = '%s_%04d.%s' % (prefix, index, extension)
        imageio.imwrite(os.path.join(target_dir, target_name), augmented)


# In every step below the labels only go through the resize pipeline (seq8):
# noise and brightness changes are applied to the images alone.

# Resize Original Images
_augment_dir(seq8, PATH_VAL_IMG, PATH_VAL_IMG_512, 'resized', 'jpg')
_augment_dir(seq8, PATH_VAL_LBL, PATH_VAL_LBL_512, 'resized', 'png')

# additive gaussian noise
_augment_dir(seq5, PATH_VAL_IMG, PATH_VAL_IMG_GAUSS, 'gauss', 'jpg')
_augment_dir(seq8, PATH_VAL_LBL, PATH_VAL_LBL_GAUSS, 'gauss', 'png')

# darkening
_augment_dir(seq6, PATH_VAL_IMG, PATH_VAL_IMG_DUNKEL, 'dunkel', 'jpg')
_augment_dir(seq8, PATH_VAL_LBL, PATH_VAL_LBL_DUNKEL, 'dunkel', 'png')

# brightening
_augment_dir(seq7, PATH_VAL_IMG, PATH_VAL_IMG_HELL, 'hell', 'jpg')
_augment_dir(seq8, PATH_VAL_LBL, PATH_VAL_LBL_HELL, 'hell', 'png')

# additive gaussian noise with darkening and brightening
# These steps read the brightened / darkened sets written above, not the originals.
_augment_dir(seq5, PATH_VAL_IMG_HELL, PATH_VAL_IMG_GH, 'gauss_hell', 'jpg')
_augment_dir(seq8, PATH_VAL_LBL_HELL, PATH_VAL_LBL_GH, 'gauss_hell', 'png')

_augment_dir(seq5, PATH_VAL_IMG_DUNKEL, PATH_VAL_IMG_GD, 'gauss_dunkel', 'jpg')
_augment_dir(seq8, PATH_VAL_LBL_DUNKEL, PATH_VAL_LBL_GD, 'gauss_dunkel', 'png')

Test dataset

In [None]:
# Source
PATH_TE_IMG = '/Users/paulawi/Downloads/dataset/test/images_orig'
PATH_TE_LBL = '/Users/paulawi/Downloads/dataset/test/labels_orig'

# Destination
PATH_TE_IMG_512 = '/Users/paulawi/Downloads/dataset/test/images'
PATH_TE_LBL_512 = '/Users/paulawi/Downloads/dataset/test/labels'

PATH_TE_IMG_GAUSS ='/Users/paulawi/Downloads/dataset/test/augmented/gauss/images_gauss'
PATH_TE_LBL_GAUSS = '/Users/paulawi/Downloads/dataset/test/augmented/gauss/labels_gauss'

PATH_TE_IMG_DUNKEL = '/Users/paulawi/Downloads/dataset/test/augmented/dark/images_dark'
PATH_TE_LBL_DUNKEL = '/Users/paulawi/Downloads/dataset/test/augmented/dark/labels_dark'

PATH_TE_IMG_HELL = '/Users/paulawi/Downloads/dataset/test/augmented/bright/images_bright'
PATH_TE_LBL_HELL = '/Users/paulawi/Downloads/dataset/test/augmented/bright/labels_bright'

PATH_TE_IMG_GH = '/Users/paulawi/Downloads/dataset/test/augmented/gauss_bright/images_gauss_bright'
PATH_TE_LBL_GH = '/Users/paulawi/Downloads/dataset/test/augmented/gauss_bright/labels_gauss_bright'

PATH_TE_IMG_GD = '/Users/paulawi/Downloads/dataset/test/augmented/gauss_dark/images_gauss_dark'
PATH_TE_LBL_GD = '/Users/paulawi/Downloads/dataset/test/augmented/gauss_dark/labels_gauss_dark'


# Defined inside this cell so that the cell can be run on its own.
def _augment_dir(augmenter, source_dir, target_dir, prefix, extension):
    """Augment every file of source_dir and write the results to target_dir.

    Output files are named ``<prefix>_<running index>.<extension>``. Entries
    are processed sorted by file stem, and hidden files such as ``.DS_Store``
    are skipped, so an image directory and a label directory that hold the
    same stems get the same index for matching files. ``os.listdir`` alone
    returns entries in arbitrary order.

    Args:
        augmenter: Object providing ``augment_image(array)``.
        source_dir: Directory to read from.
        target_dir: Directory to write to; created if missing.
        prefix: File name prefix of the written files.
        extension: File extension of the written files, without the dot.
    """
    os.makedirs(target_dir, exist_ok=True)
    names = sorted(
        (name for name in os.listdir(source_dir) if not name.startswith('.')),
        key=lambda name: os.path.splitext(name)[0],
    )
    for index, name in enumerate(names):
        data = imageio.imread(os.path.join(source_dir, name))
        augmented = augmenter.augment_image(data)
        target_name = '%s_%04d.%s' % (prefix, index, extension)
        imageio.imwrite(os.path.join(target_dir, target_name), augmented)


# In every step below the labels only go through the resize pipeline (seq8):
# noise and brightness changes are applied to the images alone.

# Resize original images
_augment_dir(seq8, PATH_TE_IMG, PATH_TE_IMG_512, 'resized', 'jpg')
_augment_dir(seq8, PATH_TE_LBL, PATH_TE_LBL_512, 'resized', 'png')

# additive gaussian noise
_augment_dir(seq5, PATH_TE_IMG, PATH_TE_IMG_GAUSS, 'gauss', 'jpg')
_augment_dir(seq8, PATH_TE_LBL, PATH_TE_LBL_GAUSS, 'gauss', 'png')

# darkening
_augment_dir(seq6, PATH_TE_IMG, PATH_TE_IMG_DUNKEL, 'dunkel', 'jpg')
_augment_dir(seq8, PATH_TE_LBL, PATH_TE_LBL_DUNKEL, 'dunkel', 'png')

# brightening
_augment_dir(seq7, PATH_TE_IMG, PATH_TE_IMG_HELL, 'hell', 'jpg')
_augment_dir(seq8, PATH_TE_LBL, PATH_TE_LBL_HELL, 'hell', 'png')

# additive gaussian noise with darkening and brightening
# These steps read the brightened / darkened sets written above, not the originals.
_augment_dir(seq5, PATH_TE_IMG_HELL, PATH_TE_IMG_GH, 'gauss_hell', 'jpg')
_augment_dir(seq8, PATH_TE_LBL_HELL, PATH_TE_LBL_GH, 'gauss_hell', 'png')

_augment_dir(seq5, PATH_TE_IMG_DUNKEL, PATH_TE_IMG_GD, 'gauss_dunkel', 'jpg')
_augment_dir(seq8, PATH_TE_LBL_DUNKEL, PATH_TE_LBL_GD, 'gauss_dunkel', 'png')