In [2]:
!tree -d -L 2 ~/datasets/data/cityscapes

/home/skywatcher/datasets/data/cityscapes
├── gtCoarse
│   ├── train
│   ├── train_extra
│   └── val
├── gtFine
│   ├── test
│   ├── train
│   └── val
└── leftImg8bit
    ├── test
    ├── train
    ├── train_extra
    └── val

13 directories


In [11]:
#Let's do some data preprocessing

#Import the packages
import os
from shutil import copyfile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def cs_preprocessing_files(source):
    '''
    Copy the extracted Cityscapes data into flat train/ and val/ folders.

    source is assumed to be the directory you get after downloading and
    extracting the Cityscapes archives, i.e. it has the following structure:
    cityscapes
    ├── gtCoarse
    │   ├── train
    │   ├── train_extra
    │   └── val
    ├── gtFine
    │   ├── test
    │   ├── train
    │   └── val
    └── leftImg8bit
        ├── test
        ├── train
        ├── train_extra
        └── val

    The following folders are created inside source (folders that already
    exist are reused, so the function can be run more than once) and filled
    with copies of the files; the originals are left where they are:
    train/masks       - colour masks from gtFine/train
    train/weak_masks  - colour masks from gtCoarse/train and gtCoarse/train_extra
    train/images      - images from leftImg8bit/train
    train/weak_images - images from leftImg8bit/train and leftImg8bit/train_extra
    val/masks         - colour masks from gtFine/val
    val/weak_masks    - colour masks from gtCoarse/val
    val/images        - images from leftImg8bit/val
    val/weak_images   - images from leftImg8bit/val

    A file counts as a colour mask when its name contains 'color'. The test
    splits are never read, and any input folder that is missing is skipped.

    @param source - the directory for cityscapes that has the structure mentioned above (This is best done as an absolute path)
    '''
    #Create all the variables
    fine_masks = os.path.join(source, 'train', 'masks')
    coarse_masks = os.path.join(source, 'train', 'weak_masks')
    fine_train = os.path.join(source, 'train', 'images')
    coarse_train = os.path.join(source, 'train', 'weak_images')
    fine_masks_val = os.path.join(source, 'val', 'masks')
    coarse_masks_val = os.path.join(source, 'val', 'weak_masks')
    fine_val = os.path.join(source, 'val', 'images')
    coarse_val = os.path.join(source, 'val', 'weak_images')

    #Create all the directories (exist_ok so that re-running the cell does not fail)
    for directory in (fine_masks, coarse_masks, fine_train, coarse_train,
                      fine_masks_val, coarse_masks_val, fine_val, coarse_val):
        os.makedirs(directory, exist_ok=True)

    #Which (dataset, split) folder feeds which output folders. The routing is
    #keyed on the folder names directly below source, so it cannot be confused
    #by 'test', 'train' or 'val' appearing elsewhere in the absolute path.
    destinations = {
        ('gtFine', 'train'): [fine_masks],
        ('gtFine', 'val'): [fine_masks_val],
        ('gtCoarse', 'train'): [coarse_masks],
        ('gtCoarse', 'train_extra'): [coarse_masks],
        ('gtCoarse', 'val'): [coarse_masks_val],
        #Coarse annotations exist for every finely annotated image as well,
        #so those images go to both the fine and the weak image folders
        ('leftImg8bit', 'train'): [fine_train, coarse_train],
        ('leftImg8bit', 'train_extra'): [coarse_train],
        ('leftImg8bit', 'val'): [fine_val, coarse_val],
    }

    for (dataset, split), targets in destinations.items():
        split_dir = os.path.join(source, dataset, split)
        if not os.path.isdir(split_dir):
            #This part of the dataset was not downloaded
            continue
        #The annotation folders also hold label id / polygon files; only the
        #colour masks are wanted from them
        masks_only = dataset != 'leftImg8bit'
        #Walk only the split folder: saves walking the test splits and the
        #output folders created above
        for root, _, files in os.walk(split_dir):
            for file in files:
                if masks_only and 'color' not in file:
                    continue
                file_path = os.path.join(root, file)
                for target in targets:
                    copyfile(file_path, os.path.join(target, file))
def cs_create_generators(images, masks, val_images, val_masks, target_size):
    '''
    Create keras generators of (image batch, mask batch) pairs ready for use
    in training TF2.0 models like FastSCNN.

    Each directory may hold its files directly (as written by
    cs_preprocessing_files) or in sub-folders; either way every file below it
    is treated as one unlabelled set.

    @param images - The directory where all the training images are kept
    @param masks - The directory where all the masks are kept
    @param val_images - The directory where all validation images are kept
    @param val_masks - The directory where all validation masks are kept
    @param target_size - Should be a tuple of two numbers, the image size needed for the model should be put here.
                         This will be largely dependent on the network you are training.
    @return (train_generator, val_generator) - two iterators, each yielding
            (image batch, mask batch) tuples. Training batches are randomly
            augmented, validation batches are not.
    '''
    # we create two instances with the same arguments, so that an image and its
    # mask receive the same random transformation
    data_gen_args = dict(rotation_range=90,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         zoom_range=0.2)
    image_datagen = ImageDataGenerator(**data_gen_args)
    mask_datagen = ImageDataGenerator(**data_gen_args)
    # NOTE(review): the random transforms presumably interpolate pixel values,
    # which would blend class colours along mask edges - confirm, and consider a
    # nearest-neighbour setting for mask_datagen if the installed keras has one.

    # Validation data is not augmented, so that validation metrics are
    # comparable from one epoch to the next
    val_image_datagen = ImageDataGenerator()
    val_mask_datagen = ImageDataGenerator()

    # Provide the same seed to the flow methods (keeps images and masks paired)
    seed = 42

    def flow(datagen, directory):
        # flow_from_directory only looks for files inside sub-folders of the
        # directory it is given, so point it at the parent and name the wanted
        # folder as the single "class". Nested files are still found.
        parent, folder = os.path.split(os.path.abspath(directory))
        return datagen.flow_from_directory(
            parent,
            classes=[folder],
            class_mode=None,
            target_size=target_size,
            seed=seed)

    #Create the training generators
    image_generator = flow(image_datagen, images)
    mask_generator = flow(mask_datagen, masks)

    #Create the validation generators
    val_image_generator = flow(val_image_datagen, val_images)
    val_mask_generator = flow(val_mask_datagen, val_masks)

    # combine generators into one which yields image and masks
    train_generator = zip(image_generator, mask_generator)
    val_generator = zip(val_image_generator, val_mask_generator)
    return train_generator, val_generator

In [164]:
# Copy the Cityscapes images and colour masks into train/ and val/ folders under the dataset root
cs_preprocessing_files('/home/skywatcher/datasets/data/cityscapes')

In [3]:
!tree -d -L 2 /home/skywatcher/datasets/data/cityscapes

/home/skywatcher/datasets/data/cityscapes
├── gtCoarse
│   ├── train
│   ├── train_extra
│   └── val
├── gtFine
│   ├── test
│   ├── train
│   └── val
└── leftImg8bit
    ├── test
    ├── train
    ├── train_extra
    └── val

13 directories


In [None]:
# Dataset root; the train/ and val/ folders below it are the ones that
# cs_preprocessing_files fills.
cityscapes_root = '/home/skywatcher/datasets/data/cityscapes'

images = cityscapes_root + '/train/images'
masks = cityscapes_root + '/train/masks'
val_images = cityscapes_root + '/val/images'
# Validation masks live under val/ (previously this pointed at train/masks,
# pairing validation images with training masks)
val_masks = cityscapes_root + '/val/masks'

# (224, 224) is the image size requested from the generators
train_generator, val_generator = cs_create_generators(images, masks, val_images, val_masks, (224, 224))