In [None]:
import os
import numpy as np
import shutil
import math
import glob

# Share (in percent) of the source images under LABELBOX_OUTPUT_DIR/images that
# is held out as the test set; the count is rounded up to a whole file.
TEST_DATASET_PERCENT = 5

# Seed for NumPy's global RNG. Without it the random test split differs on every
# run of the notebook; with it the draw is repeatable as long as the input
# listing is unchanged.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Inputs: the directory whose 'images' folder is sampled for test sources, and
# the directory whose 'images' / 'masks' folders hold the split files.
LABELBOX_OUTPUT_DIR = '../labelbox_parser/output'
IMAGE_SPLITTER_OUTPUT_DIR = '../image_splitter/output/with_labels_only'

# Outputs: the test set directory (one sub-folder per held-out file) and the
# directory whose 'images' / 'masks' folders receive every remaining file.
TARGET_TEST_DIR = '../images_distributer/output/stage1_test'
TARGET_AUGMENTATION_DIR = '../augmentor/input/'

In [None]:
def get_splitted_test_files(labelbox_output_dir=None, image_splitter_output_dir=None,
                            test_dataset_percent=None):
    """Randomly pick the held-out source files and return the names of their split files.

    A share of the files in ``<labelbox_output_dir>/images`` is drawn at random
    (the count is rounded up to a whole file). For every drawn file, all files in
    ``<image_splitter_output_dir>/images`` whose name starts with the drawn file's
    extension-less name are collected.

    Args:
        labelbox_output_dir: directory containing the ``images`` folder to sample
            from. Defaults to ``LABELBOX_OUTPUT_DIR``.
        image_splitter_output_dir: directory containing the ``images`` folder with
            the split files. Defaults to ``IMAGE_SPLITTER_OUTPUT_DIR``.
        test_dataset_percent: share of source files to hold out, in percent.
            Defaults to ``TEST_DATASET_PERCENT``. The resulting count is clamped
            to the range ``0 .. number of source files``.

    Returns:
        list of str: sorted, duplicate-free base names of the matching split files.
        Empty when there are no source files or nothing is drawn.

    Raises:
        OSError: if ``<labelbox_output_dir>/images`` cannot be listed.
    """
    if labelbox_output_dir is None:
        labelbox_output_dir = LABELBOX_OUTPUT_DIR
    if image_splitter_output_dir is None:
        image_splitter_output_dir = IMAGE_SPLITTER_OUTPUT_DIR
    if test_dataset_percent is None:
        test_dataset_percent = TEST_DATASET_PERCENT

    # os.listdir returns entries in arbitrary order; sorting makes the draw
    # repeatable whenever the caller seeds NumPy's RNG.
    all_labelbox_files = sorted(os.listdir(os.path.join(labelbox_output_dir, 'images')))
    if not all_labelbox_files:
        # np.random.choice refuses to sample from an empty population.
        return []

    test_file_count = int(math.ceil(test_dataset_percent / 100.0 * len(all_labelbox_files)))
    test_file_count = max(0, min(test_file_count, len(all_labelbox_files)))

    # replace=False: the default samples WITH replacement, so the same source
    # could be drawn more than once and the test set would end up smaller than
    # the requested share.
    labelbox_test_files = np.random.choice(all_labelbox_files, test_file_count, replace=False)

    splitted_test_files = set()
    for labelbox_test_file in labelbox_test_files:
        stem = os.path.splitext(str(labelbox_test_file))[0]
        # Escape the literal part so characters such as '[' or '*' in a path or
        # file name are not interpreted as glob syntax.
        # NOTE(review): prefix matching also picks up split files of any other
        # source whose name merely starts with this stem (e.g. 'img1' vs
        # 'img10') - confirm the splitter's naming scheme rules this out.
        pattern = glob.escape(os.path.join(image_splitter_output_dir, 'images', stem)) + '*'
        splitted_test_files.update(os.path.basename(path) for path in glob.glob(pattern))

    return sorted(splitted_test_files)


In [None]:
# Names of every file in the splitter's 'images' folder, and the randomly chosen
# subset of them that is held out for testing.
splitter_images_dir = os.path.join(IMAGE_SPLITTER_OUTPUT_DIR, 'images')
splitted_all_files = os.listdir(splitter_images_dir)
splitted_test_files = get_splitted_test_files()

In [None]:
# Start from an empty test directory: drop whatever a previous run left behind,
# then create the directory anew.
test_dir_left_over = os.path.exists(TARGET_TEST_DIR)
if test_dir_left_over:
    shutil.rmtree(TARGET_TEST_DIR)
os.makedirs(TARGET_TEST_DIR)

In [None]:
# Lay out every held-out file as
#   <TARGET_TEST_DIR>/<file name>/images/<file name>
#   <TARGET_TEST_DIR>/<file name>/masks/<file name>
# copying the image and its same-named mask from the splitter output.
#
# The names are de-duplicated first: a repeated entry would make os.makedirs
# fail with "file exists" on a folder this very loop has just created.
for splitted_test_file in sorted(set(splitted_test_files)):
    sample_dir = os.path.join(TARGET_TEST_DIR, splitted_test_file)

    for kind in ('images', 'masks'):
        target_dir = os.path.join(sample_dir, kind)
        os.makedirs(target_dir)

        shutil.copy(
            os.path.join(IMAGE_SPLITTER_OUTPUT_DIR, kind, splitted_test_file),
            os.path.join(target_dir, splitted_test_file)
        )


In [None]:
# Destination folders below TARGET_AUGMENTATION_DIR, paired with the name of the
# splitter sub-folder each one is filled from.
target_augmentation_images_dir = os.path.join(TARGET_AUGMENTATION_DIR, 'images')
target_augmentation_masks_dir = os.path.join(TARGET_AUGMENTATION_DIR, 'masks')
augmentation_targets = (
    ('images', target_augmentation_images_dir),
    ('masks', target_augmentation_masks_dir),
)

# Drop the folders a previous run may have left behind ...
for kind, stale_dir in augmentation_targets:
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

# ... and recreate them empty.
for kind, fresh_dir in augmentation_targets:
    os.makedirs(fresh_dir)

# Every split file that was not held out for testing is copied, together with
# its same-named mask, into the matching destination folder.
held_out_names = set(splitted_test_files)
for augmentation_file in set(splitted_all_files).difference(held_out_names):
    for kind, destination_dir in augmentation_targets:
        shutil.copy(
            os.path.join(IMAGE_SPLITTER_OUTPUT_DIR, kind, augmentation_file),
            os.path.join(destination_dir, augmentation_file)
        )