In [None]:
'''
jupyter nbconvert --to script 'separate_test_and_train.ipynb'
'''

In [1]:
import os
import numpy as np
import shutil
import math
import glob
import re

In [2]:
# Percentage of each class's labelbox images to draw for the test set
# (the resulting count is rounded up to a whole image).
TEST_DATASET_PERCENT = 20

# Seed numpy's global RNG so that Restart & Run All reproduces the same
# train/test split; change the value to draw a different split.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [3]:
# Directory whose `images/` folder is searched for files named
# `*_class_<class name>*`; the test sample is drawn from those files.
labelbox_output_dir = "./output/"
# Directory holding the split files: `images/` is listed and copied from,
# and a same-named file is expected under `masks/`.
image_splitter_output_dir = "./first_predictions/"
# Destination of the test set; it is deleted and recreated on every run.
output_test_dir = "./test_set/"
# Destination of all non-test files, copied into `images/` and `masks/`.
output_augmentation_dir = "./to_be_augmented/"
# Class names to sample from; each one is passed through
# normalise_class_name before being matched against file names.
labelbox_class_names = ["healthy_leaves", "unhealthy_leaf", "black_sigatoka"]

In [4]:
def get_splitted_test_files(image_class, labelbox_dir=None, splitter_dir=None,
                            test_percent=None, rng=None):
    """Draw a random test sample for one class and return its split files.

    Files named ``*_class_<image_class>*`` are looked up in
    ``<labelbox_dir>/images``. A share of ``test_percent`` percent of them
    (rounded up) is drawn without replacement, and every file in
    ``<splitter_dir>/images`` whose name starts with the stem of a drawn
    file is returned.

    Args:
        image_class: Class name as it appears in the labelbox file names.
        labelbox_dir: Directory to sample from; defaults to the module-level
            ``labelbox_output_dir``.
        splitter_dir: Directory holding the split files; defaults to the
            module-level ``image_splitter_output_dir``.
        test_percent: Percentage of files to draw; defaults to the
            module-level ``TEST_DATASET_PERCENT``.
        rng: Object exposing a numpy-style ``choice`` method (for example
            ``np.random.RandomState(seed)``); defaults to numpy's global RNG.

    Returns:
        List of unique base names of the split files belonging to the drawn
        labelbox files. Empty when the class has no labelbox files.
    """
    if labelbox_dir is None:
        labelbox_dir = labelbox_output_dir
    if splitter_dir is None:
        splitter_dir = image_splitter_output_dir
    if test_percent is None:
        test_percent = TEST_DATASET_PERCENT
    if rng is None:
        rng = np.random

    # glob returns matches in arbitrary order; sorting makes the draw
    # reproducible when the RNG is seeded.
    all_labelbox_files = sorted(
        os.path.basename(path)
        for path in glob.glob('{}/images/*_class_{}*'.format(labelbox_dir, image_class))
    )
    if not all_labelbox_files:
        return []

    # Multiply before dividing so whole-number results stay exact
    # (0.07 * 100 evaluates to 7.000000000000001 and would be ceiled to 8).
    test_count = int(math.ceil(test_percent * len(all_labelbox_files) / 100.0))
    test_count = max(0, min(test_count, len(all_labelbox_files)))

    # replace=False: drawing with replacement can pick the same file twice,
    # which silently shrinks the test set below the requested share.
    labelbox_test_files = rng.choice(all_labelbox_files, test_count, replace=False)

    splitted_test_files = []
    already_added = set()
    for labelbox_test_file in labelbox_test_files:
        stem = os.path.splitext(str(labelbox_test_file))[0]
        # NOTE(review): this is a plain prefix match, so a stem that is a
        # prefix of another file's stem also pulls in that file's splits --
        # confirm the naming scheme rules this out.
        for path in glob.glob('{}/images/{}*'.format(splitter_dir, stem)):
            name = os.path.basename(path)
            # Overlapping prefixes must not yield the same file twice.
            if name not in already_added:
                already_added.add(name)
                splitted_test_files.append(name)
    return splitted_test_files

In [5]:
def normalise_class_name(class_name):
    """Return `class_name` with every '/' turned into '-' and every space into '_'."""
    substitutions = (
        ('/', '-'),
        (' ', '_'),
    )
    for pattern, replacement in substitutions:
        class_name = re.sub(pattern, replacement, class_name)
    return class_name

In [6]:
# Names of every entry in the splitter's images/ folder; the test files are
# later subtracted from this list to obtain the augmentation set.
splitted_all_files = os.listdir(os.path.join(image_splitter_output_dir, 'images'))

In [7]:
# Gather the test files of every configured class into one flat list.
splitted_test_files = []
for class_name in labelbox_class_names:
    splitted_test_files += get_splitted_test_files(normalise_class_name(class_name))

In [8]:
# Remove any test dataset left over from a previous run, so that the
# os.makedirs call in the next cell starts from a non-existent directory.
if os.path.exists(output_test_dir):
    shutil.rmtree(output_test_dir)

In [9]:
# Recreate the test directory, then give every test file its own folder
# containing an images/ copy and a masks/ copy of that file.
os.makedirs(output_test_dir)
for splitted_test_file in splitted_test_files:
    for subfolder in ('images', 'masks'):
        target_dir = os.path.join(output_test_dir, splitted_test_file, subfolder)
        os.makedirs(target_dir)

        source_path = os.path.join(image_splitter_output_dir, subfolder, splitted_test_file)
        shutil.copy(source_path, os.path.join(target_dir, splitted_test_file))

In [10]:
# Destination folders for the files that are not part of the test set.
target_augmentation_images_dir, target_augmentation_masks_dir = (
    os.path.join(output_augmentation_dir, subfolder)
    for subfolder in ('images', 'masks')
)

In [11]:
# Drop the images and masks folders left behind by a previous run.
for stale_dir in (target_augmentation_images_dir, target_augmentation_masks_dir):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

In [12]:
# Recreate both augmentation folders empty.
for fresh_dir in (target_augmentation_images_dir, target_augmentation_masks_dir):
    os.makedirs(fresh_dir)

In [13]:
# Everything that was not selected for the test set is copied, image and
# mask alike, into the augmentation folders.
augmentation_files = set(splitted_all_files) - set(splitted_test_files)
copy_plan = (
    ('images', target_augmentation_images_dir),
    ('masks', target_augmentation_masks_dir),
)
for augmentation_file in augmentation_files:
    for subfolder, target_dir in copy_plan:
        source_path = os.path.join(image_splitter_output_dir, subfolder, augmentation_file)
        shutil.copy(source_path, os.path.join(target_dir, augmentation_file))