In [None]:
### Run this command in a terminal to convert this notebook to a .py file:
### jupyter nbconvert --to script 'separate_train_and_val2.ipynb'

In [None]:
import os
import numpy as np
import shutil
import math
import glob
import re
import argparse

In [None]:
# Percentage (0-100) of each class's images that is held out for validation.
VAL_DATASET_PERCENT = 5

# The validation files are drawn with numpy's global RNG; seed it so that the
# split is reproducible from a fresh kernel. Change the seed to get a
# different split.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [None]:
# Class labels for which validation images are sampled; each label is matched
# against the `_class_<label>` part of the image file names.
labelbox_class_names = [
    "healthy_leaves",
    "unhealthy_leaf",
    "black_sigatoka",
]

# Root of the source dataset: images are read from `<input_dir>/images` and
# masks from `<input_dir>/masks`.
input_dir = "./train_set/"

# Root under which the train and validation folders are written.
output_dir = "./train_val_set/"

In [None]:
def get_val_files(class_name, dir, val_percent):
    """Randomly pick the validation image files of a single class.

    Candidates are the files matching `<dir>/images/*_class_<class_name>*`.
    `ceil(val_percent % of the candidates)` distinct files are drawn with
    numpy's global RNG, so seeding `np.random` makes the pick repeatable.

    Args:
        class_name: Class label as it appears in the file names.
        dir: Dataset root that contains the `images` folder.
        val_percent: Percentage (0-100) of the class to hold out.

    Returns:
        A set with the base names (plain `str`) of the selected files; empty
        when no file matches the class.
    """
    pattern = '{}/images/*_class_{}*'.format(dir, class_name)
    # Sorted so that, for a given seed, the pick does not depend on the order
    # in which the filesystem happens to list the files.
    class_files = sorted(os.path.basename(path) for path in glob.glob(pattern))
    if not class_files:
        return set()

    # Multiply before dividing: `val_percent / 100 * n` can land a hair above
    # an exact integer in floating point and make ceil() overshoot by one
    # (and is integer division on Python 2).
    val_count = int(math.ceil(val_percent * len(class_files) / 100.0))
    # Sampling without replacement cannot return more files than exist.
    val_count = min(val_count, len(class_files))

    # replace=False: the default samples WITH replacement, and the duplicates
    # collapsed by set() would leave fewer validation files than requested.
    chosen = np.random.choice(class_files, val_count, replace=False)
    return set(str(name) for name in chosen)

In [None]:
def normalise_class_name(class_name):
    """Return `class_name` with every '/' turned into '-' and every space into '_'."""
    return class_name.replace('/', '-').replace(' ', '_')

In [None]:
# Destination roots of the two splits, both located under the output directory.
train_dir, val_dir = (
    os.path.join(output_dir, split_name)
    for split_name in ('stage1_train', 'val')
)

In [None]:
# Start from a clean slate: delete whatever split a previous run left behind.
for stale_dir in (train_dir, val_dir):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

In [None]:
# Create the (empty) split roots, including any missing parent folders.
for split_dir in (train_dir, val_dir):
    os.makedirs(split_dir)

In [None]:
# Every entry of the source images folder; the train split is later derived
# from this list minus the validation picks.
images_dir = os.path.join(input_dir, 'images')
all_files = os.listdir(images_dir)

In [None]:
# Flat list with the validation picks of every class, class after class.
val_files = [
    picked_file
    for class_name in labelbox_class_names
    for picked_file in get_val_files(
        normalise_class_name(class_name), input_dir, VAL_DATASET_PERCENT
    )
]

In [None]:
def _copy_image_and_mask(file_name, split_dir):
    """Copy one sample's image and mask into `<split_dir>/<file_name>/`.

    For each of `images` and `masks`, creates
    `<split_dir>/<file_name>/<kind>` and copies
    `<input_dir>/<kind>/<file_name>` into it under the same file name.
    The image is handled before the mask.
    """
    for kind in ('images', 'masks'):
        target_dir = os.path.join(split_dir, file_name, kind)
        os.makedirs(target_dir)
        shutil.copy(
            os.path.join(input_dir, kind, file_name),
            os.path.join(target_dir, file_name)
        )


# Validation split: the files picked for validation.
for val_file in val_files:
    _copy_image_and_mask(val_file, val_dir)

# Training split: every listed file that was not picked for validation.
for train_file in set(all_files) - set(val_files):
    _copy_image_and_mask(train_file, train_dir)