In [1]:
# import the necessary packages
from imutils import paths
import random
import shutil
import os
import cv2

In [2]:
# location of the *original*, un-split image dataset
ORIG_INPUT_DATASET = "input/dataset_rgba_jpg"

# root of the *new* directory tree that receives the images once the
# training/validation/testing split has been computed
BASE_PATH = "input/dataset_built_rgba_jpg"

# one sub-directory of BASE_PATH per split
TRAIN_PATH, VAL_PATH, TEST_PATH = (
    os.path.sep.join((BASE_PATH, splitDir))
    for splitDir in ("training", "validation", "testing")
)

# fraction of all images that goes into the training portion
TRAIN_SPLIT = 0.8

# fraction of the *training* portion that is then set aside for
# validation
VAL_SPLIT = 0.1

In [3]:
# grab the paths to all input images in the original input directory.
# The paths are sorted before shuffling: the order in which files are
# discovered on disk is not guaranteed to be stable across machines or
# runs, and a seeded shuffle is only repeatable when it starts from the
# same initial ordering.
imagePaths = sorted(paths.list_images(ORIG_INPUT_DATASET))

# seed the global RNG so the shuffle (and therefore every split derived
# from it) is the same on each run
random.seed(42)
random.shuffle(imagePaths)

In [4]:
# the first TRAIN_SPLIT fraction of the shuffled paths becomes the
# training portion; everything after the cut-off is held out for testing
numTrain = int(TRAIN_SPLIT * len(imagePaths))
trainPaths, testPaths = imagePaths[:numTrain], imagePaths[numTrain:]

In [5]:
# carve the validation set off the front of the training portion: the
# first VAL_SPLIT fraction becomes validation, the remainder stays as
# training data.
# NOTE: trainPaths is overwritten here, so re-running this cell on its
# own shrinks the training set again -- re-run the train/test split cell
# first.
numVal = int(VAL_SPLIT * len(trainPaths))
valPaths, trainPaths = trainPaths[:numVal], trainPaths[numVal:]

In [6]:
# describe each split to build as a (name, image paths, output directory)
# tuple, in the order training -> validation -> testing
datasets = list(zip(
    ("training", "validation", "testing"),
    (trainPaths, valPaths, testPaths),
    (TRAIN_PATH, VAL_PATH, TEST_PATH),
))

In [7]:
# copy every image into <split directory>/<class label>/<filename>.
# The loop variable is deliberately *not* named `imagePaths`: reusing that
# name would replace the full shuffled list built earlier with the paths
# of the last split, so re-running the split cells afterwards would
# silently operate on a subset of the data.
for (dType, splitPaths, baseOutput) in datasets:
    # show which data split we are creating
    print("[INFO] building '{}' split".format(dType))

    # if the base output directory does not exist, create it
    if not os.path.exists(baseOutput):
        print("[INFO] 'creating {}' directory".format(baseOutput))
        os.makedirs(baseOutput)

    # loop over the image paths belonging to this split
    for inputPath in splitPaths:
        # the filename is the last path component and the class label is
        # the name of the directory that directly contains the image;
        # os.path is used rather than splitting on os.path.sep so that
        # paths containing the platform's alternate separator are parsed
        # correctly as well
        filename = os.path.basename(inputPath)
        label = os.path.basename(os.path.dirname(inputPath))

        # build the path to the label directory
        labelPath = os.path.join(baseOutput, label)

        # if the label output directory does not exist, create it
        if not os.path.exists(labelPath):
            print("[INFO] 'creating {}' directory".format(labelPath))
            os.makedirs(labelPath)

        # construct the destination path and copy the image there
        # (copy2 also attempts to preserve the file's metadata)
        destPath = os.path.join(labelPath, filename)
        shutil.copy2(inputPath, destPath)

[INFO] building 'training' split
[INFO] 'creating input/dataset_split_rgba_jpg/training' directory
[INFO] 'creating input/dataset_split_rgba_jpg/training/Field_1' directory
[INFO] 'creating input/dataset_split_rgba_jpg/training/Field_4' directory
[INFO] building 'validation' split
[INFO] 'creating input/dataset_split_rgba_jpg/validation' directory
[INFO] 'creating input/dataset_split_rgba_jpg/validation/Field_4' directory
[INFO] 'creating input/dataset_split_rgba_jpg/validation/Field_1' directory
[INFO] building 'testing' split
[INFO] 'creating input/dataset_split_rgba_jpg/testing' directory
[INFO] 'creating input/dataset_split_rgba_jpg/testing/Field_1' directory
[INFO] 'creating input/dataset_split_rgba_jpg/testing/Field_4' directory
