In [1]:
import glob
import os
import random
import shutil

In [2]:
# Project root directory. The default is the original workstation path; set the
# DITCHES_BASEPATH environment variable to run the notebook on another machine
# without editing this cell.
basepath = os.environ.get("DITCHES_BASEPATH", "D:/users/holgerv/Ditches")

In [3]:
# Directory below the project root under which the "training" and "testing"
# folders of the pretraining dataset are created
pretraining_dir = basepath + "/working/deep_learning/data/pretraining"

In [4]:
# Input HPMF files: every *.tif inside an "hpmf" folder one level below OnlyDitches
fp_hpmf_list = sorted(glob.glob(f"{basepath}/working/deep_learning/data/OnlyDitches/*/hpmf/*.tif"))
hpmf_files = [os.path.basename(fp_hpmf) for fp_hpmf in fp_hpmf_list]

# Input labels: every *.tif inside a "labels" folder one level below OnlyDitches
fp_labels_list = sorted(glob.glob(f"{basepath}/working/deep_learning/data/OnlyDitches/*/labels/*.tif"))
labels_files = [os.path.basename(fp_labels) for fp_labels in fp_labels_list]

# Index both listings by file name. setdefault keeps the first path in sorted
# order when the same file name occurs in more than one subfolder, so the
# pairing is deterministic. Looking names up in these dictionaries replaces
# re-globbing the disk for every label file, which was slow and broke on file
# names containing glob metacharacters such as "[" or "?".
hpmf_by_name = {}
for fp_hpmf in fp_hpmf_list:
    hpmf_by_name.setdefault(os.path.basename(fp_hpmf), fp_hpmf)

labels_by_name = {}
for fp_labels in fp_labels_list:
    labels_by_name.setdefault(os.path.basename(fp_labels), fp_labels)

# Collect matching image pairs as {HPMF path: labels path}. Iterating the
# labels index preserves the sorted label order, which fixes the key order of
# image_pairs (the later seeded sampling depends on that order).
image_pairs = {}
for labels_file, fp_labels in labels_by_name.items():
    fp_hpmf = hpmf_by_name.get(labels_file)
    if fp_hpmf is not None:
        image_pairs[fp_hpmf] = fp_labels

In [5]:
# Fraction of the image pairs used for training; the remainder is the test share
pct_train = 0.8
pct_test = 1 - pct_train

# Total number of matched pairs and the resulting size of each split.
# The test size is taken as the remainder so both sizes always add up to n_samples.
n_samples = len(image_pairs)
n_train = round(pct_train * n_samples)
n_test = n_samples - n_train

In [6]:
# Display the planned (training, test) sample counts
(n_train, n_test)

(1088, 272)

In [7]:
# Split HPMF files into training and test samples.
# The fixed seed makes the random draw reproducible across runs.
random.seed(0)
fp_hpmf_list_all = list(image_pairs.keys())
fp_hpmf_list_train = random.sample(fp_hpmf_list_all, n_train)

# Everything not drawn for training goes to the test split, in the original
# key order. A set makes each membership check O(1) instead of scanning the
# whole training list for every file.
fp_hpmf_set_train = set(fp_hpmf_list_train)
fp_hpmf_list_test = [file for file in fp_hpmf_list_all if file not in fp_hpmf_set_train]

In [8]:
# Display the actual (training, test) list lengths for comparison with the planned sizes
tuple(len(split) for split in (fp_hpmf_list_train, fp_hpmf_list_test))

(1088, 272)

# Generate training data directory

In [9]:
# Output directories for the training split.
# Any existing training directory is deleted first so the split starts empty.
out_dir = f"{pretraining_dir}/training"
if os.path.exists(out_dir):
    shutil.rmtree(out_dir)

out_dir_hpmf = f"{out_dir}/hpmf"
out_dir_labels = f"{out_dir}/labels"

# makedirs also creates out_dir and any missing parent (e.g. pretraining_dir on
# a first run), where a plain mkdir would raise FileNotFoundError.
os.makedirs(out_dir_hpmf)
os.makedirs(out_dir_labels)

In [10]:
%%time

# Copy every training HPMF image and its labels image (looked up in image_pairs)
# into the hpmf and labels output folders, respectively
training_pairs = ((fp, image_pairs[fp]) for fp in fp_hpmf_list_train)
for fp_hpmf, fp_labels in training_pairs:
    shutil.copy(fp_hpmf, out_dir_hpmf)
    shutil.copy(fp_labels, out_dir_labels)

CPU times: total: 2.12 s
Wall time: 10.7 s


In [11]:
# The training hpmf folder should hold exactly n_train entries
n_train == len(os.listdir(out_dir_hpmf))

True

In [12]:
# The training labels folder should hold exactly n_train entries
n_train == len(os.listdir(out_dir_labels))

True

# Generate test data directory

In [13]:
# Output directories for the test split.
# Any existing testing directory is deleted first so the split starts empty.
out_dir = f"{pretraining_dir}/testing"
if os.path.exists(out_dir):
    shutil.rmtree(out_dir)

out_dir_hpmf = f"{out_dir}/hpmf"
out_dir_labels = f"{out_dir}/labels"

# makedirs also creates out_dir and any missing parent (e.g. pretraining_dir on
# a first run), where a plain mkdir would raise FileNotFoundError.
os.makedirs(out_dir_hpmf)
os.makedirs(out_dir_labels)

In [14]:
%%time

# Copy every test HPMF image and its labels image (looked up in image_pairs)
# into the hpmf and labels output folders, respectively
testing_pairs = ((fp, image_pairs[fp]) for fp in fp_hpmf_list_test)
for fp_hpmf, fp_labels in testing_pairs:
    shutil.copy(fp_hpmf, out_dir_hpmf)
    shutil.copy(fp_labels, out_dir_labels)

CPU times: total: 547 ms
Wall time: 3.55 s


In [15]:
# The testing hpmf folder should hold exactly n_test entries
n_test == len(os.listdir(out_dir_hpmf))

True

In [16]:
# The testing labels folder should hold exactly n_test entries
n_test == len(os.listdir(out_dir_labels))

True