In [1]:
import os
import shutil
import random
from pathlib import Path

In [2]:
# Root of the dataset tree -- change this to point at your data folder.
DATA_ROOT = Path("data")

# Unsplit source folders.
IMG_DIR = DATA_ROOT.joinpath("images")   # contains .tif
LBL_DIR = DATA_ROOT.joinpath("labels")   # contains .png

# Destination folders for the training split.
TRAIN_IMG = DATA_ROOT.joinpath("train/images")
TRAIN_LBL = DATA_ROOT.joinpath("train/labels")

# Destination folders for the validation split.
VAL_IMG = DATA_ROOT.joinpath("val/images")
VAL_LBL = DATA_ROOT.joinpath("val/labels")

In [3]:
# Create every split folder up front. parents=True also builds the intermediate
# train/ and val/ levels; exist_ok=True keeps the cell safe to re-run.
for split_dir in (TRAIN_IMG, TRAIN_LBL, VAL_IMG, VAL_LBL):
    split_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# Collect the source files in a deterministic (sorted) order; sorted() already
# consumes the glob generator and returns a list.
image_files = sorted(IMG_DIR.glob("*.tif"))
label_files = sorted(LBL_DIR.glob("*.png"))

In [6]:
# Index both file lists by stem (file name without extension) so that an image
# and its label can be paired through a shared key.
img_dict = {path.stem: path for path in image_files}
lbl_dict = {path.stem: path for path in label_files}

# Keep only stems that have both an image and a label; sort for a stable order.
common_keys = sorted(img_dict.keys() & lbl_dict.keys())
print(f"Found {len(common_keys)} matched pairs")

Found 306 matched pairs


In [7]:
# Seed the global RNG so the shuffle below yields the same permutation each run.
random.seed(42)  # reproducibility

# Shuffle a fresh, sorted copy rather than `common_keys` itself. Shuffling the
# shared list in place made this cell non-idempotent: running it a second time
# reshuffled an already-shuffled list and produced a different split, and since
# the copy step never clears the destination folders, a pair could then end up
# in both train and val. `common_keys` is left untouched (still sorted).
shuffled_keys = sorted(common_keys)
random.shuffle(shuffled_keys)

# Fraction of the matched pairs assigned to the training split.
split_ratio = 0.8
# int() truncates, so any remainder goes to the validation split.
split_idx = int(len(shuffled_keys) * split_ratio)

train_keys = shuffled_keys[:split_idx]
val_keys = shuffled_keys[split_idx:]

def copy_pairs(keys, dest_img, dest_lbl):
    """Copy the image and label file of every key into the given folders.

    For each key, the source paths are looked up in the module-level
    ``img_dict`` and ``lbl_dict`` and copied with ``shutil.copy`` to
    ``dest_img / "<key>.tif"`` and ``dest_lbl / "<key>.png"`` respectively.

    Args:
        keys: Iterable of keys present in both ``img_dict`` and ``lbl_dict``.
        dest_img: Destination directory for images (must support the ``/``
            operator, e.g. a ``pathlib.Path``).
        dest_lbl: Destination directory for labels (same requirement).

    Raises:
        KeyError: If a key is missing from ``img_dict`` or ``lbl_dict``.
    """
    for stem in keys:
        # Image first, then its label -- same order for every pair.
        image_src = img_dict[stem]
        shutil.copy(image_src, dest_img / f"{stem}.tif")
        label_src = lbl_dict[stem]
        shutil.copy(label_src, dest_lbl / f"{stem}.png")

In [8]:
# Materialise both splits on disk: each entry pairs a key list with the image
# and label folders it should be copied into.
for split_keys, img_dest, lbl_dest in (
    (train_keys, TRAIN_IMG, TRAIN_LBL),
    (val_keys, VAL_IMG, VAL_LBL),
):
    copy_pairs(split_keys, img_dest, lbl_dest)

# Report the size of each split.
print(f"Training set: {len(train_keys)} images")
print(f"Validation set: {len(val_keys)} images")

Training set: 244 images
Validation set: 62 images
