## COD10K

In [None]:
import os, random, shutil

# Fixed seed so the train/eval partition below is the same on every run.
random.seed(42)

# Images are read from Train_Image_CAM, masks from Train_instance_gt_CAM.
root = "COD10K"
train_img = os.path.join(root, "Train_Image_CAM")
train_mask = os.path.join(root, "Train_instance_gt_CAM")

# Output tree: <out>/<split>/img and <out>/<split>/mask for every split.
out = "COD10K_SPLIT"
for split in ("train", "eval", "test"):
    for kind in ("img", "mask"):
        os.makedirs(f"{out}/{split}/{kind}", exist_ok=True)

# Sort first so the seeded shuffle starts from a well-defined order.
imgs = os.listdir(train_img)
imgs.sort()
random.shuffle(imgs)

# The first 15% (rounded down) of the shuffled list is held out for eval;
# everything after it stays in train.
eval_ratio = 0.15
n_eval = int(len(imgs) * eval_ratio)

eval_imgs, train_imgs = imgs[:n_eval], imgs[n_eval:]

def copy_set(img_list, split):
    """Copy images and their masks into the ``split`` output directory.

    For every file name in ``img_list`` the image is copied from
    ``train_img`` to ``{out}/{split}/img`` and the mask with the same stem
    and a ``.png`` extension is copied from ``train_mask`` to
    ``{out}/{split}/mask``.

    Args:
        img_list: Image file names (not paths) located in ``train_img``.
        split: Name of the destination split sub-directory, e.g. ``"train"``.

    Raises:
        OSError: Propagated from ``shutil.copy`` when a source file is
            missing or a destination directory does not exist.
    """
    for f in img_list:
        # Swap only the extension. str.replace would also rewrite a ".jpg"
        # that happens to appear in the middle of the file name.
        mask_name = os.path.splitext(f)[0] + ".png"
        shutil.copy(
            f"{train_img}/{f}",
            f"{out}/{split}/img/{f}"
        )
        shutil.copy(
            f"{train_mask}/{mask_name}",
            f"{out}/{split}/mask/{mask_name}"
        )

# Write out the train and eval partitions.
copy_set(train_imgs, "train")
copy_set(eval_imgs, "eval")

# Copy the test set as-is: every file in Test_Image_CAM goes to test/img and
# its mask from Test_instance_gt_CAM goes to test/mask.
for f in os.listdir(f"{root}/Test_Image_CAM"):
    # Swap only the extension. str.replace would also rewrite a ".jpg" that
    # appears in the middle of the file name.
    mask_name = os.path.splitext(f)[0] + ".png"
    shutil.copy(
        f"{root}/Test_Image_CAM/{f}",
        f"{out}/test/img/{f}"
    )
    shutil.copy(
        f"{root}/Test_instance_gt_CAM/{mask_name}",
        f"{out}/test/mask/{mask_name}"
    )


## MSRA-B

In [1]:
import os, random, shutil

# Fixed seed so the three-way partition below is the same on every run.
random.seed(42)

root = "MSRA-B"
out = "MSRA-B_SPLIT"

# Output tree: <out>/<split>/img and <out>/<split>/mask for every split.
for s in ("train", "eval", "test"):
    for sub in ("img", "mask"):
        os.makedirs(f"{out}/{s}/{sub}", exist_ok=True)

# Collect the image file names (only *.jpg entries of the root directory),
# sorted so the seeded shuffle starts from a well-defined order.
images = [entry for entry in os.listdir(root) if entry.endswith(".jpg")]
images.sort()
random.shuffle(images)

# 70% train and 15% eval (both rounded down); the remainder is test.
n = len(images)
train_n = int(0.7 * n)
eval_n = int(0.15 * n)

train_imgs, eval_imgs, test_imgs = (
    images[:train_n],
    images[train_n:train_n + eval_n],
    images[train_n + eval_n:],
)

def copy_split(img_list, split):
    """Copy image/mask pairs from ``root`` into the ``split`` output directory.

    For each entry the image ``{root}/{name}.jpg`` is copied to
    ``{out}/{split}/img`` and the mask ``{root}/{name}.png`` to
    ``{out}/{split}/mask``, where ``name`` is the entry without its trailing
    ``.jpg``.

    Args:
        img_list: Image file names (``*.jpg``) located directly in ``root``.
        split: Name of the destination split sub-directory, e.g. ``"train"``.

    Raises:
        OSError: Propagated from ``shutil.copy`` when a source file is
            missing or a destination directory does not exist.
    """
    suffix = ".jpg"
    for img in img_list:
        # Strip only a trailing ".jpg". str.replace would also remove a
        # ".jpg" in the middle of the name and point at the wrong file.
        name = img[:-len(suffix)] if img.endswith(suffix) else img
        shutil.copy(f"{root}/{name}.jpg", f"{out}/{split}/img/{name}.jpg")
        shutil.copy(f"{root}/{name}.png", f"{out}/{split}/mask/{name}.png")

# Copy each partition into its own sub-directory, in train/eval/test order.
for split_name, file_names in (
    ("train", train_imgs),
    ("eval", eval_imgs),
    ("test", test_imgs),
):
    copy_split(file_names, split_name)


## DUTS-TE

In [2]:
import os, random, shutil

# Fixed seed so the three-way partition below is the same on every run.
random.seed(42)

# Source directories: images and masks from both listed sub-folders are pooled.
img_dirs = [
    "DUTS-TE/DUTS-TR/DUTS-TR-Image",
    "DUTS-TE/DUTS-TE/DUTS-TE-Image"
]

mask_dirs = [
    "DUTS-TE/DUTS-TR/DUTS-TR-Mask",
    "DUTS-TE/DUTS-TE/DUTS-TE-Mask"
]

# Output tree: <out>/<split>/img and <out>/<split>/mask for every split.
out = "DUTS_SPLIT"
for s in ["train", "eval", "test"]:
    os.makedirs(f"{out}/{s}/img", exist_ok=True)
    os.makedirs(f"{out}/{s}/mask", exist_ok=True)

# Gather the basename (file name without ".jpg") of every image.
names = []
for img_dir in img_dirs:
    for f in os.listdir(img_dir):
        if f.endswith(".jpg"):
            # Strip only the trailing extension. str.replace would also
            # remove a ".jpg" that appears in the middle of the name.
            names.append(f[:-len(".jpg")])

# os.listdir returns entries in arbitrary order, so sort before shuffling;
# otherwise the seed above would not make the split reproducible.
names.sort()
random.shuffle(names)

# 70% train and 15% eval (both rounded down); the remainder is test.
n = len(names)
train_n = int(0.7 * n)
eval_n  = int(0.15 * n)

splits = {
    "train": names[:train_n],
    "eval":  names[train_n:train_n+eval_n],
    "test":  names[train_n+eval_n:]
}

def find_file(name, dirs, ext):
    """Return the first existing ``{dir}/{name}{ext}`` path, or ``None``.

    Args:
        name: File name without extension.
        dirs: Directories to probe, in priority order.
        ext: Extension to append, including the leading dot.

    Returns:
        The path built from the first directory in ``dirs`` that contains the
        file, or ``None`` when no directory does.
    """
    candidates = (f"{folder}/{name}{ext}" for folder in dirs)
    return next((p for p in candidates if os.path.exists(p)), None)

# Copy every sample's image and mask into the directory of its split.
for split, items in splits.items():
    for name in items:
        img_path  = find_file(name, img_dirs, ".jpg")
        mask_path = find_file(name, mask_dirs, ".png")

        # find_file returns None when no source directory holds the file.
        # Check both before copying so the failure names the sample and no
        # image is written without its mask.
        if img_path is None or mask_path is None:
            missing = "image" if img_path is None else "mask"
            raise FileNotFoundError(
                f"DUTS {missing} not found for sample {name!r}"
            )

        shutil.copy(img_path,  f"{out}/{split}/img/{name}.jpg")
        shutil.copy(mask_path, f"{out}/{split}/mask/{name}.png")


## ECSSD

In [4]:
import os, random, shutil

# Fixed seed so the three-way partition below is the same on every run.
random.seed(42)

img_dir = "ECSSD/images"
mask_dir = "ECSSD/ground_truth_mask"

# Output tree: <out>/<split>/img and <out>/<split>/mask for every split.
out = "ECSSD_SPLIT"
for s in ["train", "eval", "test"]:
    os.makedirs(f"{out}/{s}/img", exist_ok=True)
    os.makedirs(f"{out}/{s}/mask", exist_ok=True)

# Basenames of all *.jpg images. Only the trailing extension is stripped
# (str.replace would also remove a ".jpg" in the middle of a name), and the
# list is sorted because os.listdir order is arbitrary; without the sort the
# seed above would not make the split reproducible.
names = sorted(
    f[:-len(".jpg")] for f in os.listdir(img_dir) if f.endswith(".jpg")
)
random.shuffle(names)

# 70% train and 15% eval (both rounded down); the remainder is test.
n = len(names)
train_n = int(0.7 * n)
eval_n  = int(0.15 * n)

splits = {
    "train": names[:train_n],
    "eval":  names[train_n:train_n+eval_n],
    "test":  names[train_n+eval_n:]
}

# (source directory, output sub-directory, extension) for the two files that
# make up one sample; the image is copied before the mask.
sample_parts = (
    (img_dir, "img", ".jpg"),
    (mask_dir, "mask", ".png"),
)

for split, items in splits.items():
    for name in items:
        for src_dir, kind, ext in sample_parts:
            shutil.copy(
                f"{src_dir}/{name}{ext}",
                f"{out}/{split}/{kind}/{name}{ext}"
            )


## Kvasir-SEG

In [6]:
import os
import shutil

# Dataset root, the text file listing the validation sample names (one per
# line, without extension), and the output directory.
ROOT = "Kvasir-SEG/Kvasir-SEG/Kvasir-SEG"
VAL_TXT = "Kvasir-SEG/val.txt"
OUT = "Kvasir_VAL"

IMG_SRC = os.path.join(ROOT, "images")
MASK_SRC = os.path.join(ROOT, "masks")

IMG_DST = os.path.join(OUT, "images")
MASK_DST = os.path.join(OUT, "masks")

os.makedirs(IMG_DST, exist_ok=True)
os.makedirs(MASK_DST, exist_ok=True)

with open(VAL_TXT, "r") as f:
    # Skip blank lines (e.g. a trailing empty line). They would otherwise
    # become an empty sample name, inflate the count, and fail on ".jpg".
    names = [line.strip() for line in f if line.strip()]

print(f"Found {len(names)} validation samples")

# Copy each listed sample; both the image and its mask are "<name>.jpg" files,
# and the image is copied before the mask.
for name in names:
    file_name = name + ".jpg"
    for src_dir, dst_dir in ((IMG_SRC, IMG_DST), (MASK_SRC, MASK_DST)):
        shutil.copy(
            os.path.join(src_dir, file_name),
            os.path.join(dst_dir, file_name)
        )


Found 120 validation samples
