In [1]:
import os
import shutil
import random
from tqdm import tqdm

In [2]:
RAW_DIR = "../data/rraw/tomato"
PROCESSED_DIR = "../data/processed"

TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

random.seed(42)

In [3]:
classes = os.listdir(RAW_DIR)
print("Found classes:", classes)

Found classes: ['Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___healthy', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite', 'Tomato___Target_Spot', 'Tomato___Tomato_mosaic_virus', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus']


In [4]:
for cls in classes:
    cls_path = os.path.join(RAW_DIR, cls)
    images = os.listdir(cls_path)
    random.shuffle(images)

    total = len(images)
    train_end = int(TRAIN_RATIO * total)
    val_end = train_end + int(VAL_RATIO * total)

    splits = {
        "train": images[:train_end],
        "val": images[train_end:val_end],
        "test": images[val_end:]
    }

    for split, split_images in splits.items():
        split_cls_dir = os.path.join(PROCESSED_DIR, split, cls)
        os.makedirs(split_cls_dir, exist_ok=True)

        for img in tqdm(split_images, desc=f"{cls} → {split}"):
            src = os.path.join(cls_path, img)
            dst = os.path.join(split_cls_dir, img)
            shutil.copy(src, dst)


Tomato___Bacterial_spot → train: 100%|██████████| 1191/1191 [00:33<00:00, 35.74it/s]
Tomato___Bacterial_spot → val: 100%|██████████| 255/255 [00:06<00:00, 37.16it/s]
Tomato___Bacterial_spot → test: 100%|██████████| 256/256 [00:06<00:00, 37.18it/s]
Tomato___Early_blight → train: 100%|██████████| 560/560 [00:15<00:00, 36.67it/s]
Tomato___Early_blight → val: 100%|██████████| 120/120 [00:03<00:00, 34.17it/s]
Tomato___Early_blight → test: 100%|██████████| 120/120 [00:03<00:00, 37.88it/s]
Tomato___healthy → train: 100%|██████████| 891/891 [00:23<00:00, 37.35it/s]
Tomato___healthy → val: 100%|██████████| 190/190 [00:05<00:00, 37.37it/s]
Tomato___healthy → test: 100%|██████████| 192/192 [00:05<00:00, 37.71it/s]
Tomato___Late_blight → train: 100%|██████████| 1068/1068 [00:28<00:00, 37.68it/s]
Tomato___Late_blight → val: 100%|██████████| 229/229 [00:06<00:00, 35.59it/s]
Tomato___Late_blight → test: 100%|██████████| 230/230 [00:06<00:00, 35.45it/s]
Tomato___Leaf_Mold → train: 100%|██████████| 532