In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Root of the dataset. Defaults to the original local checkout; set the
# DATASET_BASE environment variable to point the notebook at another location
# without editing this cell.
BASE = os.environ.get(
    "DATASET_BASE",
    "/Users/aditya/Desktop/Repositories/MinorProject-1/Dataset",
)

# The folder that contains ALL your images (one sub-folder per class)
ALL_DIR = os.path.join(BASE, "ALL")

# New split folders
TRAIN_NEW = os.path.join(BASE, "train_new")
VAL_NEW = os.path.join(BASE, "val_new")
TEST_NEW = os.path.join(BASE, "test_new")

In [None]:
# Tally the entries inside every class sub-folder of ALL_DIR and report the
# per-class figure followed by the grand total.
class_dirs = (
    (name, os.path.join(ALL_DIR, name)) for name in os.listdir(ALL_DIR)
)

total = 0
for cls, class_path in class_dirs:
    # Only directories are treated as classes; loose files are ignored.
    if os.path.isdir(class_path):
        count = len(os.listdir(class_path))
        print(f"{cls}: {count}")
        total += count

print("\nTotal images:", total)

Strawberry___healthy: 2280
Grape___Black_rot: 2360
Potato___Early_blight: 2424
Blueberry___healthy: 2270
Corn_(maize)___healthy: 2324
Tomato___Target_Spot: 2284
Peach___healthy: 2160
Potato___Late_blight: 2424
Tomato___Late_blight: 2314
Tomato___Tomato_mosaic_virus: 2238
Pepper,_bell___healthy: 2485
Orange___Haunglongbing_(Citrus_greening): 2513
Tomato___Leaf_Mold: 2352
Grape___Leaf_blight_(Isariopsis_Leaf_Spot): 2152
Cherry_(including_sour)___Powdery_mildew: 2104
Apple___Cedar_apple_rust: 2201
Tomato___Bacterial_spot: 2127
Grape___healthy: 2115
Tomato___Early_blight: 2400
Corn_(maize)___Common_rust_: 2384
Grape___Esca_(Black_Measles): 2400
Raspberry___healthy: 2226
Tomato___healthy: 2407
Cherry_(including_sour)___healthy: 2282
Tomato___Tomato_Yellow_Leaf_Curl_Virus: 2452
Apple___Apple_scab: 2521
Corn_(maize)___Northern_Leaf_Blight: 2386
Tomato___Spider_mites Two-spotted_spider_mite: 2176
Peach___Bacterial_spot: 2297
Pepper,_bell___Bacterial_spot: 2391
Tomato___Septoria_leaf_spot: 2181

In [4]:
# Start from a clean slate: wipe any split folders left over from an earlier
# run, then recreate them empty.
split_dirs = (TRAIN_NEW, VAL_NEW, TEST_NEW)

for stale_dir in filter(os.path.exists, split_dirs):
    shutil.rmtree(stale_dir)

for split_dir in split_dirs:
    os.makedirs(split_dir, exist_ok=True)

print("Prepared new empty split folders.")


Prepared new empty split folders.


In [5]:
image_paths = []
labels = []

# os.listdir() returns entries in arbitrary order, so both levels are sorted:
# the seeded split that consumes these lists then sees the same sequence on
# every machine and every run.
for class_name in sorted(os.listdir(ALL_DIR)):
    class_folder = os.path.join(ALL_DIR, class_name)
    if not os.path.isdir(class_folder):
        continue

    for img in sorted(os.listdir(class_folder)):
        img_path = os.path.join(class_folder, img)
        # Skip anything that is not a regular file (e.g. a nested folder);
        # copying it later as if it were an image would fail.
        if not os.path.isfile(img_path):
            continue
        image_paths.append(img_path)
        labels.append(class_name)

print("Total images collected:", len(image_paths))


Total images collected: 87873


In [6]:
# Target proportions of the full dataset:
#   train 70% / validation 20% / test 10%
SPLIT_SEED = 42

# Stage 1: hold out 30% of everything (to be divided into val + test).
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    image_paths, labels,
    test_size=0.30,
    stratify=labels,
    random_state=SPLIT_SEED,
)

# Stage 2: one third of the held-out 30% becomes the test set (10% overall);
# the remaining two thirds form the validation set (20% overall).
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths, temp_labels,
    test_size=(10 / 30),
    stratify=temp_labels,
    random_state=SPLIT_SEED,
)

for split_name, split_items in (
    ("Train", train_paths),
    ("Val", val_paths),
    ("Test", test_paths),
):
    print(f"{split_name}:", len(split_items))


Train: 61511
Val: 17574
Test: 8788


In [7]:
def copy_split(paths, labels, target_dir):
    """Copy each file into ``target_dir/<label>/``, keeping its file name.

    Args:
        paths: Iterable of source file paths.
        labels: Iterable of class names, paired positionally with ``paths``.
        target_dir: Root folder of the split; one sub-folder per label is
            created on demand.

    Pairs are formed with ``zip``, so iteration stops at the shorter of the
    two iterables. A file whose name already exists in the destination class
    folder is overwritten.
    """
    prepared_dirs = set()
    for src, label in zip(paths, labels):
        class_dir = os.path.join(target_dir, label)
        # Create each class folder once instead of issuing a makedirs call
        # for every single file.
        if class_dir not in prepared_dirs:
            os.makedirs(class_dir, exist_ok=True)
            prepared_dirs.add(class_dir)
        shutil.copy(src, class_dir)

# Materialise the three splits on disk, one destination folder per split.
for split_paths, split_labels, split_dir in (
    (train_paths, train_labels, TRAIN_NEW),
    (val_paths, val_labels, VAL_NEW),
    (test_paths, test_labels, TEST_NEW),
):
    copy_split(split_paths, split_labels, split_dir)

print("All files copied successfully!")


All files copied successfully!


In [8]:
def count_images(folder):
    """Print the number of entries in each class sub-folder of ``folder``.

    Args:
        folder: Path to a split directory containing one sub-folder per class.

    Prints a header line, one ``<class>: <count>`` line per sub-folder and a
    final total. Returns ``None``.
    """
    print("\nChecking:", folder)
    total = 0
    for cls in os.listdir(folder):
        cls_path = os.path.join(folder, cls)
        # Ignore loose files (e.g. .DS_Store) sitting next to the class
        # folders; calling os.listdir() on them would raise.
        if not os.path.isdir(cls_path):
            continue
        count = len(os.listdir(cls_path))
        print(f"{cls}: {count}")
        total += count
    print("Total:", total)

# Report per-class counts for each freshly written split folder.
for split_dir in (TRAIN_NEW, VAL_NEW, TEST_NEW):
    count_images(split_dir)



Checking: /Users/aditya/Desktop/Repositories/MinorProject-1/Dataset/train_new
Strawberry___healthy: 1596
Grape___Black_rot: 1652
Potato___Early_blight: 1697
Blueberry___healthy: 1589
Corn_(maize)___healthy: 1627
Tomato___Target_Spot: 1599
Peach___healthy: 1512
Potato___Late_blight: 1697
Tomato___Late_blight: 1620
Tomato___Tomato_mosaic_virus: 1567
Pepper,_bell___healthy: 1739
Orange___Haunglongbing_(Citrus_greening): 1759
Tomato___Leaf_Mold: 1646
Grape___Leaf_blight_(Isariopsis_Leaf_Spot): 1506
Cherry_(including_sour)___Powdery_mildew: 1473
Apple___Cedar_apple_rust: 1541
Tomato___Bacterial_spot: 1489
Grape___healthy: 1480
Tomato___Early_blight: 1680
Corn_(maize)___Common_rust_: 1669
Grape___Esca_(Black_Measles): 1680
Raspberry___healthy: 1558
Tomato___healthy: 1685
Cherry_(including_sour)___healthy: 1597
Tomato___Tomato_Yellow_Leaf_Curl_Virus: 1716
Apple___Apple_scab: 1765
Corn_(maize)___Northern_Leaf_Blight: 1670
Tomato___Spider_mites Two-spotted_spider_mite: 1523
Peach___Bacterial_s