In [None]:
import os
import shutil
import random
import pandas as pd
from pathlib import Path

# Build a train/val/test image-folder dataset from three source folders.
#
# Images listed in the '365' folder that also have a row in the label CSV are
# shuffled, split 70/15/15, and copied (from every source folder) into
# <output_dir>/<split>/<class>/<prefix>_<image name>.

# Set random seed for reproducibility
random.seed(42)

# Define actual source folders. Each key is also used as the filename prefix
# of the copies made from that folder (e.g. "365_12.jpg").
folders = {
    "365": r"F:\PIL LAB\New_Leaves_dataset\Leaf Dataset\365NoUV",
    "395": r"F:\PIL LAB\New_Leaves_dataset\Leaf Dataset\395NoUV",
    "white": r"F:\PIL LAB\New_Leaves_dataset\Leaf Dataset\WhiteNoUV"
}

# Path to label CSV (must provide the columns 'Img_Name' and 'label')
label_csv_path = r"F:\PIL LAB\Resnet18\Leaves_dataset_labels - Sheet1.csv"

# Output directory
output_dir = Path(r"F:\PIL LAB\Resnet18\custom_dataset")
output_dir.mkdir(parents=True, exist_ok=True)

# Create split & class subfolders: train/0, train/1, etc.
for split in ['train', 'val', 'test']:
    for cls in ['0', '1']:
        (output_dir / split / cls).mkdir(parents=True, exist_ok=True)


def _class_folder(raw_label):
    """Map a raw CSV label to its class folder name.

    Returns '0' or '1' when ``raw_label`` is numerically 0 or 1 (int, float
    such as 1.0, or a numeric string), otherwise ``None`` (NaN, blanks, any
    other value). Only the folders '0' and '1' are created above, so any
    other name would make the copy fail.
    """
    try:
        numeric = float(raw_label)
    except (TypeError, ValueError):
        return None
    # NaN compares unequal to everything, so it falls through to None.
    if numeric == 0.0 or numeric == 1.0:
        return str(int(numeric))
    return None


# Load labels
label_df = pd.read_csv(label_csv_path)
# Convert image names like 0 -> "0.jpg"
label_df['Img_Name'] = label_df['Img_Name'].apply(lambda x: f"{x}.jpg")
# Build dictionary: {'0.jpg': 1, '1.jpg': 0, ...}
label_dict = dict(zip(label_df['Img_Name'], label_df['label']))

# The '365' folder is the reference listing; files that turn out to be absent
# from another folder are reported during the copy step below.
listed_images = [f for f in os.listdir(folders['365']) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
labeled_images = set(listed_images) & set(label_dict.keys())  # Only labeled images

# Exclude images whose label is not 0/1 *before* splitting, so the split sizes
# reflect what is actually copied and the copy loop cannot hit a missing folder.
invalid_images = sorted(name for name in labeled_images if _class_folder(label_dict[name]) is None)
for bad_img in invalid_images:
    print(f"⚠️ Skipping {bad_img}: unusable label {label_dict[bad_img]!r}")

# Sort before shuffling: set iteration order for strings differs between
# interpreter sessions and os.listdir order is arbitrary, so without a fixed
# starting order random.seed(42) would NOT give the same split on every run.
common_images = sorted(labeled_images.difference(invalid_images))

# Shuffle and split keys (70% train / 15% val / 15% test)
random.shuffle(common_images)
total = len(common_images)
train_end = int(0.7 * total)
val_end = int(0.85 * total)
train_imgs = common_images[:train_end]
val_imgs = common_images[train_end:val_end]
test_imgs = common_images[val_end:]

# Copy images into split/class subfolders.
# NOTE: existing files in output_dir are not removed; if the source data or
# labels change between runs, clear output_dir first to avoid stale copies.
for split_name, img_list in zip(['train', 'val', 'test'], [train_imgs, val_imgs, test_imgs]):
    for base_img in img_list:
        label = _class_folder(label_dict[base_img])  # '0' or '1'
        for prefix, folder in folders.items():
            src = Path(folder) / base_img
            if src.exists():
                new_name = f"{prefix}_{base_img}"
                dst = output_dir / split_name / label / new_name
                shutil.copy(src, dst)
            else:
                print(f"⚠️ Missing: {src}")

print("✅ Dataset organized into class subfolders under train/val/test.")


In [None]:
from pathlib import Path

# Report how many image files each split/class folder of the dataset holds.
# output_dir is re-declared here so the cell can run on its own.
output_dir = Path(r"F:\PIL LAB\Resnet18\custom_dataset")

# File extensions (lower-case) that count as images
image_suffixes = ('.jpg', '.jpeg', '.png')

for split in ('train', 'val', 'test'):
    split_dir = output_dir / split
    total_count = 0
    print(f"\n📂 {split.upper()}:")

    for cls in ('0', '1'):
        cls_dir = split_dir / cls
        # Tally only entries whose extension marks them as an image
        cnt = sum(1 for entry in cls_dir.iterdir() if entry.suffix.lower() in image_suffixes)
        print(f"   └─ Class {cls}: {cnt} images")
        total_count += cnt

    print(f"   → Total: {total_count} images")
