In [5]:
import os
import random
import shutil
from PIL import Image
from tqdm import tqdm  # Import tqdm for progress bar

In [7]:
# Source tree: the split cell treats every entry under this folder as one class.
root_dir = "RawDatasets"

# Destination roots; a sub-folder per class is created under each of them.
train_path, val_path = "Datasets/train", "Datasets/val"

# Share of each class's images assigned to the training / validation split.
# NOTE(review): the split cell visible in this notebook slices on train_ratio
# only and gives validation the remainder, so keep the two values summing to 1.
train_ratio, val_ratio = 0.8, 0.2


In [8]:
# Split every class folder under `root_dir` into train/validation subsets,
# resize each image and write it under `train_path` / `val_path`.

# Output resolution (width, height) applied to every saved image.
target_size = (224, 224)

# Dedicated seeded generator: together with the sorted listings below it makes
# the split identical on every run. With an unseeded shuffle, re-running this
# cell over an existing output tree could leave the same file in both train and
# validation. Output written by earlier unseeded runs should be deleted once.
split_rng = random.Random(42)


def _resize_and_save(src_dir, dst_dir, filenames, desc):
    """Resize the named images from `src_dir` and save them into `dst_dir`.

    Args:
        src_dir: Folder containing the original images.
        dst_dir: Existing folder that receives the resized copies; each copy
            keeps its original file name (and therefore its file format).
        filenames: Names of the files inside `src_dir` to process.
        desc: Label shown on the tqdm progress bar.

    Files that cannot be read as an image or cannot be written are reported
    through `tqdm.write` and skipped, so one bad file does not abort the run.
    """
    for name in tqdm(filenames, desc=desc, unit="image"):
        src_file = os.path.join(src_dir, name)
        try:
            # The context manager closes the underlying file handle, which a
            # bare Image.open() would keep open for every processed image.
            with Image.open(src_file) as raw:
                # Normalise every mode (RGBA, P, LA, L, CMYK, ...) to RGB so
                # JPEG saving cannot fail on an unsupported mode and all
                # outputs share the same channel layout.
                rgb = raw if raw.mode == "RGB" else raw.convert("RGB")
                rgb.resize(target_size).save(os.path.join(dst_dir, name))
        except (OSError, ValueError) as exc:
            tqdm.write(f"Skipping {src_file}: {exc}")


# Sorted so the shared generator is consumed in a stable class order.
for class_folder in sorted(os.listdir(root_dir)):
    class_folder_path = os.path.join(root_dir, class_folder)

    # Ignore stray files sitting next to the class folders.
    if not os.path.isdir(class_folder_path):
        continue

    # create corresponding class folders in the destination folders
    train_class_path = os.path.join(train_path, class_folder)
    os.makedirs(train_class_path, exist_ok=True)

    val_class_path = os.path.join(val_path, class_folder)
    os.makedirs(val_class_path, exist_ok=True)

    # Regular files only, in sorted order: os.listdir() order is arbitrary,
    # so sorting is what makes the seeded shuffle reproducible.
    images = sorted(
        name
        for name in os.listdir(class_folder_path)
        if os.path.isfile(os.path.join(class_folder_path, name))
    )
    split_rng.shuffle(images)

    # First `train_ratio` share goes to training, the remainder to validation.
    split_at = int(train_ratio * len(images))
    train_images, val_images = images[:split_at], images[split_at:]

    # Resize and save the images to their respective destination folders
    print(f"Processing class: {class_folder}")
    _resize_and_save(
        class_folder_path,
        train_class_path,
        train_images,
        f"Training images - {class_folder}",
    )
    _resize_and_save(
        class_folder_path,
        val_class_path,
        val_images,
        f"Validation images - {class_folder}",
    )

print("Preprocessing complete!")


Processing class: Apple__Healthy


Training images - Apple__Healthy: 100%|█████████████████████████████████████████| 1950/1950 [00:56<00:00, 34.58image/s]
Validation images - Apple__Healthy: 100%|█████████████████████████████████████████| 488/488 [00:13<00:00, 35.00image/s]


Processing class: Apple__Rotten


Training images - Apple__Rotten: 100%|██████████████████████████████████████████| 2344/2344 [00:59<00:00, 39.21image/s]
Validation images - Apple__Rotten: 100%|██████████████████████████████████████████| 586/586 [00:14<00:00, 40.08image/s]


Processing class: Grape__Healthy


Training images - Grape__Healthy: 100%|███████████████████████████████████████████| 160/160 [00:01<00:00, 85.40image/s]
Validation images - Grape__Healthy: 100%|███████████████████████████████████████████| 40/40 [00:00<00:00, 41.73image/s]


Processing class: Grape__Rotten


Training images - Grape__Rotten: 100%|████████████████████████████████████████████| 160/160 [00:02<00:00, 67.73image/s]
Validation images - Grape__Rotten: 100%|████████████████████████████████████████████| 40/40 [00:00<00:00, 70.16image/s]


Processing class: Mango__Healthy


Training images - Mango__Healthy: 100%|████████████████████████████████████████| 1450/1450 [00:08<00:00, 173.54image/s]
Validation images - Mango__Healthy: 100%|████████████████████████████████████████| 363/363 [00:02<00:00, 167.95image/s]


Processing class: Mango__Rotten


Training images - Mango__Rotten: 100%|█████████████████████████████████████████| 1797/1797 [00:07<00:00, 255.08image/s]
Validation images - Mango__Rotten: 100%|█████████████████████████████████████████| 450/450 [00:01<00:00, 249.93image/s]


Processing class: Orange__Healthy


Training images - Orange__Healthy: 100%|████████████████████████████████████████| 1660/1660 [00:59<00:00, 28.11image/s]
Validation images - Orange__Healthy: 100%|████████████████████████████████████████| 415/415 [00:15<00:00, 26.91image/s]


Processing class: Orange__Rotten


Training images - Orange__Rotten: 100%|█████████████████████████████████████████| 1748/1748 [00:44<00:00, 39.13image/s]
Validation images - Orange__Rotten: 100%|█████████████████████████████████████████| 438/438 [00:11<00:00, 39.66image/s]

Preprocessing complete!



