In [9]:
import os
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# --- Configuration ---

# Root folder of the original dataset; each sub-folder is treated as one class.
# Set the LADYBIRD_DATASET_DIR environment variable to point the notebook at a
# different location; when it is unset or empty the original path is used.
input_dir = (
    os.environ.get("LADYBIRD_DATASET_DIR")
    or r"C:\Users\VICTUS\Desktop\ladybird_dataset"
)

# Folder where augmented images will be saved (relative to the working directory)
output_dir = "augmented_dataset_ladybird"

# Number of augmented images to generate per original image
num_augmented_images = 30

# --- Augmentation pipeline ---

# Every call to datagen.flow(...) draws a fresh random transform from the
# ranges below (see the Keras ImageDataGenerator documentation for the exact
# semantics of each parameter).
datagen = ImageDataGenerator(
    rotation_range=15,            # random rotation, in degrees
    width_shift_range=0.1,        # horizontal shift, fraction of image width
    height_shift_range=0.1,       # vertical shift, fraction of image height
    shear_range=0.1,              # shear intensity
    zoom_range=0.12,              # zoom factor drawn from [0.88, 1.12]
    horizontal_flip=True,         # randomly mirror left/right
    brightness_range=[0.8, 1.2],  # brightness factor range
    fill_mode='nearest'           # pixels exposed by a transform copy the nearest edge value
)

def _save_augmented_copies(img_path, save_dir, generator, count, size=(300, 300)):
    """Write `count` randomly augmented JPEG copies of one image into `save_dir`.

    Args:
        img_path: Path of the source image file.
        save_dir: Existing directory that receives the augmented copies.
        generator: Object whose ``flow(x, batch_size=1)`` yields augmented
            batches (here: the notebook's ``datagen``).
        count: Number of copies to write; a value <= 0 writes nothing.
        size: ``(width, height)`` the source image is resized to first.

    Returns:
        The number of files written.

    Raises:
        Any exception raised by PIL while opening, decoding or saving.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the array, instead of leaking one handle per image.
    with Image.open(img_path) as src:
        pixels = np.array(src.convert('RGB').resize(size))

    # flow() expects a leading batch axis.
    x = np.expand_dims(pixels, axis=0)

    # Loop-invariant: output names are "<original stem>_aug_<index>.jpg".
    stem = os.path.splitext(os.path.basename(img_path))[0]

    written = 0
    # flow() never stops on its own; zipping with range() bounds it to `count`
    # batches without pulling an extra, unused batch from the generator.
    for index, batch in zip(range(count), generator.flow(x, batch_size=1)):
        aug_img = Image.fromarray(batch[0].astype('uint8'))
        aug_img.save(os.path.join(save_dir, f"{stem}_aug_{index}.jpg"))
        written += 1
    return written


print(f"Starting image augmentation. Generating {num_augmented_images} versions per image...")

images_written = 0
files_failed = 0

# Each sub-directory of input_dir is one class; sorted() keeps the processing
# order (and therefore the log) the same on every run and platform.
for folder in sorted(os.listdir(input_dir)):
    class_path = os.path.join(input_dir, folder)
    if not os.path.isdir(class_path):
        continue  # Skip loose files sitting next to the class folders

    # Mirror the class folder name inside the output directory.
    save_path = os.path.join(output_dir, folder)
    os.makedirs(save_path, exist_ok=True)

    print(f"Processing class: {folder}")

    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        if not os.path.isfile(img_path):
            continue  # Nested folders are not images

        try:
            images_written += _save_augmented_copies(
                img_path, save_path, datagen, num_augmented_images
            )
        except Exception as e:
            # Best effort: a stray non-image or corrupt file must not abort
            # the whole run, but it is reported and counted.
            print(f"Could not process file {img_file} due to error: {e}")
            files_failed += 1

print("\nAugmentation complete.")
print(f"Augmented dataset saved in '{output_dir}' directory.")
print(f"Wrote {images_written} augmented images; {files_failed} file(s) could not be processed.")

Starting image augmentation. Generating 30 versions per image...
Processing class: Anegleis_cardoni
Processing class: Cheilomenes sexmaculata
Processing class: Coceinella septempunctata
Processing class: Conceinella transversalis
Processing class: Henosepilachna spp
Processing class: Micraspis discolor
Processing class: Oenopia sexareata
Could not process file IMG20250624123731(1).zip due to error: cannot identify image file 'C:\\Users\\VICTUS\\Desktop\\ladybird_dataset\\Oenopia sexareata\\IMG20250624123731(1).zip'

Augmentation complete.
Augmented dataset saved in 'augmented_dataset_ladybird' directory.


In [4]:
! pip install tensorflow

