# Image Augmentation

# Working version (simple, using Python libraries: PIL)

In [None]:
import os
import imageio
import numpy as np
import math
from PIL import Image, ImageEnhance, ImageOps, ImageFilter

# Dataset root: every sub-folder holds the images of one class
base_directory = r"K:\Dataset_Save\Climate_Dataset\Breed_without_any_background\Breed_Dataset"
# Root folder that receives the augmented images (created if missing)
output_directory = r"K:\Dataset_Save\Climate_Dataset\Augmentation_Dataset_Breed"
os.makedirs(output_directory, exist_ok=True)

# Sizing targets for the augmented dataset
TOTAL_DESIRED_IMAGES = 15000
IMAGES_PER_CLASS = 305  # Based on calculation: 305 augmentations per class

# Each directory directly under the dataset root is treated as a class
class_names = []
for name in os.listdir(base_directory):
    if os.path.isdir(os.path.join(base_directory, name)):
        class_names.append(name)

# Record how many image files each class currently has
images_per_class_dict = {}
for class_name in class_names:
    directory_path = os.path.join(base_directory, class_name)
    images = []
    for file in os.listdir(directory_path):
        if file.endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            images.append(file)
    num_images = len(images)
    images_per_class_dict[class_name] = num_images

# Overall total is the sum of the per-class counts
total_existing_images = sum(images_per_class_dict.values())

print(f"Total existing images: {total_existing_images}")
print(f"Number of classes: {len(class_names)}")
print(f"Target total images: {TOTAL_DESIRED_IMAGES}")

# Define simple augmentation functions using PIL
def apply_augmentation(image, aug_number):
    """Apply one randomised PIL augmentation chosen by ``aug_number % 10``.

    Args:
        image: A PIL image, or a numpy array (cast to uint8 and converted
            to a PIL image first).
        aug_number: Integer selector; its remainder modulo 10 picks the
            transform:
            0 rotate, 1 horizontal flip, 2 brightness, 3 contrast,
            4 crop-and-resize, 5 Gaussian blur, 6 colour, 7 sharpness,
            8 rotate then flip, 9 contrast followed by brightness.

    Returns:
        The augmented image as returned by the selected PIL operation.
        Rotations are done with ``expand=True``; the crop variant resizes
        back to the input size.
    """
    # Convert to PIL Image if it's a numpy array
    if isinstance(image, np.ndarray):
        image = Image.fromarray(np.uint8(image))

    variant = aug_number % 10

    if variant == 0:
        # Rotate by a random whole-degree angle in [-25, 25)
        angle = np.random.randint(-25, 25)
        return image.rotate(angle, expand=True)
    elif variant == 1:
        # Flip horizontally
        return ImageOps.mirror(image)
    elif variant == 2:
        # Adjust brightness
        enhancer = ImageEnhance.Brightness(image)
        factor = np.random.uniform(0.7, 1.3)
        return enhancer.enhance(factor)
    elif variant == 3:
        # Adjust contrast
        enhancer = ImageEnhance.Contrast(image)
        factor = np.random.uniform(0.7, 1.3)
        return enhancer.enhance(factor)
    elif variant == 4:
        # Crop a random margin (under 10% per side) and resize back
        width, height = image.size
        # randint requires low < high; for sides shorter than 10 px the
        # 10% margin is 0, so clamp the exclusive bound to 1 (margin 0)
        # instead of raising ValueError.
        max_dx = max(1, width // 10)
        max_dy = max(1, height // 10)
        left = np.random.randint(0, max_dx)
        top = np.random.randint(0, max_dy)
        right = width - np.random.randint(0, max_dx)
        bottom = height - np.random.randint(0, max_dy)
        cropped = image.crop((left, top, right, bottom))
        return cropped.resize((width, height))
    elif variant == 5:
        # Add blur
        return image.filter(ImageFilter.GaussianBlur(radius=np.random.uniform(0, 1.0)))
    elif variant == 6:
        # Adjust color
        enhancer = ImageEnhance.Color(image)
        factor = np.random.uniform(0.7, 1.3)
        return enhancer.enhance(factor)
    elif variant == 7:
        # Adjust sharpness
        enhancer = ImageEnhance.Sharpness(image)
        factor = np.random.uniform(0.7, 1.3)
        return enhancer.enhance(factor)
    elif variant == 8:
        # Rotate and flip
        angle = np.random.randint(-25, 25)
        rotated = image.rotate(angle, expand=True)
        return ImageOps.mirror(rotated)
    else:
        # Combination of contrast and brightness
        enhancer1 = ImageEnhance.Contrast(image)
        factor1 = np.random.uniform(0.8, 1.2)
        enhanced1 = enhancer1.enhance(factor1)
        enhancer2 = ImageEnhance.Brightness(enhanced1)
        factor2 = np.random.uniform(0.8, 1.2)
        return enhancer2.enhance(factor2)

# Iterate over each class (folder) and write PIL-based augmentations
for class_name in class_names:
    # Source folder for this class and the image files found in it
    directory_path = os.path.join(base_directory, class_name)
    image_files = [file for file in os.listdir(directory_path) if file.endswith(('.png', '.jpg', '.jpeg', '.bmp'))]

    # Create a directory to store augmented images for this class
    class_output_directory = os.path.join(output_directory, class_name)
    os.makedirs(class_output_directory, exist_ok=True)

    # Number of images to generate for this class
    num_to_generate = IMAGES_PER_CLASS
    imgs_count = len(image_files)

    print(f"Processing class: {class_name}")
    print(f"  Original images: {imgs_count}")

    # An empty class folder would otherwise divide by zero below
    if imgs_count == 0:
        print("  No source images found, skipping class")
        continue

    # Spread the target evenly over the available originals (rounded up)
    augmentations_per_image = math.ceil(num_to_generate / imgs_count)

    print(f"  Augmentations to generate: {num_to_generate}")
    print(f"  Augmentations per image: {augmentations_per_image}")

    # Counter to track total augmentations for this class
    augmentation_counter = 0

    for filename in image_files:
        # Stop as soon as the class target is reached
        if augmentation_counter >= num_to_generate:
            break

        image_path = os.path.join(directory_path, filename)
        name_root, extension = os.path.splitext(filename)

        try:
            # Image.open keeps the file open until the pixels are read;
            # copy inside the context manager so the handle is released
            # before the augmentations run.
            with Image.open(image_path) as source_image:
                original_image = source_image.copy()
        except Exception as e:
            print(f"Error loading {image_path}: {str(e)}")
            continue

        # Perform and save each augmentation individually
        for i in range(augmentations_per_image):
            if augmentation_counter >= num_to_generate:
                break

            try:
                augmented_image = apply_augmentation(original_image.copy(), i)

                # Output keeps the source extension: <name>_aug<i><ext>
                output_filename = f"{name_root}_aug{i}{extension}"
                output_path = os.path.join(class_output_directory, output_filename)
                augmented_image.save(output_path)

                # Only successful saves count towards the target
                augmentation_counter += 1

            except Exception as e:
                print(f"Error applying augmentation {i} to {image_path}: {str(e)}")
                continue

# Report per-class and overall image counts
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
total_augmented_images = 0
for class_name in class_names:
    # Image files in the source folder of this class
    original_path = os.path.join(base_directory, class_name)
    original_count = sum(1 for file in os.listdir(original_path) if file.endswith(image_extensions))

    # Image files in the augmentation output folder of this class
    augmented_path = os.path.join(output_directory, class_name)
    augmented_count = sum(1 for file in os.listdir(augmented_path) if file.endswith(image_extensions))

    total_augmented_images += augmented_count

    # Three report lines followed by a blank separator line
    print(
        f"Class: {class_name}",
        f"  Original images: {original_count}",
        f"  Augmented images: {augmented_count}",
        "",
        sep="\n",
    )

print("Augmentation completed!")
print(f"Total original images: {total_existing_images}")
print(f"Total augmented images: {total_augmented_images}")
print(f"Total dataset size: {total_existing_images + total_augmented_images}")

# imgaug: Simple Augmentation Pipeline

In [None]:
import os
import imageio
import numpy as np
import math
import imageio.v2 as imageio

# Fix for collections.Iterable issue in newer Python versions
# This patch must be BEFORE importing imgaug
import collections.abc
import collections
collections.Iterable = collections.abc.Iterable

# Now import imgaug
import imgaug as ia
from imgaug import augmenters as iaa

# Dataset root: every sub-folder holds the images of one class
base_directory = r"K:\Dataset_Save\Climate_Dataset\Breed_without_any_background\Breed_Dataset"
# Root folder that receives the augmented images (created if missing)
output_directory = r"K:\Dataset_Save\Climate_Dataset\Augmentation_Dataset_Breed"
os.makedirs(output_directory, exist_ok=True)

# Seed imgaug for reproducibility
ia.seed(1)

# Augmenters that make up the pipeline, built in the order listed
pipeline_steps = [
    # horizontal flips with 50% probability
    iaa.Fliplr(0.5),
    # random crops
    iaa.Crop(percent=(0, 0.1)),
    # blur 50% of images
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
    # strengthen or weaken contrast
    iaa.LinearContrast((0.75, 1.5)),
    # add noise to 50% of images
    iaa.Sometimes(0.5, iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5)),
    # brightness changes
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    # scale, translate, rotate and shear
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8)
    ),
]
# Chain the steps; random_order=True applies the augmenters in random order
seq = iaa.Sequential(pipeline_steps, random_order=True)

# Sizing targets for the augmented dataset
TOTAL_DESIRED_IMAGES = 15000
IMAGES_PER_CLASS = 305  # Based on calculation: 305 augmentations per class

# Each directory directly under the dataset root is treated as a class
class_names = []
for name in os.listdir(base_directory):
    if os.path.isdir(os.path.join(base_directory, name)):
        class_names.append(name)

# Record how many image files each class currently has
images_per_class_dict = {}
for class_name in class_names:
    directory_path = os.path.join(base_directory, class_name)
    images = []
    for file in os.listdir(directory_path):
        if file.endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            images.append(file)
    num_images = len(images)
    images_per_class_dict[class_name] = num_images

# Overall total is the sum of the per-class counts
total_existing_images = sum(images_per_class_dict.values())

print(f"Total existing images: {total_existing_images}")
print(f"Number of classes: {len(class_names)}")
print(f"Target total images: {TOTAL_DESIRED_IMAGES}")

# Iterate over each class (folder) and write imgaug-based augmentations
for class_name in class_names:
    print(f"\nProcessing class: {class_name}")

    # Source folder for this class and the image files found in it
    directory_path = os.path.join(base_directory, class_name)
    image_files = [file for file in os.listdir(directory_path) if file.endswith(('.png', '.jpg', '.jpeg', '.bmp'))]

    # Create a directory to store augmented images for this class
    class_output_directory = os.path.join(output_directory, class_name)
    os.makedirs(class_output_directory, exist_ok=True)

    # Number of images to generate for this class
    num_to_generate = IMAGES_PER_CLASS
    num_original_images = len(image_files)

    print(f"  Original images: {num_original_images}")

    # An empty class folder would otherwise divide by zero below
    if num_original_images == 0:
        print("  No source images found, skipping class")
        continue

    # Spread the target evenly over the available originals (rounded up)
    augmentations_per_image = math.ceil(num_to_generate / num_original_images)

    print(f"  Augmentations to generate: {num_to_generate}")
    print(f"  Augmentations per image: {augmentations_per_image}")

    # Counter to track total augmentations for this class
    augmentation_counter = 0

    for filename in image_files:
        # Stop as soon as the class target is reached
        if augmentation_counter >= num_to_generate:
            break

        image_path = os.path.join(directory_path, filename)
        name_root, extension = os.path.splitext(filename)

        try:
            # Load the image
            image = imageio.imread(image_path)

            # Expand grayscale (2-D) input to three identical channels
            if len(image.shape) == 2:
                image = np.stack([image, image, image], axis=2)

            # Ensure the image is uint8; values with max <= 1.0 are
            # treated as [0, 1]-scaled and stretched to [0, 255]
            if image.dtype != np.uint8:
                if np.max(image) <= 1.0:
                    image = (image * 255).astype(np.uint8)
                else:
                    image = image.astype(np.uint8)
        except Exception as e:
            print(f"  Error loading {image_path}: {str(e)}")
            continue

        # Create multiple augmentations for this image
        for i in range(augmentations_per_image):
            if augmentation_counter >= num_to_generate:
                break

            try:
                # Apply the augmentation sequence
                augmented_image = seq(image=image)

                # Output keeps the source extension: <name>_aug<i><ext>
                output_filename = f"{name_root}_aug{i}{extension}"
                output_path = os.path.join(class_output_directory, output_filename)
                imageio.imwrite(output_path, augmented_image)

                # Only successful saves count towards the target
                augmentation_counter += 1

                # Print progress every 50 images
                if augmentation_counter % 50 == 0:
                    print(f"  Progress: {augmentation_counter}/{num_to_generate} augmentations generated")

            except Exception as e:
                print(f"  Error applying augmentation to {image_path}: {str(e)}")
                continue

    print(f"  Completed: {augmentation_counter} augmentations generated for class {class_name}")

# Report per-class and overall image counts
print("\n=== SUMMARY ===")
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
total_augmented_images = 0

for class_name in class_names:
    # Image files in the source folder of this class
    original_path = os.path.join(base_directory, class_name)
    original_count = sum(1 for file in os.listdir(original_path) if file.endswith(image_extensions))

    # Image files in the output folder; a missing folder counts as zero
    augmented_path = os.path.join(output_directory, class_name)
    augmented_count = 0
    if os.path.exists(augmented_path):
        augmented_count = sum(1 for file in os.listdir(augmented_path) if file.endswith(image_extensions))

    total_augmented_images += augmented_count

    # Three report lines followed by a blank separator line
    print(
        f"Class: {class_name}",
        f"  Original images: {original_count}",
        f"  Augmented images: {augmented_count}",
        "",
        sep="\n",
    )

print("Augmentation completed!")
print(f"Total original images: {total_existing_images}")
print(f"Total augmented images: {total_augmented_images}")
print(f"Total dataset size: {total_existing_images + total_augmented_images}")

# imgaug: Advanced Augmentation (Keeps the Original Images)

In [2]:
# # Patch imgaug directly to avoid np.bool issues
# import sys
# sys.modules['numpy'].bool = bool  # Monkey patch numpy.bool to use Python's bool

In [None]:
import os
import imageio.v2 as imageio
import numpy as np
import math
import warnings
import shutil  # For copying original files

# Suppress specific warnings
warnings.filterwarnings('ignore', category=UserWarning)

# Fix for collections.Iterable issue in newer Python versions
import collections.abc
import collections
collections.Iterable = collections.abc.Iterable

# Restore the ``numpy.bool`` alias when it is missing.
# NumPy 1.24 removed the deprecated ``np.bool`` alias that imgaug still
# references, so it has to be added back *only when absent*. Where the
# attribute already exists (older NumPy, or NumPy 2.x where ``np.bool`` is
# a real scalar type) it is left untouched.
# This must run BEFORE importing imgaug.
import sys
_numpy_module = sys.modules.get('numpy')
if _numpy_module is not None and not hasattr(_numpy_module, 'bool'):
    _numpy_module.bool = bool

# Now import imgaug
import imgaug as ia
from imgaug import augmenters as iaa

# Dataset root: every sub-folder holds the images of one class
base_directory = r"K:\Dataset_Save\Climate_Dataset\Breed_without_any_background\Breed_Dataset"
# Root folder that receives originals plus augmented images (created if missing)
output_directory = r"K:\Dataset_Save\Climate_Dataset\Augmentation_Dataset_Breed"
os.makedirs(output_directory, exist_ok=True)

# Seed imgaug for reproducibility
ia.seed(1)


def sometimes(aug):
    """Wrap ``aug`` so that it is applied with 50% probability."""
    return iaa.Sometimes(0.5, aug)


# Augmenters that make up the pipeline, built in the order listed
pipeline_steps = [
    # Basic geometric augmentations
    iaa.Fliplr(0.5),  # horizontal flips
    iaa.Flipud(0.2),  # vertical flips
    iaa.Crop(percent=(0, 0.1)),  # random crops

    # Blur, contrast and brightness adjustments
    iaa.GaussianBlur(sigma=(0, 0.5)),
    iaa.LinearContrast((0.75, 1.5)),
    iaa.Multiply((0.8, 1.2)),

    # Affine transform; newly exposed pixels are filled with the
    # constant value 0 (mode="constant", cval=0)
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8),
        mode="constant",
        cval=0
    ),

    # Simple local distortions, applied half of the time
    sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.03))),
]
# Chain the steps; random_order=True applies the augmenters in random order
seq = iaa.Sequential(pipeline_steps, random_order=True)

# Sizing targets for the augmented dataset
TOTAL_DESIRED_IMAGES = 15000
IMAGES_PER_CLASS = 305  # Based on calculation: 305 augmentations per class

# Each directory directly under the dataset root is treated as a class
class_names = []
for name in os.listdir(base_directory):
    if os.path.isdir(os.path.join(base_directory, name)):
        class_names.append(name)

# Count the image files currently present across all classes
total_existing_images = 0
for class_name in class_names:
    directory_path = os.path.join(base_directory, class_name)
    images = []
    for file in os.listdir(directory_path):
        if file.endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            images.append(file)
    total_existing_images += len(images)

print(f"Total existing images: {total_existing_images}")
print(f"Number of classes: {len(class_names)}")
print(f"Target total images: {TOTAL_DESIRED_IMAGES}")

def _to_uint8_rgba(image):
    """Return ``image`` as uint8, adding an opaque alpha channel if missing.

    The dtype is normalised BEFORE the alpha channel is appended: once a
    255-valued alpha plane is part of the array, the ``max <= 1.0`` test can
    no longer detect [0, 1]-scaled data and the colour channels would be
    truncated to (almost) black.

    Args:
        image: numpy array as returned by ``imageio.imread``.

    Returns:
        uint8 array. 2-D (grayscale) input becomes H x W x 4, 3-channel input
        gains a fourth channel filled with 255; any other layout keeps its
        shape.
    """
    if image.dtype != np.uint8:
        if np.max(image) <= 1.0:
            image = (image * 255).astype(np.uint8)
        else:
            image = image.astype(np.uint8)

    if len(image.shape) == 2:
        # Grayscale -> RGBA with full opacity
        alpha = np.full(image.shape, 255, dtype=np.uint8)
        image = np.stack([image, image, image, alpha], axis=2)
    elif len(image.shape) == 3 and image.shape[2] == 3:
        # RGB -> RGBA with full opacity
        alpha = np.full(image.shape[:2], 255, dtype=np.uint8)
        image = np.dstack((image, alpha))

    return image


# Iterate over each class (folder): copy the originals, then top the class
# up to IMAGES_PER_CLASS with augmented PNGs
for class_name in class_names:
    print(f"\nProcessing class: {class_name}")

    # Source folder for this class and the image files found in it
    directory_path = os.path.join(base_directory, class_name)
    image_files = [file for file in os.listdir(directory_path) if file.endswith(('.png', '.jpg', '.jpeg', '.bmp'))]

    # Create a directory to store the images for this class
    class_output_directory = os.path.join(output_directory, class_name)
    os.makedirs(class_output_directory, exist_ok=True)

    # First, copy all original images to the output directory
    print(f"  Copying original images...")
    original_count = 0
    for filename in image_files:
        source_path = os.path.join(directory_path, filename)

        if filename.lower().endswith('.png'):
            # If already PNG, just copy
            shutil.copy2(source_path, os.path.join(class_output_directory, filename))
        else:
            # Non-PNG originals are re-encoded as RGBA PNG
            try:
                image = _to_uint8_rgba(imageio.imread(source_path))
                dest_filename = os.path.splitext(filename)[0] + ".png"
                dest_path = os.path.join(class_output_directory, dest_filename)
                imageio.imwrite(dest_path, image, format='PNG')
            except Exception as e:
                print(f"  Error converting {source_path} to PNG: {str(e)}")
                # If conversion fails, just copy the original
                shutil.copy2(source_path, os.path.join(class_output_directory, filename))

        original_count += 1

    print(f"  Copied {original_count} original images")

    # Number of additional augmentations to generate
    num_to_generate = IMAGES_PER_CLASS - original_count

    # Only generate augmentations if we need more images
    if num_to_generate <= 0:
        print(f"  No need for augmentations, already have {original_count} images")
        continue

    num_original_images = len(image_files)

    # An empty class folder would otherwise divide by zero below
    if num_original_images == 0:
        print("  No source images found, skipping augmentation")
        continue

    # Spread the shortfall evenly over the available originals (rounded up)
    augmentations_per_image = math.ceil(num_to_generate / num_original_images)

    print(f"  Original images: {num_original_images}")
    print(f"  Additional augmentations to generate: {num_to_generate}")
    print(f"  Augmentations per image: {augmentations_per_image}")

    # Counter to track total augmentations for this class
    augmentation_counter = 0

    for filename in image_files:
        # Stop as soon as the class target is reached
        if augmentation_counter >= num_to_generate:
            break

        image_path = os.path.join(directory_path, filename)
        name_root = os.path.splitext(filename)[0]

        try:
            # Load and normalise to uint8 RGBA so every output carries alpha
            image = _to_uint8_rgba(imageio.imread(image_path))
        except Exception as e:
            print(f"  Error loading {image_path}: {str(e)}")
            continue

        # Create multiple augmentations for this image
        for i in range(augmentations_per_image):
            if augmentation_counter >= num_to_generate:
                break

            try:
                # Apply the augmentation sequence
                augmented_image = seq(image=image)

                # Always write PNG, whatever the source extension was
                output_filename = f"{name_root}_aug{i}.png"
                output_path = os.path.join(class_output_directory, output_filename)
                imageio.imwrite(output_path, augmented_image, format='PNG')

                # Only successful saves count towards the target
                augmentation_counter += 1

                # Print progress every 50 images
                if augmentation_counter % 50 == 0:
                    print(f"  Progress: {augmentation_counter}/{num_to_generate} augmentations generated")

            except Exception as e:
                print(f"  Error with augmentation {i} for {filename}: {str(e)}")
                continue

    print(f"  Completed: {augmentation_counter} augmentations generated for class {class_name}")
    print(f"  Total images in class: {original_count + augmentation_counter}")

# Report per-class and overall image counts
print("\n=== SUMMARY ===")
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
total_output_images = 0

for class_name in class_names:
    # Image files in the output folder; a missing folder counts as zero
    output_path = os.path.join(output_directory, class_name)
    output_count = 0
    if os.path.exists(output_path):
        output_count = sum(1 for file in os.listdir(output_path) if file.endswith(image_extensions))

    total_output_images += output_count

    # Image files in the source folder of this class
    original_path = os.path.join(base_directory, class_name)
    original_count = sum(1 for file in os.listdir(original_path) if file.endswith(image_extensions))

    # Three report lines followed by a blank separator line
    print(
        f"Class: {class_name}",
        f"  Original images: {original_count}",
        f"  Total in output directory: {output_count}",
        "",
        sep="\n",
    )

print("Augmentation completed!")
print(f"Total original images: {total_existing_images}")
print(f"Total images in output directory: {total_output_images}")

### Set the output path for the train/test/validation split

In [9]:
# # Define input folder path containing class folders with images
# # The output_directory is the directory made after augmentation.
# input_folder = output_directory

# # Define output folder path for the split dataset
# # (raw string so the backslash in the Windows path is not read as an escape)
# output_folder = r'D:\Augmented_Cattle_Breeds_Update_split'

In [10]:
# import os
# import shutil
# from sklearn.model_selection import train_test_split


# # Create output folder if it doesn't exist
# os.makedirs(output_folder, exist_ok=True)

# # Get list of class folders
# class_folders = [folder for folder in os.listdir(input_folder) if os.path.isdir(os.path.join(input_folder, folder))]

# # Define train, test, and validation ratios
# train_ratio = 0.7
# test_ratio = 0.15
# val_ratio = 0.15

# # Iterate over each class folder
# for class_folder in class_folders:
#     # Construct the full path to the class folder
#     class_folder_path = os.path.join(input_folder, class_folder)
    
#     # Get list of image files in the class folder
#     image_files = [f for f in os.listdir(class_folder_path) if os.path.isfile(os.path.join(class_folder_path, f))]
    
#     # Skip splitting if there is only one sample
#     if len(image_files) <= 1:
#         print(f"Skipping splitting for class {class_folder} as it contains only one sample.")
#         continue
    
#     # Split image files into train, test, and validation sets
#     train_files, test_val_files = train_test_split(image_files, test_size=(test_ratio + val_ratio), random_state=1234)
#     test_files, val_files = train_test_split(test_val_files, test_size=(val_ratio / (test_ratio + val_ratio)), random_state=1234)
    
#     # Create corresponding folders in the output directory for train, test, and validation
#     train_class_folder = os.path.join(output_folder, 'train', class_folder)
#     test_class_folder = os.path.join(output_folder, 'test', class_folder)
#     val_class_folder = os.path.join(output_folder, 'val', class_folder)
    
#     os.makedirs(train_class_folder, exist_ok=True)
#     os.makedirs(test_class_folder, exist_ok=True)
#     os.makedirs(val_class_folder, exist_ok=True)
    
#     # Function to copy images from source to destination folder
#     def copy_images(source_files, destination_folder):
#         for file in source_files:
#             src = os.path.join(class_folder_path, file)
#             dst = os.path.join(destination_folder, file)
#             shutil.copyfile(src, dst)
    
#     # Copy images to train, test, and validation folders
#     copy_images(train_files, train_class_folder)
#     copy_images(test_files, test_class_folder)
#     copy_images(val_files, val_class_folder)

# print("Splitting completed successfully!")

Splitting completed successfully!
