# Cell 1: Configuration

# In [None]:
import os # Import operating system module for file path handling
from PIL import Image, ImageOps # Import PIL for image manipulation and orientation fixes

# Root folder of the raw dataset. process_all_images() treats every immediate
# sub-folder of this directory as one image class. Written as a raw string so
# the Windows backslashes are not interpreted as escape sequences.
SOURCE_DIR = r'D:\Machine Learning project\Natural Images Project\natural_images'

# Output root. The class sub-folder names found in SOURCE_DIR are recreated
# here and the resized, padded images are saved under them.
DEST_DIR = r'D:\Machine Learning project\Natural Images Project\processed_images'

# Final (width, height), in pixels, of every processed image
TARGET_SIZE = (256, 256)

# RGB fill for the padding area around the scaled image (0, 0, 0 is black);
# also used as the background when an RGBA image is flattened to RGB
PADDING_COLOR = (0, 0, 0)

# Cell 2: Image Processing Function

# In [None]:
def resize_with_true_padding(img, target_size, padding_color):
    """
    Fit an image inside ``target_size`` and pad it to exactly that size.

    The image is rotated/flipped according to its EXIF orientation tag,
    flattened to RGB (transparent areas are filled with ``padding_color``),
    shrunk so that it fits within ``target_size`` while keeping its aspect
    ratio, and finally centered on a solid canvas of ``target_size``.

    Args:
        img: A ``PIL.Image.Image`` in any mode.
        target_size: ``(width, height)`` of the returned image, in pixels.
        padding_color: RGB tuple used for the padding and as the background
            behind transparent pixels.

    Returns:
        A new RGB ``PIL.Image.Image`` whose size is exactly ``target_size``.
    """

    # Correct the image orientation based on EXIF data (prevents sideways or
    # upside-down images)
    img = ImageOps.exif_transpose(img)

    # Transparency is not limited to RGBA: 'LA' and 'PA' carry an alpha band,
    # and palette/greyscale/RGB files (GIF, PNG with tRNS) flag it through
    # info['transparency']. A plain convert('RGB') would silently drop it and
    # expose whatever colour sits under the transparent pixels.
    has_transparency = (
        img.mode in ('RGBA', 'LA', 'PA') or 'transparency' in img.info
    )

    if has_transparency:
        # Normalise to RGBA so there is always an explicit alpha band to mask with
        rgba = img if img.mode == 'RGBA' else img.convert('RGBA')
        # Composite onto a solid background of the padding colour
        bg = Image.new('RGB', rgba.size, padding_color)
        bg.paste(rgba, (0, 0), rgba.split()[-1])
        img = bg
    # Opaque image that is not already RGB (e.g. greyscale, CMYK): convert it
    elif img.mode != 'RGB':
        img = img.convert('RGB')

    # Shrink in place so the image fits within target_size. thumbnail() never
    # enlarges, so images already smaller than target_size keep their size and
    # simply receive more padding.
    img.thumbnail(target_size, Image.Resampling.LANCZOS)

    # Create a blank canvas of the exact target size
    new_img = Image.new("RGB", target_size, padding_color)

    # Offsets that center the scaled image on the canvas
    paste_x = (target_size[0] - img.width) // 2
    paste_y = (target_size[1] - img.height) // 2
    new_img.paste(img, (paste_x, paste_y))

    return new_img

# Cell 3: Main Processing Logic

# In [None]:
def process_all_images():
    """
    Resize and pad every image under SOURCE_DIR and save it under DEST_DIR.

    Each immediate sub-folder of SOURCE_DIR is treated as one class. For every
    class, a folder of the same name is created under DEST_DIR and each file
    with a recognised image extension is passed through
    resize_with_true_padding() and saved under its original file name.

    A file that cannot be opened, processed or saved is reported and skipped
    so that one bad image does not abort the run; the closing summary states
    how many images failed.

    Returns:
        None. Progress and errors are printed to the console.
    """
    print("Starting image processing...")
    print(f"Source: '{SOURCE_DIR}'")
    print(f"Destination: '{DEST_DIR}'")

    # Only the directory listing can raise FileNotFoundError, so keep the
    # try body limited to it
    try:
        source_entries = os.listdir(SOURCE_DIR)
    except FileNotFoundError:
        print(f"Error: Source directory '{SOURCE_DIR}' not found.")
        return

    # Identify sub-folders representing different classes
    class_folders = [
        entry for entry in source_entries
        if os.path.isdir(os.path.join(SOURCE_DIR, entry))
    ]
    if not class_folders:
        print(f"Error: No sub-folders found in '{SOURCE_DIR}'.")
        return

    # Extensions (lower-case) of the files that are treated as images
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    total_failed = 0  # Images that could not be processed, across all folders

    # Iterate through each class folder (e.g., 'cat', 'dog', 'car')
    for folder_name in class_folders:
        src_folder_path = os.path.join(SOURCE_DIR, folder_name)
        dest_folder_path = os.path.join(DEST_DIR, folder_name)

        # Create the destination directory if it doesn't already exist
        os.makedirs(dest_folder_path, exist_ok=True)
        print(f"Processing folder: '{folder_name}'...")

        image_count = 0  # Images successfully saved for this folder

        for image_name in os.listdir(src_folder_path):
            # Skip anything that does not look like a standard image file
            if not image_name.lower().endswith(image_extensions):
                continue

            src_image_path = os.path.join(src_folder_path, image_name)
            dest_image_path = os.path.join(dest_folder_path, image_name)

            try:
                with Image.open(src_image_path) as img:
                    img_processed = resize_with_true_padding(img, TARGET_SIZE, PADDING_COLOR)
                    img_processed.save(dest_image_path)
            except Exception as e:
                # Deliberately broad: report the file and carry on with the rest
                total_failed += 1
                print(f"  [!] Failed to process '{image_name}': {e}")
            else:
                image_count += 1

        print(f"  > Done. Processed and saved {image_count} images.")

    print("-" * 30)
    # Only claim success when every image was actually processed
    if total_failed:
        print(f"Finished with errors: {total_failed} image(s) could not be processed.")
    else:
        print("All folders processed successfully!")

# Cell 4: Execution

# In [None]:
if __name__ == "__main__":
    # process_all_images() only creates missing destination folders; it never
    # clears them, so output left over from an earlier run would stay mixed in
    # with the new results. Remind the user to remove it first.
    print("Reminder: Make sure you delete the old 'processed_images' folder before running.")
    # Run the full resize-and-pad pipeline over every class folder
    process_all_images()