In [10]:
import os
import random

import torch
from PIL import Image
from torchvision import transforms

# Source and destination roots, both relative to the notebook's working directory.
original_folder = './../dataset/raw_data/train'  # original dataset that is read from
output_folder = './../augmented/augment'  # root under which the cropped images are written

# Prepare the destination root up front; exist_ok keeps a re-run of this cell harmless.
os.makedirs(name=output_folder, exist_ok=True)

# Random-crop-then-resize augmentation applied to each sampled image.
# torchvision sizes are (height, width): a 224x224 patch is cut at a random
# position and then resized to 300 pixels high by 400 pixels wide.
# NOTE(review): resizing a square crop to 300x400 changes its aspect ratio --
# presumably intended to match the original image size; confirm.
transform = transforms.Compose([
    # pad_if_needed pads an image that is smaller than the crop window (zero /
    # black fill by default) instead of raising ValueError and aborting the
    # whole run; images that are already large enough are unaffected.
    transforms.RandomCrop((224, 224), pad_if_needed=True),
    transforms.Resize((300, 400)),
])

# Fix the random state so "Restart & Run All" regenerates the same augmented set.
SEED = 42
random.seed(SEED)        # drives how many / which images are sampled per class
torch.manual_seed(SEED)  # drives the crop offsets chosen by torchvision's RandomCrop

# Extensions treated as images; matched case-insensitively so '.JPG' etc. are kept.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Walk every class folder of the original dataset. os.listdir returns entries in
# arbitrary order, so sort to keep the seeded sampling stable between runs.
for class_folder in sorted(os.listdir(original_folder)):
    class_folder_path = os.path.join(original_folder, class_folder)

    # Only directories are class folders; skip stray files.
    if not os.path.isdir(class_folder_path):
        continue

    # Mirror the class folder under the output root.
    output_class_folder = os.path.join(output_folder, class_folder)
    os.makedirs(output_class_folder, exist_ok=True)

    # Collect the image files of this class.
    image_files = sorted(
        filename
        for filename in os.listdir(class_folder_path)
        if filename.lower().endswith(IMAGE_EXTENSIONS)
    )

    # Pick between 40% and 50% of the images. Both bounds are truncated by int(),
    # so very small classes can end up with zero images selected.
    num_images_to_process = random.randint(
        int(0.4 * len(image_files)),
        int(0.5 * len(image_files)),
    )
    sampled_images = random.sample(image_files, num_images_to_process)

    # Write one randomly cropped copy per sampled image.
    for filename in sampled_images:
        original_image_path = os.path.join(class_folder_path, filename)

        # splitext strips the real extension; slicing off 4 characters left a
        # trailing dot on '.jpeg' files (e.g. 'cat.jpeg' -> 'cat._crop.png').
        stem = os.path.splitext(filename)[0]

        # The context manager closes the file handle; convert() returns an
        # independent in-memory RGB copy, so it stays usable after the close.
        with Image.open(original_image_path) as source_image:
            image = source_image.convert('RGB')

        cropped_image = transform(image)  # random crop followed by resize
        cropped_image.save(os.path.join(output_class_folder, f'{stem}_crop.png'))

print("Augmented images cropped and saved successfully.")


Augmented images cropped and saved successfully.
