In [None]:
import os
import cv2
import shutil
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Input: the dataset root; each entry inside it is treated as one class folder.
dataset_dir = '/content/drive/MyDrive/Classwisefolder'

# Output: root folder that receives the balanced copy of the dataset.
output_dir = '/content/drive/MyDrive/augmented_dataset'

# Number of images every class should end up with.
target_images_per_class = 1000

# Make sure the output root exists (a no-op when it is already there).
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Random augmentation settings (rotation, shifts, shear, zoom and flips along
# both axes), collected in one place so they are easy to tune.
augmentation_options = {
    'rotation_range': 30,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'vertical_flip': True,
}

# Generator used further down to produce the augmented images.
data_generator = ImageDataGenerator(**augmentation_options)

In [None]:
# Balance every class folder to `target_images_per_class` images.
#   * A class with too few images keeps all of its originals and receives
#     exactly the missing number of augmented images, spread as evenly as
#     possible over the originals.
#   * A class with enough images is randomly down-sampled to the target.
for class_name in sorted(os.listdir(dataset_dir)):
    class_dir = os.path.join(dataset_dir, class_name)

    # Stray files next to the class folders would make os.listdir(class_dir) raise.
    if not os.path.isdir(class_dir):
        continue

    output_class_dir = os.path.join(output_dir, class_name)
    os.makedirs(output_class_dir, exist_ok=True)

    # JPEG files of this class. The extension test is case-insensitive so that
    # e.g. 'IMG_1.JPG' is not silently dropped; sorting makes the random
    # choices below reproducible once `random` has been seeded.
    image_files = sorted(
        f for f in os.listdir(class_dir)
        if f.lower().endswith(('.jpg', '.jpeg'))
    )
    num_images = len(image_files)
    if num_images == 0:
        # Nothing to copy or augment (random.sample would raise on an empty list).
        print(f'{class_name}: no JPEG images found, skipping')
        continue

    # Positive: images that have to be generated. Zero or negative: the class
    # already has enough images and only needs down-sampling.
    num_images_diff = target_images_per_class - num_images

    if num_images_diff > 0:
        images_to_keep = image_files
        # Every original gets `copies_each` augmentations; `remainder` randomly
        # chosen originals get one more, so the total is exactly the shortfall.
        copies_each, remainder = divmod(num_images_diff, num_images)
        shuffled_files = random.sample(image_files, num_images)
        augmentation_plan = [
            (image_file, copies_each + (1 if position < remainder else 0))
            for position, image_file in enumerate(shuffled_files)
        ]
    else:
        images_to_keep = random.sample(image_files, target_images_per_class)
        augmentation_plan = []

    # Generate the augmented images.
    num_augmented = 0
    carried_over = 0  # quota of unreadable images, handed to the next readable one
    for image_file, num_copies in augmentation_plan:
        num_copies += carried_over
        carried_over = 0
        if num_copies == 0:
            continue

        image_path = os.path.join(class_dir, image_file)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded.
            print(f'{class_name}: cannot read {image_path}, skipping it for augmentation')
            carried_over = num_copies
            continue

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; the generator saves RGB
        img = img.reshape((1,) + img.shape)  # flow() expects a batch axis in front
        file_stem = os.path.splitext(image_file)[0]

        for _ in range(num_copies):
            # A running serial number in the prefix keeps every output name
            # unique within this run. With a constant prefix the names differ
            # only by a small random suffix chosen by Keras, so files can
            # silently overwrite each other.
            augmented_batches = data_generator.flow(
                img,
                batch_size=1,
                save_to_dir=output_class_dir,
                save_prefix=f'augmented_{num_augmented:05d}_{file_stem}',
                save_format='jpg',
            )
            next(augmented_batches)  # drawing one batch writes one image to disk
            num_augmented += 1

    if carried_over:
        print(f'{class_name}: {carried_over} augmented images could not be generated '
              f'(unreadable source images)')

    # Copy the retained originals to the output directory.
    for image_file in images_to_keep:
        image_path = os.path.join(class_dir, image_file)
        output_image_path = os.path.join(output_class_dir, image_file)
        shutil.copyfile(image_path, output_image_path)

    print(f'{class_name}: copied {len(images_to_keep)} originals, '
          f'generated {num_augmented} augmented images')
