## **Data Augmentation of Minority Classes**

In [None]:
from google.colab import drive
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
from shutil import copyfile

In [None]:
# 1. Mount Google Drive so the dataset folders under /content/drive are reachable
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
trainset_dir = '/content/drive/MyDrive/ML Project/Project 1 Data/Train_Data'

# Map every class sub-folder of the training set to the number of entries
# it contains; plain files sitting directly in trainset_dir are ignored.
class_counts = {
    entry: len(os.listdir(os.path.join(trainset_dir, entry)))
    for entry in os.listdir(trainset_dir)
    if os.path.isdir(os.path.join(trainset_dir, entry))
}

# Size of the largest class.
max_images = max(class_counts.values())
print(f"Maximum number of images in any class: {max_images}")

In [None]:
# 2. Root folder that receives the augmented dataset.
#    It is created on demand; an already existing folder is reused as-is.
augmented_dir = '/content/drive/MyDrive/ML Project/Augmentation3'
os.makedirs(name=augmented_dir, exist_ok=True)

In [None]:
# Random transformations (rotation, shifts, shear, zoom, horizontal flip)
# used by the generator that produces the augmented images.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Oversample the minority classes.
#
# For every class sub-folder of `trainset_dir` that holds fewer than
# MINORITY_THRESHOLD entries:
#   1. copy the original files into `augmented_dir/<class>`, then
#   2. let `datagen` write augmented JPEGs into the same folder until at
#      least as many augmented images as original entries were produced.
# Classes at or above the threshold are only reported, not copied.

# Classes with fewer entries than this are treated as minority classes.
# NOTE(review): a fixed cut-off is used here rather than the size of the
# largest class (`max_images`) -- confirm that this is intended.
MINORITY_THRESHOLD = 500

for class_folder in os.listdir(trainset_dir):
    class_path = os.path.join(trainset_dir, class_folder)
    if not os.path.isdir(class_path):
        continue

    # List the folder once and reuse the result for counting and copying.
    class_entries = os.listdir(class_path)
    num_images = len(class_entries)
    print(f"Class '{class_folder}' has {num_images} images.")

    if num_images >= MINORITY_THRESHOLD:
        continue

    # Create a folder for augmented data if it doesn't exist
    aug_class_path = os.path.join(augmented_dir, class_folder)
    os.makedirs(aug_class_path, exist_ok=True)

    # Copy original images to the augmented folder. Sub-directories
    # (e.g. notebook checkpoint folders) are skipped because copyfile
    # only handles regular files.
    for img_name in class_entries:
        original_img_path = os.path.join(class_path, img_name)
        if os.path.isfile(original_img_path):
            copyfile(original_img_path, os.path.join(aug_class_path, img_name))

    # One augmented image per original entry, i.e. the class is roughly doubled
    target_num_images = num_images
    print(f"Generating {target_num_images} new images for class '{class_folder}'...")

    # Restrict the generator to the current class; every batch it yields is
    # also written to aug_class_path through save_to_dir.
    generated = 0
    for batch in datagen.flow_from_directory(
            trainset_dir,
            target_size=(300, 400),
            batch_size=32,
            classes=[class_folder],
            save_to_dir=aug_class_path,
            save_prefix='aug',
            save_format='jpeg'):

        batch_len = len(batch[0])
        if batch_len == 0:
            # The generator found no loadable images in this folder; without
            # this guard the counter would never advance and the loop would
            # never terminate.
            print(f"No loadable images found for class '{class_folder}'; skipping augmentation.")
            break

        generated += batch_len
        if generated >= target_num_images:
            break

print("Augmentation complete!")