# Oversampling the training dataset

In [1]:
import os
import cv2
import random
import numpy as np
from albumentations import (
    HorizontalFlip, VerticalFlip, Rotate, ShiftScaleRotate,
    RandomBrightnessContrast, Compose
)
from albumentations.augmentations.crops.transforms import CenterCrop

# Augmentation pipeline for Albumentations
def get_augmentation_pipeline():
    """Build the Albumentations pipeline used to synthesize extra samples.

    Returns:
        A ``Compose`` chaining, in this order, each with its own probability:
        horizontal flip, vertical flip, rotation (limit 45),
        shift/scale/rotate, brightness/contrast change, and a 150x150
        center crop.
    """
    geometric_steps = [
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        Rotate(limit=45, p=0.5),
        ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.1,
            rotate_limit=30,
            p=0.5,
        ),
    ]
    color_steps = [RandomBrightnessContrast(p=0.2)]
    # The crop is applied with probability 0.3, so not every output is cropped.
    crop_steps = [CenterCrop(height=150, width=150, p=0.3)]

    return Compose(geometric_steps + color_steps + crop_steps)

# Function to load images
def load_images_from_folder(folder_path):
    """Read every entry of ``folder_path`` with ``cv2.imread``.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.

    Returns:
        A list of the loaded images, in ``os.listdir`` order. Entries for
        which ``cv2.imread`` returns ``None`` are left out.
    """
    # Lazily read each entry; the filtering below drops the failed reads.
    decoded = (
        cv2.imread(os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
    )
    return [image for image in decoded if image is not None]

# Function to save augmented images
def save_augmented_images(augmented_images, output_folder, base_name, start_index):
    """Write images to ``output_folder`` as ``{base_name}_{index}.jpg``.

    Numbering starts at ``start_index``. An index whose file name already
    exists in ``output_folder`` is skipped instead of reused, so files that
    are already there (for example originals stored in the same folder) are
    never overwritten.

    Args:
        augmented_images: Iterable of images to pass to ``cv2.imwrite``.
        output_folder: Destination directory; created if it does not exist.
        base_name: File-name prefix.
        start_index: First index to try.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a file was not written.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    index = start_index
    for img in augmented_images:
        target_path = os.path.join(output_folder, f"{base_name}_{index}.jpg")
        # Advance past names that are already taken rather than clobbering them.
        while os.path.exists(target_path):
            index += 1
            target_path = os.path.join(output_folder, f"{base_name}_{index}.jpg")

        # imwrite signals failure through its return value; surface it so the
        # caller does not report images that never reached the disk.
        if not cv2.imwrite(target_path, img):
            raise OSError(f"Failed to write image: {target_path}")
        index += 1

# Oversampling function
def oversample_minority_classes(base_train_folder, target_count):
    """Top up each class folder under ``base_train_folder`` to ``target_count`` images.

    For every sub-directory (one per class) the images are loaded, and
    augmented copies of randomly chosen images are written back into the same
    folder until the class holds ``target_count`` images. Only images at least
    as large as the pipeline's crop are used as augmentation sources, but all
    loaded images count toward the class size because the undersized ones
    remain in the folder.

    Progress and skipped images are reported with ``print``.

    Args:
        base_train_folder: Directory containing one sub-directory per class.
        target_count: Desired number of images per class.
    """
    # Must match the CenterCrop size used in get_augmentation_pipeline().
    min_side = 150
    # Upper bound on augmented images held in memory before they are saved.
    batch_size = 256

    augmentation_pipeline = get_augmentation_pipeline()

    # Iterate over class subdirectories in the train folder
    for class_name in os.listdir(base_train_folder):
        class_folder = os.path.join(base_train_folder, class_name)
        if not os.path.isdir(class_folder):
            continue  # Skip non-directory files

        print(f"Processing class: {class_name}")
        images = load_images_from_folder(class_folder)

        # Filter out images smaller than the crop dimensions
        valid_images = []
        for img in images:
            if img.shape[0] >= min_side and img.shape[1] >= min_side:
                valid_images.append(img)
            else:
                print(f"Skipping image in '{class_name}' due to insufficient dimensions: {img.shape}")

        if not valid_images:
            print(f"No valid images found for class '{class_name}'. Skipping augmentation.")
            continue

        # Skipped images are still part of the class on disk, so the deficit
        # is measured against everything that was loaded, not just the
        # augmentation sources; otherwise the class would overshoot the target.
        current_count = len(images)
        additional_count = target_count - current_count

        if additional_count <= 0:
            print(f"Class '{class_name}' already has enough images ({current_count}). Skipping augmentation.")
            continue

        # Generate and save in batches so memory use stays bounded even when
        # thousands of images have to be added.
        for batch_start in range(0, additional_count, batch_size):
            batch_len = min(batch_size, additional_count - batch_start)
            batch = [
                augmentation_pipeline(image=random.choice(valid_images))['image']
                for _ in range(batch_len)
            ]
            # Save augmented images to the same directory
            save_augmented_images(
                batch,
                class_folder,
                class_name,
                start_index=current_count + batch_start,
            )

        print(f"Added {additional_count} augmented images to class '{class_name}'.")

# Paths and configuration
# Configuration
# Every class is brought up to this many images, which equals the size of the
# majority class (Mithila Painting).
target_images_per_class = 10200

# Original dataset location; the "train" folder below it is modified in place.
base_dataset_path = "C:\\python-course\\Deep-Learning-Projects\\individual-augmented"
train_folder = os.path.join(base_dataset_path, "train")

# Run the oversampling on the train dataset.
oversample_minority_classes(
    base_train_folder=train_folder,
    target_count=target_images_per_class,
)

print("Oversampling completed. Dataset balanced in place.")


  check_for_updates()


Processing class: Mandala Art
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' due to insufficient dimensions: (146, 344, 3)
Skipping image in 'Mandala Art' du