In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import shutil
import tensorflow as tf

In [10]:
# Dataset locations used by the augmentation cells below.
# DATASET_DIR holds the existing images, one sub-directory per class.
DATASET_DIR = "/home/natalyagrokh/img_datasets/combo_ferck_dataset_1"
# AUGMENTED_OUTPUT_DIR receives the generated images, mirrored per class.
AUGMENTED_OUTPUT_DIR = "/home/natalyagrokh/img_datasets/combo_ferck_dataset_2"

# Create the output root up front; exist_ok keeps this cell safe to re-run.
os.makedirs(name=AUGMENTED_OUTPUT_DIR, exist_ok=True)

In [11]:
# Random-transform settings shared by every augmented image. Keeping them in
# one named mapping makes the configuration easy to inspect or log.
AUGMENTATION_SETTINGS = {
    "rotation_range": 30,        # random rotations, in degrees
    "width_shift_range": 0.2,    # horizontal shift
    "height_shift_range": 0.2,   # vertical shift
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,     # random left/right mirroring
    "fill_mode": "nearest",      # how pixels exposed by a transform are filled
}

datagen = ImageDataGenerator(**AUGMENTATION_SETTINGS)

In [12]:
# Function to augment images for underrepresented classes
def augment_and_save(class_name, original_count, multiplier):
    """Write augmented variants of one class's images to AUGMENTED_OUTPUT_DIR.

    Every image file found in ``DATASET_DIR/<class_name>`` is used as a
    source. ``current_count * (multiplier - 1)`` randomly transformed JPEGs
    are written to ``AUGMENTED_OUTPUT_DIR/<class_name>`` via the module-level
    ``datagen``; sources are assigned round-robin, so variant ``i`` comes from
    source image ``i % current_count``. The original files themselves are not
    copied into the output directory.

    Args:
        class_name: Name of the class sub-directory under ``DATASET_DIR``.
        original_count: Unused. Kept only so existing callers keep working;
            the count is recomputed from the image files actually found.
        multiplier: Integer growth factor. A value <= 1 (or an empty class
            directory) generates nothing.

    Raises:
        FileNotFoundError: If ``DATASET_DIR/<class_name>`` does not exist
            (propagated from ``os.listdir``).
    """
    class_path = os.path.join(DATASET_DIR, class_name)
    augmented_class_path = os.path.join(AUGMENTED_OUTPUT_DIR, class_name)

    # Sorted so the source-image -> "aug_<i>" mapping does not depend on the
    # arbitrary order in which os.listdir returns entries.
    images = sorted(
        os.path.join(class_path, fname)
        for fname in os.listdir(class_path)
        if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))
    )

    # Created only after the listing succeeded, so a missing/misspelled class
    # does not leave an empty output directory behind.
    os.makedirs(augmented_class_path, exist_ok=True)

    current_count = len(images)
    target_count = current_count * multiplier
    print(f"Augmenting {class_name}: {current_count} -> {target_count} images")

    extra_count = target_count - current_count

    # Augment and save new images. Each source image is decoded once and then
    # reused for all of its variants instead of being re-read per variant.
    for offset, img_path in enumerate(images):
        # All variant indices i in [0, extra_count) with
        # i % current_count == offset.
        variant_ids = range(offset, extra_count, current_count)
        if not variant_ids:
            continue  # no variants requested from this image; skip decoding

        img = tf.keras.preprocessing.image.load_img(img_path)
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        batch = img_array[None, ...]  # add the leading batch axis flow() expects

        for i in variant_ids:
            # Pulling a single batch from the iterator transforms the image
            # and writes it to save_to_dir; the returned array is not needed.
            next(datagen.flow(batch, batch_size=1,
                              save_to_dir=augmented_class_path,
                              save_prefix=f"aug_{i}",
                              save_format="jpeg"))

In [13]:
# Augment the severely underrepresented classes by 5x
# (class sub-directory name -> growth multiplier).
classes_to_augment = {"contempt": 5, "disgust": 5}

for class_name, multiplier in classes_to_augment.items():
    # Count of all entries in the class directory, passed through as
    # original_count.
    entry_count = len(os.listdir(os.path.join(DATASET_DIR, class_name)))
    augment_and_save(
        class_name,
        original_count=entry_count,
        multiplier=multiplier,
    )


Augmenting contempt: 54 -> 270 images
Augmenting disgust: 724 -> 3620 images


In [None]:
import tensorflow as tf
import os
import numpy as np

# Augment dataset only for rare classes
def augment_and_save(class_name, multiplier):
    """Augment one class in place, writing new images next to the originals.

    Image files in ``DATASET_DIR/<class_name>`` are used as sources and
    ``current_count * (multiplier - 1)`` randomly transformed JPEGs are saved
    into that same directory. Variant ``i`` is generated from source image
    ``i % current_count`` and saved with the prefix ``aug_<i>``, so every
    generated file gets a distinct name within a run.

    Files whose name starts with ``aug_`` are treated as output of an earlier
    run and are not used as sources; otherwise re-running the cell would
    count generated images as originals and inflate the target. A re-run
    still writes a fresh batch of variants, so clear old ``aug_*`` files
    first if an exact class size is required.

    NOTE: this redefines the three-argument ``augment_and_save`` from the
    earlier cell. Once this cell has run, calls that pass ``original_count``
    fail with a ``TypeError``.

    Args:
        class_name: Name of the class sub-directory under ``DATASET_DIR``.
        multiplier: Integer growth factor. A value <= 1 (or a directory with
            no source images) generates nothing.

    Raises:
        FileNotFoundError: If ``DATASET_DIR/<class_name>`` does not exist
            (propagated from ``os.listdir``).
    """
    class_path = os.path.join(DATASET_DIR, class_name)
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff")

    # Case-insensitive extension match (so "IMG.JPG" is picked up too), sorted
    # for a stable source -> "aug_<i>" mapping, previously generated files
    # excluded.
    images = sorted(
        os.path.join(class_path, fname)
        for fname in os.listdir(class_path)
        if fname.lower().endswith(image_extensions)
        and not fname.startswith("aug_")
    )
    current_count = len(images)
    target_count = current_count * multiplier
    extra_count = target_count - current_count

    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode="nearest"
    )

    # Decode each source image once and reuse it for all of its variants.
    for offset, img_path in enumerate(images):
        # All variant indices i in [0, extra_count) with
        # i % current_count == offset.
        variant_ids = range(offset, extra_count, current_count)
        if not variant_ids:
            continue  # no variants requested from this image; skip decoding

        img = tf.keras.preprocessing.image.load_img(img_path)
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        batch = img_array[None, ...]  # add the batch axis for ImageDataGenerator

        for i in variant_ids:
            # A per-variant prefix keeps file names distinct; with a constant
            # prefix only Keras's own suffix would separate them, and a
            # collision would silently overwrite an earlier image.
            # Pulling one batch performs the transform and the save.
            next(datagen.flow(batch, batch_size=1, save_to_dir=class_path,
                              save_prefix=f"aug_{i}", save_format="jpg"))

# Apply augmentation to rare classes.
# Each entry is (class sub-directory, multiplier): 10x for Contempt, 3x for Disgust.
RARE_CLASS_MULTIPLIERS = (
    ("contempt", 10),
    ("disgust", 3),
)

for rare_class, factor in RARE_CLASS_MULTIPLIERS:
    augment_and_save(rare_class, factor)
