# In [6]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

# Oversample an image-classification dataset with SMOTE.
#
# Layout expected on disk: ``parent_folder/<class_name>/<image files>``.
# Every readable image is flattened into a feature vector, the data is split
# into train/test sets, SMOTE is applied to the training portion only, and the
# resulting (original + synthetic) training images are written to
# ``output_folder/<class_name>/oversampled_<i>.jpg``.

# Define the parent folder containing subfolders with images
parent_folder = "dataset"  # Change to your parent folder path

# Define the output folder for augmented data (including oversampled images)
output_folder = "augmented_dataset_smote"  # Change to your desired output folder

# Initialize lists to store data and labels
data = []
labels = []

# Shape shared by all images, recorded from the first readable image. It is
# kept in its own variable so the save step does not depend on whatever the
# loading loop happened to leave in ``image`` (which is None when the last
# file visited could not be decoded).
image_shape = None
skipped_files = []

# sorted() makes the traversal order deterministic; os.listdir() order is
# arbitrary, which would make the seeded split below unreproducible.
for subfolder in sorted(os.listdir(parent_folder)):
    subfolder_path = os.path.join(parent_folder, subfolder)

    # Only directories are treated as class folders
    if not os.path.isdir(subfolder_path):
        continue

    for image_file in sorted(os.listdir(subfolder_path)):
        image_path = os.path.join(subfolder_path, image_file)

        # cv2.imread returns None for anything it cannot decode
        image = cv2.imread(image_path)
        if image is None:
            skipped_files.append(image_path)
            continue

        if image_shape is None:
            image_shape = image.shape
        elif image.shape != image_shape:
            # Flattened vectors must all have the same length to form a 2-D
            # feature matrix, so fail early with an actionable message.
            raise ValueError(
                f"Image {image_path!r} has shape {image.shape}, expected "
                f"{image_shape}. Resize all images to a common size first."
            )

        # Flatten the image into a one-dimensional feature vector and record
        # its class (the subfolder name)
        data.append(image.flatten())
        labels.append(subfolder)

if not data:
    raise ValueError(f"No readable images found under {parent_folder!r}.")

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) that could not be read as images.")

# Convert data and labels to numpy arrays
X = np.array(data)
y = np.array(labels)

# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply SMOTE oversampling to the flattened training data.
# The pixels are unsigned 8-bit integers; interpolating between samples in
# that dtype risks wrap-around when differences are negative, so SMOTE is run
# on a float copy and the result is converted back to valid pixel values.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(
    X_train.astype(np.float32), y_train
)
X_train_resampled = np.clip(np.rint(X_train_resampled), 0, 255).astype(np.uint8)

# Create the output folder and one subfolder per class present in the
# resampled training set (done once per class rather than once per image)
os.makedirs(output_folder, exist_ok=True)
class_folders = {}
for class_index in np.unique(y_train_resampled):
    class_folder = os.path.join(output_folder, label_encoder.classes_[class_index])
    os.makedirs(class_folder, exist_ok=True)
    class_folders[class_index] = class_folder

# Iterate through oversampled images and save them
failed_writes = 0
for i, (image_data, label) in enumerate(zip(X_train_resampled, y_train_resampled)):
    # Reshape the flattened image data to its original shape
    image = image_data.reshape(image_shape)

    # Define the filename for the oversampled image
    filename = f"oversampled_{i}.jpg"
    image_path = os.path.join(class_folders[label], filename)

    # Save the oversampled image; cv2.imwrite signals failure by returning False
    if not cv2.imwrite(image_path, image):
        failed_writes += 1

if failed_writes:
    print(f"Warning: {failed_writes} image(s) could not be written to {output_folder!r}.")

print("Oversampling and saving complete.")


# Output: Oversampling and saving complete.
