In [1]:
# Generate augmented copies of the CIFAR-10 training images; each copy keeps the label of its source image
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Load the CIFAR-10 training split (the test split is not used in this cell)
(x_train, y_train), (_, _) = cifar10.load_data()

# Fixed seed so that Restart & Run All draws the same random transforms again
SEED = 42
# Number of images transformed per generator batch
AUGMENT_BATCH_SIZE = 1000

# Initialize the ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(
    rotation_range=40,      # Random rotation in the range [-40, 40] degrees
    width_shift_range=0.2,  # Random horizontal shift by up to 20% of the image width
    height_shift_range=0.2, # Random vertical shift by up to 20% of the image height
    shear_range=0.2,        # Shear angle in degrees (not radians), so 0.2 is a very mild shear
    zoom_range=0.2,         # Random zoom in the range [0.8, 1.2]
    horizontal_flip=True,   # Randomly flip images horizontally
    fill_mode='nearest'     # Fill in newly created pixels after transformations
)

# Define the number of augmented samples you want per original image
augmented_samples_per_image = 5


def augment_with_labels(images, labels, generator, copies_per_image,
                        batch_size=1000, seed=None):
    """Return the originals followed by randomly augmented copies, with labels.

    Layout of the result: the first ``len(images)`` rows are the original
    images; after them, the copies of image ``i`` occupy rows
    ``i * copies_per_image`` .. ``i * copies_per_image + copies_per_image - 1``
    of the augmented part, so every copy sits next to its siblings and lines
    up with the repeated labels.

    Args:
        images: Array of images with the sample axis first (rank 4, as
            required by ``generator.flow``).
        labels: Array of labels with one row per image.
        generator: A configured ``ImageDataGenerator``.
        copies_per_image: Number of augmented copies to create per image.
        batch_size: Number of images transformed per generator batch.
        seed: Optional integer forwarded to ``generator.flow`` to make the
            random transforms repeatable. Note that Keras seeds NumPy's
            global random state when a seed is given.

    Returns:
        Tuple ``(combined_images, combined_labels)``. The image dtype is the
        NumPy promotion of the input dtype and the generator's output dtype,
        i.e. the same dtype ``np.concatenate`` would produce.
    """
    n_images = len(images)

    # Allocate the final array once and fill it in place, instead of growing
    # a Python list and copying it twice (np.array + np.concatenate).
    out_dtype = np.result_type(images.dtype, generator.dtype)
    combined_images = np.empty(
        (n_images * (copies_per_image + 1),) + images.shape[1:],
        dtype=out_dtype,
    )
    combined_images[:n_images] = images
    augmented_part = combined_images[n_images:]  # view, shares memory

    if n_images and copies_per_image:
        # shuffle=False keeps batches in dataset order, so the source index
        # of every sample in a batch is known from the batch number alone.
        batches = generator.flow(
            images, batch_size=batch_size, shuffle=False, seed=seed
        )
        batches_per_pass = len(batches)
        for copy_idx in range(copies_per_image):
            for batch_idx in range(batches_per_pass):
                batch = next(batches)
                # The last batch of a pass may be shorter than batch_size.
                source_idx = batch_idx * batch_size + np.arange(len(batch))
                augmented_part[source_idx * copies_per_image + copy_idx] = batch

    # Each label is repeated once per copy, matching the row layout above.
    combined_labels = np.concatenate(
        (labels, np.repeat(labels, copies_per_image, axis=0))
    )
    return combined_images, combined_labels


# Build the augmented dataset: originals first, augmented copies after them
x_train_augmented, y_train_augmented = augment_with_labels(
    x_train,
    y_train,
    datagen,
    augmented_samples_per_image,
    batch_size=AUGMENT_BATCH_SIZE,
    seed=SEED,
)

# Augmented-only portions (views into the combined arrays, not copies)
augmented_x = x_train_augmented[len(x_train):]
augmented_y = y_train_augmented[len(y_train):]

# Sanity checks: every image has a label and the originals are untouched
assert len(x_train_augmented) == len(y_train_augmented)
assert len(x_train_augmented) == len(x_train) * (augmented_samples_per_image + 1)
assert np.array_equal(y_train_augmented[:len(y_train)], y_train)

# Now you have an augmented dataset with the same labels
print(f"Original dataset shape: {x_train.shape}")
print(f"Augmented dataset shape: {x_train_augmented.shape}")


Original dataset shape: (50000, 32, 32, 3)
Augmented dataset shape: (300000, 32, 32, 3)
