In [7]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img

# Source and target directories (paths are relative to the notebook's working directory).
SOURCE_DIR = "artifact_dataset/images"         # Contains subfolders: coin, sculpture, inscription
AUGMENTED_DIR = "artifact_dataset/augmented_images"  # New folder to store augmented images

# Create the target directory (and any missing parents). exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

# Random-augmentation generator. Every transform below is sampled independently
# each time the generator yields a batch.
datagen = ImageDataGenerator(
    # Geometric jitter
    rotation_range=20,        # degree range for random rotations
    shear_range=0.1,          # shear intensity
    zoom_range=0.1,           # zoom range around the original scale
    width_shift_range=0.1,    # horizontal shift (fraction of width when < 1)
    height_shift_range=0.1,   # vertical shift (fraction of height when < 1)
    # Mirroring
    # NOTE(review): flipping also mirrors any text in the image — confirm this
    # is acceptable for the "inscription" class.
    horizontal_flip=True,
    # Pixels exposed by a shift/rotation are filled from the nearest valid pixel
    fill_mode='nearest',
)

# Settings for the augmentation pass below.
AUGMENTATIONS_PER_IMAGE = 5                     # augmented copies written per source image
TARGET_SIZE = (224, 224)                        # (height, width) each image is resized to on load
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")    # matched case-insensitively
AUGMENT_SEED = 42                               # base seed so a re-run regenerates the same files

# Running count of processed images; used to give every image its own seed.
image_index = 0

# Walk each class subfolder of SOURCE_DIR and mirror it under AUGMENTED_DIR.
# sorted() fixes the traversal order (os.listdir() order is arbitrary), which
# together with the seed makes the output reproducible.
for subfolder in sorted(os.listdir(SOURCE_DIR)):
    subfolder_path = os.path.join(SOURCE_DIR, subfolder)
    if not os.path.isdir(subfolder_path):
        continue  # ignore stray files at the top level

    target_subfolder = os.path.join(AUGMENTED_DIR, subfolder)
    os.makedirs(target_subfolder, exist_ok=True)

    # Process each image in the subfolder
    for image_file in sorted(os.listdir(subfolder_path)):
        image_path = os.path.join(subfolder_path, image_file)
        is_image = image_file.lower().endswith(IMAGE_EXTENSIONS)
        if not (os.path.isfile(image_path) and is_image):
            continue

        # Load, resize, and add a leading batch axis.
        img = load_img(image_path, target_size=TARGET_SIZE)
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)  # Shape: (1, 224, 224, 3)

        # The output name keeps the original stem and extension; computed once
        # per image rather than once per augmented copy.
        base, ext = os.path.splitext(image_file)

        # Seeds are spaced AUGMENTATIONS_PER_IMAGE apart so consecutive images
        # do not reuse the same transform sequence.
        # NOTE(review): assumes Keras derives each batch's randomness from
        # seed + batch number — confirm for the installed version.
        flow = datagen.flow(
            x,
            batch_size=1,
            seed=AUGMENT_SEED + image_index * AUGMENTATIONS_PER_IMAGE,
        )

        # flow() yields batches indefinitely; zip() with a bounded range stops
        # after exactly AUGMENTATIONS_PER_IMAGE batches without over-pulling.
        for i, batch in zip(range(AUGMENTATIONS_PER_IMAGE), flow):
            new_filename = f"{base}_aug_{i}{ext}"
            new_filepath = os.path.join(target_subfolder, new_filename)
            array_to_img(batch[0]).save(new_filepath)

        image_index += 1

print("Data augmentation complete. Augmented images saved to:", AUGMENTED_DIR)


Data augmentation complete. Augmented images saved to: artifact_dataset/augmented_images
