# Generating data through data augmentation

In [None]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.utils import shuffle
import time

In [2]:
def delete_augmented_images(directory='52kards'):
    """
    Deletes all augmented images.

    Walks every card sub-folder of ``directory`` and removes each regular
    file whose name contains the substring 'aug'. Entries directly inside
    ``directory`` that are not folders are left untouched, as are nested
    folders inside a card folder.

    Args:
        directory: Root folder holding one sub-folder per card. Defaults to
            '52kards'.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
    """
    for card in os.listdir(directory):
        card_path = os.path.join(directory, card)
        if not os.path.isdir(card_path):
            continue
        for filename in os.listdir(card_path):
            if 'aug' not in filename:
                continue
            file_path = os.path.join(card_path, filename)
            # os.remove() cannot delete a folder, so only regular files are
            # removed even if a nested folder has 'aug' in its name.
            if os.path.isfile(file_path):
                os.remove(file_path)

In [3]:
def delete_ordinary_images(directory='52kards'):
    """
    Deletes all non-augmented images.

    Walks every card sub-folder of ``directory`` and removes each regular
    file whose name does NOT contain the substring 'aug', so only augmented
    images remain. The stated motivation is that the original images should
    not stay in the dataset because they probably require different
    preprocessing than the augmented ones (still to be double checked).

    Note that this is destructive: once the originals are gone, new
    augmented images can no longer be generated from them.

    Args:
        directory: Root folder holding one sub-folder per card. Defaults to
            '52kards'.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
    """
    for card in os.listdir(directory):
        card_path = os.path.join(directory, card)
        if not os.path.isdir(card_path):
            continue
        for filename in os.listdir(card_path):
            if 'aug' in filename:
                continue  # augmented image: keep it
            file_path = os.path.join(card_path, filename)
            # 'aug' is absent from the name, so this is an original image.
            # Nested folders are skipped because os.remove() cannot delete
            # them.
            if os.path.isfile(file_path):
                os.remove(file_path)

In [5]:
# delete_ordinary_images()

In [6]:
def gen_img(path, directory='52kards', num_augmented=70):
    """
    For the given card folder, takes each non-augmented image and generates
    a number of images with random transformations.

    Every source image is resized to 224x224, pushed ``num_augmented`` times
    through a random rotation / zoom / brightness pipeline, and each result
    is written next to the source as ``<source stem>_aug_<NNN>.png``. Files
    whose name already contains 'aug' are skipped so that augmented images
    are never augmented again. Results that come out (almost) entirely
    black are not saved.

    Args:
        path: Name of the card sub-folder inside ``directory``, e.g. '2c'.
        directory: Root folder holding one sub-folder per card. Defaults to
            '52kards'.
        num_augmented: Number of augmentation attempts per source image.
            Defaults to 70.

    Returns:
        None. The function works purely through files written to disk.
    """
    card_dir = os.path.join(directory, path)

    # Data augmentation configuration. Built once per call rather than once
    # per image: the layers hold no per-image state.
    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.Rescaling(1./255),  # Normalize pixels to [0, 1]
        tf.keras.layers.RandomRotation(0.5),  # Up to 180 degrees either way; cards are not rotationally symmetric
        tf.keras.layers.RandomZoom(height_factor=(-0.1, 0.1), width_factor=(-0.1, 0.1)),
        # value_range has to match the [0, 1] scale produced by Rescaling
        # above. With the layer's default range of (0, 255) the brightness
        # shift is scaled for 0-255 pixels and swamps rescaled images.
        tf.keras.layers.RandomBrightness(0.4, value_range=(0.0, 1.0)),
    ])
    epsilon = 1e-6  # Pixels at or below this value count as black

    for img_filename in os.listdir(card_dir):
        if 'aug' in img_filename:
            continue  # Not making further augmented images from existing augmented images

        img_path = os.path.join(card_dir, img_filename)
        # Grayscale (.convert('L')) is probably worth pursuing later.
        # The context manager releases the file handle once the resized copy exists.
        with Image.open(img_path) as source_img:
            img = source_img.resize((224, 224))

        # Convert to a numpy array and add a leading batch dimension of 1
        img_np = np.expand_dims(np.array(img), axis=0)

        # splitext copes with extensions of any length (.png, .jpeg, ...)
        stem = os.path.splitext(img_filename)[0]

        for j in range(num_augmented):
            # training=True is required for the random layers to be applied
            img_transformed = data_augmentation(img_np, training=True).numpy()
            if np.all(img_transformed <= epsilon):
                continue  # Don't save black images

            # Back to 8-bit pixels; clip so rounding can never wrap around
            # when casting to uint8.
            pixels = np.clip(img_transformed.squeeze() * 255, 0, 255).astype(np.uint8)
            img_transformed_pil = Image.fromarray(pixels)

            # Save the augmented image next to its source
            augmented_image_path = os.path.join(card_dir, f'{stem}_aug_{j:03}.png')
            img_transformed_pil.save(augmented_image_path)

In [7]:
import logging

# Suppress TensorFlow warnings: raise the TensorFlow logger's threshold to
# ERROR so that INFO and WARNING records are no longer emitted.
tf.get_logger().setLevel(logging.ERROR)

In [8]:
# Generate augmented images for every card folder and report how long it
# took, in seconds. perf_counter() is a monotonic clock, so the measurement
# is not affected by system clock adjustments.
start = time.perf_counter()
for card in os.listdir('52kards'):
    # gen_img lists the entry as a folder, so stray files sitting directly
    # in '52kards' must be skipped.
    if os.path.isdir(os.path.join('52kards', card)):
        gen_img(card)

end = time.perf_counter()
print(end-start)

0.014002561569213867
