# Data Augmentation

In [1]:
# Mount Google Drive only when the notebook is actually running on Colab, so
# that "Restart & Run All" also works on a local kernel (the dataset paths
# used further down are relative, not under /content/drive).
try:
    from google.colab import drive
except ImportError:
    drive = None  # google.colab is unavailable outside Colab; nothing to mount
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os
import numpy as np
import math
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array, load_img

# Shared augmentation policy used by every generate_images() call below.
datagen = ImageDataGenerator(
    # Geometric jitter: random horizontal mirroring plus shifts of up to 0.1
    # along each axis (Keras reads a float below 1 as a fraction of the
    # image size).
    horizontal_flip=True,
    height_shift_range=0.1,
    width_shift_range=0.1,
    # Pixels uncovered by a shift are filled from the nearest valid pixel.
    fill_mode='nearest',
)

2024-02-28 10:31:10.446116: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [15]:
def generate_images(aug_image_count, image_folder, gen_image_path, prefix_for_images):
    """Write randomly augmented copies of every PNG found in ``image_folder``.

    Each source image is run through the module-level ``datagen`` and the
    results are saved as PNG files in ``gen_image_path``. Every source image
    yields the same number of augmented copies,
    ``ceil(aug_image_count / number_of_source_images)``, so the total number
    of files requested is at least ``aug_image_count`` and overshoots when the
    division is not exact. That per-image count is printed when done.

    Args:
        aug_image_count: Desired total number of augmented images.
        image_folder: Directory scanned (non-recursively) for ``.png`` files.
        gen_image_path: Output directory; created if it does not exist yet.
        prefix_for_images: Leading part of every generated file name.

    Raises:
        ValueError: If ``image_folder`` contains no ``.png`` files (previously
            this surfaced as an unexplained ZeroDivisionError).
    """
    # List all the image files in the folder
    image_files = [file for file in os.listdir(image_folder) if file.endswith('.png')]
    if not image_files:
        raise ValueError(f"No .png images found in {image_folder!r}; nothing to augment")

    # Number of augmented copies to produce per source image
    aug_ratio = math.ceil(aug_image_count / len(image_files))

    # Keras does not create save_to_dir on its own; make sure it exists.
    if gen_image_path:
        os.makedirs(gen_image_path, exist_ok=True)

    # Augment each image in the folder
    for image_file in image_files:
        img = load_img(os.path.join(image_folder, image_file))
        # (height, width, channels) under Keras' default channels-last layout
        x = img_to_array(img)
        # Add a leading batch axis: flow() expects a batch of images.
        x = x.reshape((1,) + x.shape)

        # Keras names saved files "<prefix>_<index>_<random>.png". Every call
        # here passes a single image (index 0), so with a shared prefix only
        # the random suffix separates files and augmentations can overwrite
        # each other. Including the source file's stem keeps them apart.
        stem = os.path.splitext(image_file)[0]
        augmented = datagen.flow(
            x,
            batch_size=1,  # x holds exactly one image
            save_to_dir=gen_image_path,
            save_prefix=f'{prefix_for_images}_{stem}',
            save_format='png')

        # The generator is endless and writes one file each time a batch is
        # drawn, so draw exactly aug_ratio batches (the earlier
        # `i > aug_ratio + 1` break wrote aug_ratio + 2 files per image).
        for _ in range(aug_ratio):
            next(augmented)
    print(aug_ratio)
    

In [20]:

# Augmentation settings per class folder under ../dataset.
# The source-image counts in the comments are the author's recorded values.
AUGMENTATION_JOBS = {
    # 801 source images; 801 + 4199 = 5000
    'No Clinical Significance': {'aug_image_count': 4199, 'prefix': 'No_cs'},
    # 150 source images; 150 + 4850 = 5000
    'Moderate Clinical Significance': {'aug_image_count': 4850, 'prefix': 'Moderate_cs'},
    # 387 source images
    # NOTE(review): 387 + 1200 = 1587, unlike the 5000 total of the other two
    # classes - confirm this target is intended.
    'Serious Clinical Significance': {'aug_image_count': 1200, 'prefix': 'Serious_cs'},
}

# Classes to generate on this run. Only 'Serious Clinical Significance' is
# enabled; add the other class names here to (re)generate them as well.
CLASSES_TO_AUGMENT = ['Serious Clinical Significance']

for class_name in CLASSES_TO_AUGMENT:
    job = AUGMENTATION_JOBS[class_name]
    aug_image_count = job['aug_image_count']
    images = f'../dataset/{class_name}'
    gen_image_path = f'../dataset/{class_name} Aug'
    prefix_for_images = job['prefix']

    generate_images(aug_image_count, images, gen_image_path, prefix_for_images)


4
