# Data Augmentation

## Import Necessary Modules

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import matplotlib.pyplot as plt
from os import listdir
import time    

%matplotlib inline

In [None]:
# Time string formatting
def hms_string(sec_elapsed):
    """Format a duration in seconds as ``h:m:s`` with at most one decimal on the seconds.

    Args:
        sec_elapsed: Elapsed time in seconds (int or float), expected to be
            non-negative.

    Returns:
        A string such as ``"0:14:59.2"``. Fields are not zero-padded.
    """
    # Round the total first and split afterwards. Rounding only the seconds
    # field (after the split) turns values just below a boundary into an
    # invalid field, e.g. 59.96 s would be rendered as "0:0:60.0".
    total = round(sec_elapsed, 1)
    h, remainder = divmod(total, 60 * 60)
    m, s = divmod(remainder, 60)
    # The remainder can carry floating-point noise (59.2000000000000455), so
    # the seconds are rounded once more for display.
    return f"{int(h)}:{int(m)}:{round(s, 1)}"

In [None]:
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """Write randomly augmented variants of every image in ``file_dir`` to ``save_to_dir``.

    Each readable image is passed through an ``ImageDataGenerator`` configured
    with small rotations, width/height shifts and horizontal/vertical flips,
    and the generated images are saved as PNG files whose names start with
    ``aug_<original name without extension>``.

    Args:
        file_dir: Folder containing the source images.
        n_generated_samples: Controls how many augmented images are produced
            per source image (see the NOTE in the inner loop).
        save_to_dir: Folder the augmented images are written to. It is created
            if it does not exist yet.

    Returns:
        None. The function is used for its side effect of writing files.
    """
    # Local import: the notebook only imports ``listdir`` from ``os``.
    import os

    data_gen = ImageDataGenerator(rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest'
                                 )

    # Make sure the output folder exists before flow() starts saving into it.
    os.makedirs(save_to_dir, exist_ok=True)

    for filename in listdir(file_dir):
        # load the image; os.path.join keeps this working on non-Windows
        # systems, where a hard-coded '\\' separator is not a path separator
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread returns None instead of raising when the entry is not
            # a readable image (sub-folder, thumbnail cache, corrupt file).
            print(f"Skipping '{filename}': could not be read as an image")
            continue
        # add a leading batch axis of size 1, as flow() is given a batch of images
        image = image.reshape((1,) + image.shape)
        # prefix of the names for the generated samples; splitext handles
        # extensions of any length (.jpeg, .tiff), unlike slicing off 4 chars
        save_prefix = 'aug_' + os.path.splitext(filename)[0]
        # generate the sample images
        # NOTE(review): the loop only stops once the counter *exceeds*
        # n_generated_samples, i.e. after flow() has produced
        # n_generated_samples + 1 batches. Assuming flow() saves one image per
        # batch, that is one more file than the parameter name suggests. The
        # count is kept as-is because existing callers were tuned against it.
        i = 0
        for batch in data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                                   save_prefix=save_prefix, save_format='png'):
            i += 1
            if i > n_generated_samples:
                break

In [None]:
# wall-clock timer for the whole augmentation run
start_time = time.time()

# root folder that receives the augmented training images
augmented_data_path = 'E:/Personal Projects/Multiclass Brain Tumor Classification/Dataset/Augmented + Split/train/'

# folders holding the cropped training images, one per class
glioma_path = 'E:/Personal Projects/Multiclass Brain Tumor Classification/Dataset/Cropped 2/train/glioma'
meningioma_path = 'E:/Personal Projects/Multiclass Brain Tumor Classification/Dataset/Cropped 2/train/meningioma'
pituitary_path = 'E:/Personal Projects/Multiclass Brain Tumor Classification/Dataset/Cropped 2/train/pituitary tumor'

# one augmentation job per class, run in this order:
# (source folder, n_generated_samples, sub-folder under augmented_data_path)
for _source_dir, _n_samples, _class_folder in (
    (glioma_path, 2, 'glioma'),
    (meningioma_path, 5, 'meningioma'),
    (pituitary_path, 4, 'pituitary tumor'),
):
    augment_data(
        file_dir=_source_dir,
        n_generated_samples=_n_samples,
        save_to_dir=augmented_data_path + _class_folder,
    )

# report how long the three jobs took in total
end_time = time.time()
execution_time = end_time - start_time
print(f"Elapsed time: {hms_string(execution_time)}")

Elapsed time: 0:14:59.2


Let's see how many glioma, meningioma and pituitary tumor examples there are after performing data augmentation:

In [None]:
def data_summary(main_path):
    """Print the number of files per tumor class under ``main_path`` and each class's share.

    The sub-folders ``glioma``, ``meningioma`` and ``pituitary tumor`` of
    ``main_path`` are listed; every directory entry counts as one example.

    Args:
        main_path: Folder containing the three class sub-folders. A trailing
            path separator is accepted but not required.

    Returns:
        None. The summary is printed to standard output.
    """
    # Local import: the notebook only imports ``listdir`` from ``os``.
    import os

    # (label used in the printed report, sub-folder name under main_path)
    classes = (
        ('glioma', 'glioma'),
        ('meningioma', 'meningioma'),
        ('pituitary', 'pituitary tumor'),
    )

    # number of files (images) in each class folder; os.path.join makes the
    # lookup independent of whether main_path ends with a separator
    counts = {
        label: len(os.listdir(os.path.join(main_path, folder)))
        for label, folder in classes
    }

    # number of all examples
    m = sum(counts.values())

    print(f"Number of examples: {m}")
    for label, _folder in classes:
        m_label = counts[label]
        # report 0.0% instead of dividing by zero when every folder is empty
        percentage = (m_label * 100.0) / m if m else 0.0
        print(f"Percentage of {label} examples: {percentage}%, number of {label} examples: {m_label}")

In [None]:
data_summary(augmented_data_path)

Number of examples: 10534
Percentage of glioma examples: 32.466299601291055%, number of glioma examples: 3420
Percentage of meningioma examples: 32.228972849819634%, number of meningioma examples: 3395
Percentage of pituitary examples: 35.30472754888931%, number of pituitary examples: 3719
