**Objective:** Perform Data Augmentation to increase the size and variance of the training data

**Note:** This implementation approach of data augmentation is taken from a project titled "Brain-Tumor-Detection" created by MohamedAliHabib. I have made slight changes to the approach , to suit my project and preferences.

The github link for the original project is mentioned below:

**GitHub Link:** https://github.com/MohamedAliHabib/Brain-Tumor-Detection

**Importing the necessary modules**

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
import cv2
import imutils
import matplotlib.pyplot as plt 
%matplotlib inline
from os import listdir
import time 

**Creating a time function to check how much time it is taking to generate the images** 

This function takes the number of seconds that have elapsed and formats it into a proper hms format 

In [2]:
def time_str(t_elap):
    hrs=int(t_elap/3600) # to get number of hours
    min_=int((t_elap%3600)/60) #minutes are calculated from the seconds left after accounting for full hours
    sec_=int(t_elap%60) #seconds are calculated from the time left after accounting for full minutes 
    
    return f"{hrs}:{min_}:{sec_}"

**Creating a function to perform data augmentation**

In [3]:
def augmentation(dir,n_samples,save_dir):
    
    # dir is the path in a string format , where the original images are found 
    # n_samples are the number of samples to create for each image present in the dir path 
    # save_dir is the path in a string format , where the augmented images are to be stored. 
    
    
    # These are the parameters by which the augmented image shifts from the original

    data_aug=ImageDataGenerator(rotation_range=20,width_shift_range=0.1,height_shift_range=0.1,shear_range=0.15,
                                brightness_range=(0.6,1.4),zoom_range=0.10,horizontal_flip=True,vertical_flip=False,
                                fill_mode="reflect")
    
    
    for file in listdir(dir):
        img=cv2.imread(dir+"\\"+file) #loading the images from the directory 
        
        img=img.reshape((1,)+img.shape) #Changing the shape of images 
        
        prefix='aug_' + file[:-4] #adding a prefixx to the name of the original image to save the augmented ones 
        
        # Generating the augmented samples
        i=0
        
        
        for batch in data_aug.flow(x=img,batch_size=1,save_to_dir=save_dir,save_prefix=prefix,save_format=".jpg"):
            
            # data_aug.flow generates augmented images based on the pararmeters we defined before 
            i+=1
            
            if i > n_samples:
                break 
        

**Running the function and generating the images**

In [4]:
start=time.time() #To get the current time when the fucniton is running 

aug_data_path=r"Augmented Data/"

# Augmenting data present in the 0 folder
augmentation(dir=r"Data\0",n_samples=5,save_dir=aug_data_path+'0')

# Augmenting the data present in the 1 folder
augmentation(dir=r"Data\1",n_samples=6,save_dir=aug_data_path+'1')

# Augmenting the data present in the 2 folder
augmentation(dir=r"Data\2",n_samples=8,save_dir=aug_data_path+'2')

# Augmenting the data present in the 3 folder
augmentation(dir=r"Data\3",n_samples=7,save_dir=aug_data_path+'3')

# Augmenting the data present in the 4 folder
augmentation(dir=r"Data\4",n_samples=5,save_dir=aug_data_path+'4')

# Augmenting the data present in the 5 folder
augmentation(dir=r"Data\5",n_samples=10,save_dir=aug_data_path+'5')

# Augmenting the data present in the 6 folder
augmentation(dir=r"Data\6",n_samples=9,save_dir=aug_data_path+'6')

# Augmenting the data present in the 7 folder
augmentation(dir=r"Data\7",n_samples=11,save_dir=aug_data_path+'7')

# Augmenting the data present in the 8 folder
augmentation(dir=r"Data\8",n_samples=5,save_dir=aug_data_path+'8')

# Augmenting the data present in the 9 folder
augmentation(dir=r"Data\9",n_samples=6,save_dir=aug_data_path+'9')



end=time.time()

e_time=(end-start)

print(f"Total time elaspsed: {time_str(e_time)}")

Total time elaspsed: 0:7:15


In [None]:
def summary(path):
    zero=path+'0'
    one=path+'1'
    two=path+'2'
    three=path+'3'
    four=path+'4'
    five=path+'5'
    six=path+'6'
    seven=path+'7'
    eight=path+'8'
    nine=path+'9'
    
    # Total number of yes samples
    
    zero_samples=len(listdir(zero))
    one_samples=len(listdir(one))
    two_samples=len(listdir(two))
    three_samples=len(listdir(three))
    four_samples=len(listdir(four))
    five_samples=len(listdir(five))
    six_samples=len(listdir(six))
    seven_samples=len(listdir(seven))
    eight_samples=len(listdir(eight))
    nine_samples=len(listdir(nine))
    
    
    
    # Total samples
    
    t_samples=(zero_samples+one_samples+two_samples+three_samples+four_samples+five_samples+six_samples+seven_samples+eight_samples+nine_samples) 
    
    zero_samples_prec = (zero_samples* 100.0)/ t_samples
    one_samples_prec = (one_samples* 100.0)/ t_samples
    two_samples_prec = (two_samples* 100.0)/ t_samples
    three_samples_prec = (three_samples* 100.0)/ t_samples
    four_samples_prec = (four_samples* 100.0)/ t_samples
    five_samples_prec = (five_samples* 100.0)/ t_samples
    six_samples_prec = (six_samples* 100.0)/ t_samples
    seven_samples_prec = (seven_samples* 100.0)/ t_samples
    eight_samples_prec = (eight_samples* 100.0)/ t_samples
    nine_samples_prec = (nine_samples* 100.0)/ t_samples
    
    
    # Displaying all the detaols
    
    print(f"Total Number of samples: {t_samples}")
    print(f"Percentage of samples for 0: {zero_samples_prec}%, number of samples for 0: {zero_samples}") 
    print(f"Percentage of samples for 1: {one_samples_prec}%, number of samples for 1: {one_samples}") 
    print(f"Percentage of samples for 2: {two_samples_prec}%, number of samples for 2: {two_samples}") 
    print(f"Percentage of samples for 3: {three_samples_prec}%, number of samples for 3: {three_samples}") 
    print(f"Percentage of samples for 4: {four_samples_prec}%, number of samples for 4: {four_samples}") 
    print(f"Percentage of samples for 5: {five_samples_prec}%, number of samples for 5: {five_samples}") 
    print(f"Percentage of samples for 6: {six_samples_prec}%, number of samples for 6: {six_samples}") 
    print(f"Percentage of samples for 7: {seven_samples_prec}%, number of samples for 7: {seven_samples}") 
    print(f"Percentage of samples for 8: {eight_samples_prec}%, number of samples for 8: {eight_samples}") 
    print(f"Percentage of samples for 9: {nine_samples_prec}%, number of samples for 9: {nine_samples}") 
    

In [6]:
summary(aug_data_path)

Total Number of samples: 10240
Percentage of samples for 0: 11.015625%, number of samples for 0: 1128
Percentage of samples for 1: 9.775390625%, number of samples for 0: 1001
Percentage of samples for 2: 10.44921875%, number of samples for 0: 1070
Percentage of samples for 3: 9.443359375%, number of samples for 0: 967
Percentage of samples for 4: 10.068359375%, number of samples for 0: 1031
Percentage of samples for 5: 10.087890625%, number of samples for 0: 1033
Percentage of samples for 6: 9.9609375%, number of samples for 0: 1020
Percentage of samples for 7: 10.078125%, number of samples for 0: 1032
Percentage of samples for 8: 9.4921875%, number of samples for 0: 972
Percentage of samples for 9: 9.62890625%, number of samples for 0: 986
