**Objective:** To perform Data Augmentation to increase the size of the training data

**Importing the necessary modules**

In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
import cv2
import imutils
import matplotlib.pyplot as plt 
%matplotlib inline
from os import listdir
import time 

**Creating a time function to check how much time it is taking to generate the images** 

This function takes the number of seconds that have elapsed and formats it into a proper hms format 

In [6]:
def time_str(t_elap):
    hrs=int(t_elap/3600) # to get number of hours
    min_=int((t_elap%3600)/60) #minutes are calculated from the seconds left after accounting for full hours
    sec_=int(t_elap%60) #seconds are calculated from the time left after accounting for full minutes 
    
    return f"{hrs}:{min_}:{sec_}"

**Creating a function to perform data augmentation**

In [7]:
def augmentation(dir,n_samples,save_dir):
    
    # dir is the path in a string format , where the original images are found 
    # n_samples are the number of samples to create for each image present in the dir path 
    # save_dir is the path in a string format , where the augmented images are to be stored. 
    
    
    
    data_aug=ImageDataGenerator(rotation_range=15,width_shift_range=0.05,height_shift_range=0.1,shear_range=0.1,
                                brightness_range=(0.7,1.1),horizontal_flip=True,vertical_flip=True,
                                fill_mode="nearest")
    
    # These are the parameters by which the augmented image shifts from the original
    
    # Most of them have not been changed as this is a medical dataset and slight distortions in the image can affect the performance of the model drastically 
    
    
    for file in listdir(dir):
        img=cv2.imread(dir+"\\"+file) #loading the images from the directody 
        
        img=img.reshape((1,)+img.shape) #Changing the shape of images 
        
        prefix='aug_' + file[:-4] #adding a prefixx to the name of the original image to save the augmented ones 
        
        # Generating the augmented samples
        i=0
        
        
        for batch in data_aug.flow(x=img,batch_size=1,save_to_dir=save_dir,save_prefix=prefix,save_format=".jpg"):
            
            # data_aug.flow generates augmented images based on the pararmeters we defined before 
            i+=1
            
            if i > n_samples:
                break 
        

**Running the function and generating the images**

In [8]:
start=time.time() #To get the current time when the fucniton is running 

aug_data_path=r"Augmented Data/"

# Augmenting data present in the yes folder

augmentation(dir=r"Preprocessed Data\Yes",n_samples=15,save_dir=aug_data_path+'yes')

# Augmenting the data present in the no folder

augmentation(dir=r"Preprocessed Data\No",n_samples=25,save_dir=aug_data_path+'no')

end=time.time()

e_time=(end-start)

print(f"Total time elaspsed: {time_str(e_time)}")

Total time elaspsed: 0:1:26


In [9]:
def summary(path):
    yes=path+'yes'
    no=path+'no'
    
    # Total number of yes samples
    y_samples=len(listdir(yes))
    
    # Total numbr of no samples
    
    n_samples=len(listdir(no))
    
    # Total samples
    
    t_samples=(y_samples+n_samples) 
    
    pos_prec = (y_samples* 100.0)/ t_samples
    neg_prec = (n_samples* 100.0)/ t_samples 
    
    
    print(f"Total Number of samples: {t_samples}")
    print(f"Percentage of positive examples: {pos_prec}%, number of pos examples: {y_samples}") 
    print(f"Percentage of negative examples: {neg_prec}%, number of neg examples: {n_samples}") 

In [10]:
summary(aug_data_path)

Total Number of samples: 10117
Percentage of positive examples: 49.74794899673817%, number of pos examples: 5033
Percentage of negative examples: 50.25205100326183%, number of neg examples: 5084
