In [1]:
import os
import time
from os import listdir

import cv2
import imutils
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

%matplotlib inline



In [2]:
#nicely formatted time string

def hms_string(sec_elapsed):
    """Format a duration in seconds as an ``h:m:s`` string.

    The seconds field is shown to one decimal place (no zero padding is
    applied to any field, e.g. ``3661.5`` -> ``"1:1:1.5"``).

    Args:
        sec_elapsed: Elapsed time in seconds (int or float).

    Returns:
        The duration formatted as ``"<hours>:<minutes>:<seconds>"``.
    """
    # Round the *total* to the displayed precision before splitting it up.
    # Rounding only the seconds field afterwards lets values such as 59.96
    # render as "0:0:60.0" instead of carrying over into the minutes.
    total = round(sec_elapsed, 1)
    h = int(total / (60 * 60))
    m = int((total % (60 * 60)) / 60)
    # Second round() only strips float noise left by the modulo (e.g. 59.900000000000006).
    s = round(total % 60, 1)
    return f"{h}:{m}:{s}"


In [3]:
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """Write randomly augmented copies of every image found in ``file_dir``.

    Each readable image is run through a Keras ``ImageDataGenerator``
    (rotation, width/height shift, shear, brightness scaling, horizontal and
    vertical flips) and the generated samples are saved as JPEG files in
    ``save_to_dir`` with the prefix ``aug_<source file name without extension>``.

    Args:
        file_dir: Directory containing the source images.
        n_generated_samples: Number of augmented images to write per source
            image. Values <= 0 write nothing.
        save_to_dir: Directory the augmented images are written to; it is
            created if it does not exist yet.
    """
    data_gen = ImageDataGenerator(rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  shear_range=0.1,
                                  brightness_range=(0.3, 1.0),
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest')

    # The generator saves into save_to_dir but does not create it.
    os.makedirs(save_to_dir, exist_ok=True)

    for filename in listdir(file_dir):
        # cv2.imread returns None (it does not raise) for anything it cannot
        # decode: hidden files such as .DS_Store, sub-directories, corrupt images.
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            print(f"Skipping unreadable file: {filename}")
            continue

        # OpenCV loads colour images as BGR, while Keras saves arrays as RGB;
        # convert so the written files do not have red and blue swapped.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # flow() expects a batch axis: (height, width, channels) -> (1, height, width, channels).
        batch = image.reshape((1,) + image.shape)

        # splitext handles extensions of any length (.jpg, .jpeg, .png, ...).
        save_prefix = 'aug_' + os.path.splitext(filename)[0]

        flow = data_gen.flow(x=batch, batch_size=1, save_to_dir=save_to_dir,
                             save_prefix=save_prefix, save_format='jpg')
        # flow() yields forever and writes one file per batch drawn, so bound it
        # with range(); zip stops before drawing a batch once range is exhausted.
        for _ in zip(range(n_generated_samples), flow):
            pass


In [4]:
# Time the complete augmentation run over both classes.
start_time = time.time()

# Examples with label 'yes' (tumorous): 6 augmented images per source image.
augment_data(
    file_dir='/Users/user/Desktop/brain_tumor_dataset/yes',
    n_generated_samples=6,
    save_to_dir='/Users/user/Desktop/Augmented data/augmented_yes',
)

# Examples with label 'no': 9 augmented images per source image.
augment_data(
    file_dir='/Users/user/Desktop/brain_tumor_dataset/no',
    n_generated_samples=9,
    save_to_dir='/Users/user/Desktop/Augmented data/augmented_no',
)

end_time = time.time()
execution_time = end_time - start_time
print("Elapsed time: " + hms_string(execution_time))

KeyboardInterrupt: 

In [5]:
def data_summary(main_path):
    """Print the size and class balance of the augmented dataset.

    Counts the directory entries in the ``augmented_yes`` (tumorous examples)
    and ``augmented_no`` sub-folders of ``main_path`` and prints the total
    together with each class's count and percentage share.

    Args:
        main_path: Directory that contains the ``augmented_yes`` and
            ``augmented_no`` sub-folders.
    """
    # Derive both folders from the argument instead of ignoring it in favour
    # of machine-specific absolute paths.
    yes_path = os.path.join(main_path, 'augmented_yes')
    no_path = os.path.join(main_path, 'augmented_no')

    # Number of entries in each class folder (every entry is counted as one example).
    m_pos = len(listdir(yes_path))
    m_neg = len(listdir(no_path))
    m = m_pos + m_neg

    # Guard the empty-dataset case so the summary prints instead of raising
    # ZeroDivisionError.
    pos_prec = (m_pos * 100.0) / m if m else 0.0
    neg_prec = (m_neg * 100.0) / m if m else 0.0

    print(f"Number of examples: {m}")
    print(f"Percentage of positive examples: {pos_prec}%, number of pos examples: {m_pos}")
    print(f"Percentage of negative examples: {neg_prec}%, number of neg_examples: {m_neg}")
    

In [6]:
# Print the example counts and class percentages for the augmented dataset.
data_summary('/Users/user/Desktop/Augmented data')

Number of examples: 2022
Percentage of positive examples: 56.37982195845697%, number of pos examples: 1140
Percentage of negative examples: 43.62017804154303%, number of neg_examples: 882
