<a href="https://colab.research.google.com/github/Hadiaz1/Brain-Tumor-Classification-using-CNN/blob/main/Data_Augmentation_BrainTumorDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# About the data:
The dataset contains two folders, `yes` and `no`, which together hold 253 brain MRI images. The `yes` folder contains 155 images that are tumorous, and the `no` folder contains 98 images that are non-tumorous.

In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import cv2
import imutils
import matplotlib.pyplot as plt
from os import listdir
import time    
from google.colab import drive
# Mount Google Drive at /content/drive so files under /content/drive/MyDrive are accessible.
drive.mount('/content/drive')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from zipfile import ZipFile

# Extract the dataset archive from Drive into the current working directory.
# The handle is named `archive` (not `zip`) so the builtin `zip` is not shadowed
# for the remaining cells of the notebook.
with ZipFile("/content/drive/MyDrive/brain_tumor_dataset.zip", "r") as archive:
  archive.extractall()
  print("done extracting")

done extracting


In [None]:
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """
    Write randomly augmented copies of every readable image in a directory.

    Arguments:
        file_dir: A string representing the directory where images that we want to augment are found.
        n_generated_samples: An integer, the number of augmented images to generate from each source image.
        save_to_dir: A string representing the directory in which the generated images will be saved.
            It is created if it does not already exist.
    """
    import os  # local import: the notebook's import cell only brings in `listdir` from os

    # The generator writes files directly into save_to_dir, so make sure it exists first.
    os.makedirs(save_to_dir, exist_ok=True)

    data_gen = ImageDataGenerator(rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  shear_range=0.1,
                                  brightness_range=(0.3, 1.0),
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest'
                                 )

    for filename in os.listdir(file_dir):
        # load the image; cv2.imread returns None for anything it cannot decode
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            print(f"Skipping unreadable file: {filename}")
            continue
        # OpenCV loads pixels as BGR, while the generator saves arrays as RGB;
        # convert so the saved files keep the source image's colours.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # add a leading batch axis: (height, width, channels) -> (1, height, width, channels)
        image = image.reshape((1,) + image.shape)
        # prefix of the names for the generated samples (extension stripped, whatever its length)
        save_prefix = 'aug_' + os.path.splitext(filename)[0]
        batches = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                                save_prefix=save_prefix, save_format='jpg')
        # Each next() call produces one batch and saves one image, so pulling
        # exactly n_generated_samples batches writes exactly that many files.
        for _ in range(n_generated_samples):
            next(batches)

Remember that 61% of the data (155 images) are tumorous and 39% of the data (98 images) are non-tumorous.
So, in order to balance the data, we can generate 9 new images for every image that belongs to the 'no' class and 6 new images for every image that belongs to the 'yes' class.

In [None]:
# Root of the extracted dataset and the folder that receives the augmented images.
dataset_root = "/content/archive/brain_tumor_dataset/"
augmented_data_path = dataset_root + "augmented_data/"

# Samples requested per source image for each class label:
# 'yes' holds the tumorous examples, 'no' the non-tumorous examples.
samples_per_label = {"yes": 6, "no": 9}

for label, n_samples in samples_per_label.items():
    augment_data(file_dir=dataset_root + label,
                 n_generated_samples=n_samples,
                 save_to_dir=augmented_data_path + label)

In [None]:
def data_summary(main_path):
    """
    Print the number of examples and the class balance of a dataset directory.

    Arguments:
        main_path: A string representing the directory that contains the 'yes'
            (tumorous) and 'no' (non-tumorous) subfolders. A trailing path
            separator is optional.
    """
    import os  # local import: the notebook's import cell only brings in `listdir` from os

    # os.path.join accepts main_path with or without a trailing separator
    yes_path = os.path.join(main_path, 'yes')
    no_path = os.path.join(main_path, 'no')

    # number of files (images) in the folder named 'yes' that represent tumorous (positive) examples
    m_pos = len(os.listdir(yes_path))
    # number of files (images) in the folder named 'no' that represent non-tumorous (negative) examples
    m_neg = len(os.listdir(no_path))
    # number of all examples
    m = m_pos + m_neg

    # report 0.0% for both classes instead of dividing by zero when both folders are empty
    pos_perc = (m_pos * 100.0) / m if m else 0.0
    neg_perc = (m_neg * 100.0) / m if m else 0.0

    print(f"Number of examples: {m}")
    print(f"Percentage of positive examples: {pos_perc}%, number of pos examples: {m_pos}")
    print(f"Percentage of negative examples: {neg_perc}%, number of neg examples: {m_neg}")

In [None]:
# Print the example count and class percentages for the augmented images.
data_summary(augmented_data_path)

Number of examples: 2063
Percentage of positive examples: 52.49636451769268%, number of pos examples: 1083
Percentage of negative examples: 47.50363548230732%, number of neg examples: 980


In [None]:
!zip -r /content/archive.zip /content/archive/

  adding: content/archive/ (stored 0%)
  adding: content/archive/brain_tumor_dataset/ (stored 0%)
  adding: content/archive/brain_tumor_dataset/yes/ (stored 0%)
  adding: content/archive/brain_tumor_dataset/yes/Y254.jpg (deflated 1%)
  adding: content/archive/brain_tumor_dataset/yes/Y90.jpg (deflated 1%)
  adding: content/archive/brain_tumor_dataset/yes/Y255.JPG (deflated 36%)
  adding: content/archive/brain_tumor_dataset/yes/Y147.JPG (deflated 24%)
  adding: content/archive/brain_tumor_dataset/yes/Y91.jpg (deflated 3%)
  adding: content/archive/brain_tumor_dataset/yes/Y18.JPG (deflated 36%)
  adding: content/archive/brain_tumor_dataset/yes/Y60.jpg (deflated 1%)
  adding: content/archive/brain_tumor_dataset/yes/Y247.JPG (deflated 32%)
  adding: content/archive/brain_tumor_dataset/yes/Y251.JPG (deflated 29%)
  adding: content/archive/brain_tumor_dataset/yes/Y76.jpg (deflated 4%)
  adding: content/archive/brain_tumor_dataset/yes/Y248.JPG (deflated 26%)
  adding: content/archive/brain_tum

In [None]:
# copy it there
!cp archive.zip /content/drive/MyDrive