In [None]:
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import matplotlib.pyplot as plt

In [None]:
ls

MRI-vs-Others.zip  [0m[01;34msample_data[0m/


In [None]:
# Unpack the dataset archive into /tmp. The `with` block closes the archive
# even when extraction fails part-way through.
local_zip = 'MRI-vs-Others.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('/tmp')

In [None]:
ls

MRI-vs-Others.zip  [0m[01;34msample_data[0m/


In [None]:
# Folder the archive was extracted into, and the two class folders inside it.
source_path = '/tmp'

source_path_mri = os.path.join(source_path, 'MRI')
# NOTE(review): 'whaever' looks like a typo, but presumably it matches the
# folder name shipped in the archive - confirm before renaming.
source_path_others = os.path.join(source_path, 'whaever')

# Report how many entries each class folder contains.
for label, folder in (("MRI", source_path_mri), ("Others", source_path_others)):
  print(f"There are {len(os.listdir(folder))} images of {label}.")

There are 253 images of MRI.
There are 253 images of Others.


In [None]:
# Root of the training/validation tree built by the cells below.
root_dir = '/tmp/MRI-vs-Others'

# Start from a clean slate: drop whatever a previous run left behind.
root_dir_present = os.path.exists(root_dir)
if root_dir_present:
  shutil.rmtree(root_dir)

def create_train_val_dirs(root_path):
  """
  Creates the directory tree for the training and validation sets

  The resulting layout is:
    root_path/training/mri      root_path/training/others
    root_path/validation/mri    root_path/validation/others

  Args:
    root_path (string) - the base directory path to create subdirectories from

  Returns:
    None

  Raises:
    FileExistsError - if root_path already exists
  """
  # Build everything below the root_path argument, not a module-level global,
  # so the function works for any base directory it is given.
  os.mkdir(root_path)
  for split in ("training", "validation"):
    split_dir = os.path.join(root_path, split)
    os.mkdir(split_dir)
    for label in ("mri", "others"):
      os.mkdir(os.path.join(split_dir, label))

# Build the directory tree. root_dir was removed earlier in this cell, so a
# clash with an existing directory is not expected here.
try:
  create_train_val_dirs(root_dir)
except FileExistsError:
  print("You should not be seeing this since the upper directory is removed beforehand")

In [None]:
# Print the full path of every directory found beneath root_dir.
for parent, child_dirs, _ in os.walk(root_dir):
    for child in child_dirs:
        print(os.path.join(parent, child))

/tmp/MRI-vs-Others/training
/tmp/MRI-vs-Others/validation
/tmp/MRI-vs-Others/training/others
/tmp/MRI-vs-Others/training/mri
/tmp/MRI-vs-Others/validation/others
/tmp/MRI-vs-Others/validation/mri


In [None]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE, seed=42):
  """
  Splits the data into train and validation sets

  Regular, non-empty files from SOURCE_DIR are shuffled and then copied (never
  moved) into TRAINING_DIR and VALIDATION_DIR. Zero-length files are reported
  and left out before the split is computed, so SPLIT_SIZE applies to the
  usable files only. Sub-directories are skipped.

  Args:
    SOURCE_DIR (string): directory path containing the images
    TRAINING_DIR (string): directory path to be used for training
    VALIDATION_DIR (string): directory path to be used for validation
    SPLIT_SIZE (float): proportion of the dataset to be used for training
    seed (int or None): seed for the shuffle; None gives a different split on every call

  Returns:
    None
  """
  usable_files = []
  # os.listdir order is arbitrary; sorting first makes the seeded shuffle reproducible
  for file in sorted(os.listdir(SOURCE_DIR)):
    file_path = os.path.join(SOURCE_DIR, file)
    if not os.path.isfile(file_path):
      # shutil.copy cannot copy a directory, so only regular files are considered
      continue
    if os.path.getsize(file_path) == 0:
      print(f'{file} is zero length, so ignoring.')
      continue
    usable_files.append(file)

  # Shuffle so the split does not depend on file naming or listing order
  random.Random(seed).shuffle(usable_files)

  train_count = int(len(usable_files) * SPLIT_SIZE)
  destinations = ((TRAINING_DIR, usable_files[:train_count]),
                  (VALIDATION_DIR, usable_files[train_count:]))
  for destination_dir, names in destinations:
    for file in names:
      shutil.copy(os.path.join(SOURCE_DIR, file),
                  os.path.join(destination_dir, file))

In [None]:
# Inputs: the extracted class folders
MRI_SOURCE_DIR = source_path_mri
OTHERS_SOURCE_DIR = source_path_others

# Outputs: one sub-folder per class inside each split
TRAINING_DIR = "/tmp/MRI-vs-Others/training"
VALIDATION_DIR = "/tmp/MRI-vs-Others/validation"

TRAINING_MRI_DIR = os.path.join(TRAINING_DIR, "mri/")
TRAINING_OTHERS_DIR = os.path.join(TRAINING_DIR, "others/")
VALIDATION_MRI_DIR = os.path.join(VALIDATION_DIR, "mri/")
VALIDATION_OTHERS_DIR = os.path.join(VALIDATION_DIR, "others/")

# Empty the destination folders so re-running this cell does not pile up files
for split_dir in (TRAINING_MRI_DIR, TRAINING_OTHERS_DIR,
                  VALIDATION_MRI_DIR, VALIDATION_OTHERS_DIR):
  for entry in os.scandir(split_dir):
    os.remove(entry.path)

# Proportion of images used for training
split_size = .75

# Copy every class into its training/validation folders
# NOTE: a message is printed for each zero-length image that gets skipped
split_data(MRI_SOURCE_DIR, TRAINING_MRI_DIR, VALIDATION_MRI_DIR, split_size)
split_data(OTHERS_SOURCE_DIR, TRAINING_OTHERS_DIR, VALIDATION_OTHERS_DIR, split_size)

# split_data copies rather than moves, so the source folders must be unchanged
print(f"\n\nOriginal MRI directory has {len(os.listdir(MRI_SOURCE_DIR))} images")
print(f"Original Others directory has {len(os.listdir(OTHERS_SOURCE_DIR))} images\n")

# Size of every class/split combination after the copy
for counted_dir, description in ((TRAINING_MRI_DIR, "MRI for training"),
                                 (TRAINING_OTHERS_DIR, "Others for training"),
                                 (VALIDATION_MRI_DIR, "MRI for validation"),
                                 (VALIDATION_OTHERS_DIR, "Others for validation")):
  print(f"There are {len(os.listdir(counted_dir))} images of {description}")



Original MRI directory has 253 images
Original Others directory has 253 images

There are 189 images of MRI for training
There are 189 images of Others for training
There are 64 images of MRI for validation
There are 64 images of Others for validation


In [None]:
from os import listdir
import cv2
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """
    Writes randomly augmented copies of every image found in a directory

    Args:
      file_dir (string): directory path containing the source images
      n_generated_samples (int): number of augmented images written per source image
      save_to_dir (string): directory path that receives the augmented JPEG files

    Returns:
      None
    """
    data_gen = ImageDataGenerator(rescale=1.0/255., rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  shear_range=0.1,
                                  brightness_range=(0.3, 1.0),
                                  zoom_range=0.2,
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest'
                                 )

    for filename in os.listdir(file_dir):
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded
            print(f'{filename} could not be read as an image, so ignoring.')
            continue
        # OpenCV decodes into BGR channel order while Keras writes the array out
        # as RGB; convert so the saved files keep their original colours
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # flow() expects a batch dimension in front: (1, height, width, channels)
        image = image.reshape((1,) + image.shape)
        # splitext copes with extensions of any length (.jpeg, .png, ...)
        save_prefix = 'aug_' + os.path.splitext(filename)[0]
        augmented_batches = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                                          save_prefix=save_prefix, save_format='jpg')
        # Every batch drawn writes one file, so draw exactly n_generated_samples
        for _ in range(n_generated_samples):
            next(augmented_batches)

In [None]:
# Training images to augment, one folder per class
MRI_path = "/tmp/MRI-vs-Others/training/mri"
others_path = "/tmp/MRI-vs-Others/training/others"

# Recreate the output tree from scratch on every run
augmented_data_path = 'augmented/'
if os.path.exists(augmented_data_path):
  shutil.rmtree(augmented_data_path)
os.mkdir(augmented_data_path)
for class_name in ('mri', 'others'):
  os.mkdir(augmented_data_path + class_name)

# Augment each class into its own output sub-folder
for class_dir, class_name in ((MRI_path, 'mri'), (others_path, 'others')):
  augment_data(file_dir=class_dir, n_generated_samples=10,
               save_to_dir=augmented_data_path + class_name)

In [None]:
def data_summary(main_path):
    """
    Prints the number and percentage of examples in each class

    Args:
      main_path (string): directory path containing the 'mri' and 'others' sub-directories

    Returns:
      None
    """
    # Count the entries below the directory that was passed in
    m_mri = len(os.listdir(os.path.join(main_path, 'mri')))
    m_others = len(os.listdir(os.path.join(main_path, 'others')))
    m = m_mri + m_others

    # An empty dataset has no meaningful ratio; report 0% instead of dividing by zero
    pos_prec = (m_mri * 100.0) / m if m else 0.0
    neg_prec = (m_others * 100.0) / m if m else 0.0

    print(f"Number of examples: {m}")
    print(f"Percentage of MRI examples: {pos_prec}%, Number of pos examples: {m_mri}")
    print(f"Percentage of Others examples: {neg_prec}%, Number of neg examples: {m_others}")
# Print the class counts and percentages for the augmented images.
data_summary(augmented_data_path)

Number of examples: 4157
Percentage of MRI examples: 49.98797209526101%, Number of pos examples: 2078
Percentage of Others examples: 50.01202790473899%, Number of neg examples: 2079


In [None]:
# grader-required-cell

# GRADED FUNCTION: train_val_generators
def train_val_generators(TRAINING_DIR, VALIDATION_DIR):
  """
  Creates the training and validation image generators

  Args:
    TRAINING_DIR (string): directory path containing the training images
    VALIDATION_DIR (string): directory path containing the validation images

  Returns:
    train_generator, validation_generator - tuple containing the generators
  """
  # Training images are scaled by 1/255 and randomly augmented on the fly.
  # horizontal_flip is a boolean switch, not a probability.
  train_datagen = ImageDataGenerator(rescale=1.0/255.,
                                     rotation_range=40,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     shear_range=0.2,
                                     zoom_range=0.2,
                                     horizontal_flip=True,
                                     fill_mode="nearest")

  train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                      batch_size=32,
                                                      class_mode="binary",
                                                      target_size=(240, 240))

  # Validation images are only rescaled, with the same factor as training.
  # rescale must be passed by keyword: the first positional parameter of
  # ImageDataGenerator is featurewise_center, not rescale.
  validation_datagen = ImageDataGenerator(rescale=1.0/255.)

  validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                batch_size=32,
                                                                class_mode="binary",
                                                                target_size=(240, 240))
  return train_generator, validation_generator

In [None]:
train_generator, validation_generator = train_val_generators(augmented_data_path, VALIDATION_DIR)

Found 4157 images belonging to 2 classes.
Found 128 images belonging to 2 classes.


In [None]:
train_generator.samples

4157

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras import layers, models, regularizers

def create_model():
  """
  Builds and compiles the binary image classifier

  The network stacks four Conv2D -> BatchNormalization -> MaxPooling2D stages,
  then flattens into a dropout-regularised dense head ending in one sigmoid unit.

  Returns:
    model - compiled Sequential model expecting inputs of shape (240, 240, 3)
  """
  model = tf.keras.models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(240, 240, 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu', kernel_regularizer=regularizers.L1L2(l1=0.001, l2=0.001)),
    # Single sigmoid output pairs with the binary cross-entropy loss below
    layers.Dense(1, activation='sigmoid')
  ])

  model.compile(loss='binary_crossentropy',
                optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                metrics=['accuracy'])
  return model

In [None]:
# Build a fresh network and train it, evaluating on the validation generator
# at the end of every epoch.
model = create_model()

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=15,
    steps_per_epoch=100,
    verbose=1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# Export the trained model to <model_name>/<model_version> below the working
# directory, using the "tf" save format.
model_name = "mri-vs-others"
model_version = "0001"

model_path = os.path.join(model_name, model_version)
model.save(model_path, save_format="tf")



In [None]:
ls

[0m[01;34maugmented[0m/  [01;34mmri-vs-others[0m/  MRI-vs-Others.zip  [01;34msample_data[0m/


In [None]:
!zip -r file.zip /content/mri-vs-others

  adding: content/mri-vs-others/ (stored 0%)
  adding: content/mri-vs-others/0001/ (stored 0%)
  adding: content/mri-vs-others/0001/variables/ (stored 0%)
  adding: content/mri-vs-others/0001/variables/variables.index (deflated 67%)
  adding: content/mri-vs-others/0001/variables/variables.data-00000-of-00001 (deflated 14%)
  adding: content/mri-vs-others/0001/saved_model.pb (deflated 89%)
  adding: content/mri-vs-others/0001/assets/ (stored 0%)
  adding: content/mri-vs-others/0001/keras_metadata.pb (deflated 93%)
  adding: content/mri-vs-others/0001/fingerprint.pb (stored 0%)


In [None]:
from IPython.display import FileLink
# Render a link to file.zip as this cell's output.
FileLink(r'file.zip')