# CNN Model Building and Training Notebook

### This code is in the development stage. It will later be adapted for deployment on Azure Cloud.

### To use this code, the dataset must be stored in Google Drive, and the Drive path must be mounted so the notebook can access it.

In [1]:
import os, sys
# os/sys are imported unconditionally: later cells rely on `os` even when the
# Google Drive setup below is skipped.

# Location of the symlink that points at the Drive "Colab Notebooks" folder
nb_path = '/content/notebooks'

try:
  # drive is a module that allows us to use Python to interact with Google Drive
  from google.colab import drive
  # mounting Google Drive allows us to work with its contents
  drive.mount('/content/gdrive')
except ImportError:
  # Not running inside Colab: there is nothing to mount.
  print("google.colab is not available; skipping Google Drive setup.")
except Exception as error:
  # Stay best-effort (do not abort the notebook) but report the real cause
  # instead of claiming the drive is already mounted.
  print(f"Could not mount Google Drive: {error!r}")
else:
  # Create the symlink only once; on a re-run it already exists and
  # os.symlink would raise, which previously skipped the sys.path update.
  if not os.path.lexists(nb_path):
    os.symlink('/content/gdrive/My Drive/Colab Notebooks', nb_path)
  # Put the notebooks folder first on the import path, without duplicates.
  if nb_path not in sys.path:
    sys.path.insert(0, nb_path)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Drive already mounted and ready to use!


In [2]:
# Mount Google Drive at /content/gdrive (requires the Colab runtime).
# NOTE(review): the first cell of this notebook already issues the same
# drive.mount call; this repeat looks redundant - confirm before removing.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import sys

# Make modules stored in the Drive "Colab Notebooks" folder importable.
# Guarded so that re-running this cell does not pile up duplicate entries.
colab_notebooks_dir = '/content/gdrive/My Drive/Colab Notebooks'
if colab_notebooks_dir not in sys.path:
  sys.path.append(colab_notebooks_dir)

In [4]:
# Central definition of every directory and file path used further down.

# Project root on the mounted Google Drive and its three top-level folders
main_cwd = r'/content/gdrive/My Drive/Colab Notebooks/CNN_Medical_Imaging'
model_cwd, dataset_cwd, image_data_cwd = (
    os.path.join(main_cwd, folder_name)
    for folder_name in ("Models", "Datasets", "Saved Image Data Arrays")
)

# One dataset folder per split
train_dataset_cwd, test_dataset_cwd, validation_dataset_cwd = (
    os.path.join(dataset_cwd, split_name)
    for split_name in ("train", "test", "valid")
)

# Saved image arrays (.npz), one file per split
train_x_image_data = os.path.join(image_data_cwd, "train_x_images_compressed_data_array_224.npz")
test_x_image_data = os.path.join(image_data_cwd, "test_x_images_compressed_data_array_224.npz")
valid_x_image_data = os.path.join(image_data_cwd, "valid_x_images_compressed_data_array_224.npz")

# Saved label arrays (.npy), one file per split
train_y_labels_data = os.path.join(image_data_cwd, "train_y_labels_compressed_array_224.npy")
test_y_labels_data = os.path.join(image_data_cwd, "test_y_labels_compressed_array_224.npy")
valid_y_labels_data = os.path.join(image_data_cwd, "valid_y_labels_compressed_array_224.npy")

In [5]:
# Importing required libraries

import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img, img_to_array, array_to_img
from tensorflow.keras.optimizers import Adam
import warnings
import re
import os


In [6]:
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore")

## Helper functions

In [7]:
def get_cwd_files(path):
  """List the entries of a directory, skipping git placeholder files.

  Args:
    path: Directory to list.

  Returns:
    A list with the names of all entries in ``path`` except ".gitkeep" and
    ".gitignore", sorted alphabetically. os.listdir returns names in
    arbitrary order, so sorting keeps the traversal order (and therefore the
    order of any arrays built from it) reproducible between runs.

  Raises:
    OSError: Propagated from os.listdir, e.g. when ``path`` does not exist.
  """
  ignore_files = {".gitkeep", ".gitignore"}
  return sorted(entry for entry in os.listdir(path) if entry not in ignore_files)

In [8]:
def _iter_class_image_paths(class_directory):
  """Yield the full path of every image stored beneath one class folder.

  Walks the fixed four-level layout
  <bone category>/<patient>/<patient record>/<image>, listing each level
  with get_cwd_files.
  """
  for bone_category in get_cwd_files(class_directory):
    bone_category_img_path = os.path.join(class_directory, bone_category)
    for patient_record in get_cwd_files(bone_category_img_path):
      patient_record_img_path = os.path.join(bone_category_img_path, patient_record)
      for patient_record_file in get_cwd_files(patient_record_img_path):
        patient_record_file_path = os.path.join(patient_record_img_path, patient_record_file)
        for patient_image in get_cwd_files(patient_record_file_path):
          yield os.path.join(patient_record_file_path, patient_image)


def get_image_data(image_data_process_directory, image_resize_value):
  """Load all images of a dataset split into arrays with integer labels.

  Args:
    image_data_process_directory: Directory that contains one sub-folder per
      class ("healthy_bones" and "fractured_bones").
    image_resize_value: Side length in pixels; every image is loaded with
      target_size (image_resize_value, image_resize_value).

  Returns:
    A tuple (image_data, label_data) of NumPy arrays. label_data holds the
    position of each image's class in the class list
    (0 = "healthy_bones", 1 = "fractured_bones").

  Images that cannot be loaded are skipped; the offending path and the error
  are printed so that bad files can be found.
  """
  # Classes for our prediction; the list position is used as the label
  classification_classes = ["healthy_bones", "fractured_bones"]
  target_size = (image_resize_value, image_resize_value)

  image_data = []
  label_data = []
  for class_category_number, each_class in enumerate(classification_classes):
    bone_class_img_path = os.path.join(image_data_process_directory, each_class)
    for patient_image_path in _iter_class_image_paths(bone_class_img_path):
      try:
        x_ray_image = img_to_array(load_img(patient_image_path, target_size=target_size))
      except Exception as error:
        # Best-effort: keep going, but say which file failed and why.
        # Exception (not a bare except) lets KeyboardInterrupt stop the run.
        print(f"Some error occured in fetching data! Skipped {patient_image_path}: {error!r}")
        continue
      image_data.append(x_ray_image)
      label_data.append(class_category_number)

  return np.array(image_data), np.array(label_data)


## Main Program

Converting the images to arrays and saving them only needs to be done on the first run. On subsequent runs, load the saved array files instead to save time.

In [9]:
# Processing train data images
#train_x, train_y = get_image_data(train_dataset_cwd, image_resize_value = 224)

In [10]:
# Processing test data images
#test_x, test_y = get_image_data(test_dataset_cwd, image_resize_value = 224)

In [11]:
# Processing validation data images
#valid_x, valid_y = get_image_data(validation_dataset_cwd, image_resize_value = 224)

## Storing the processed image array data in NumPy files (.npz / .npy)

In [12]:
# Storing Train image array values

# For train_x compressed values
#np.savez_compressed("train_x_images_compressed_data_array_224", train_array = train_x)

# For train_y compressed values
#np.save("train_y_labels_compressed_array_224", train_y)


In [13]:
# Storing Test image array values

# For test_x compressed values
#np.savez_compressed("test_x_images_compressed_data_array_224", test_array = test_x)

# For test_y compressed values
#np.save("test_y_labels_compressed_array_224", test_y)


In [14]:
# Storing Validation image array values

# For valid_x compressed values
#np.savez_compressed("valid_x_images_compressed_data_array_224", valid_array = valid_x)

# For valid_y compressed values
#np.save("valid_y_labels_compressed_array_224", valid_y)


## Loading Image arrays from the saved files

In [15]:
# Loading Train image compressed array values

# For train_x values: read the "train_array" entry, then close the .npz archive
# (np.load keeps the archive file open until it is closed explicitly)
with np.load(train_x_image_data) as train_archive:
  model_train_x = train_archive["train_array"]

# For train_y values
model_train_y = np.load(train_y_labels_data)

In [16]:
# Loading Test image compressed array values

# For test_x values: read the "test_array" entry, then close the .npz archive
# (np.load keeps the archive file open until it is closed explicitly)
with np.load(test_x_image_data) as test_archive:
  model_test_x = test_archive["test_array"]

# For test_y values
model_test_y = np.load(test_y_labels_data)


In [17]:
# Loading Valid image compressed array values

# For valid_x values: read the "valid_array" entry, then close the .npz archive
# (np.load keeps the archive file open until it is closed explicitly)
with np.load(valid_x_image_data) as valid_archive:
  model_valid_x = valid_archive["valid_array"]

# For valid_y values
model_valid_y = np.load(valid_y_labels_data)


In [18]:
# Divide every image array by 255; the label arrays are left untouched.
# NOTE: the same names are rebound, so running this cell again divides the
# already-scaled arrays by 255 a second time.
model_train_x = model_train_x / 255
model_test_x = model_test_x / 255
model_valid_x = model_valid_x / 255

## Hyperparameters for model building

In [19]:
# --- Augmentation settings consumed by ImageDataGenerator ---
model_rotation_range = 30              # Angular change range
model_width_shift_range = 0.2          # Horizontal shift range
model_height_shift_range = 0.2         # Vertical shift range
model_shear_range = 0.2                # Image angular deformation range
model_zoom_range = 0.2                 # Image zoom in/out range
model_brightness_range = [0.9, 1.2]    # Brightness change range (0.9 to 1.2)

# --- Training settings ---
model_epochs = 200
model_lr = 0.001  # 0.000001
model_optimizer = Adam(learning_rate=model_lr)

# from_logits=True tells the loss to expect raw, un-normalized scores.
# NOTE(review): confirm the model's final layer does not apply softmax.
model_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Presumably passed as the metrics list when the model is compiled - TODO confirm
model_accuracy = ["acc", "mse"]

# No callbacks are active. Candidates kept for reference:
#   ModelCheckpoint(checkpoint_dir, monitor = "val_loss", save_best_only=True, mode="min",options=None),
#   EarlyStopping(monitor="val_loss",min_delta=1e-4,patience=8,verbose=1, mode="min",restore_best_weights=True),
#   ReduceLROnPlateau(monitor="val_loss",min_delta=1e-4, factor=0.1, patience=3, verbose=1, min_lr=0.0001,mode="min")
model_callbacks = []


### Data Augmentation Initialization


In [20]:
# Augmentation options applied to the training images only.
# NOTE(review): the arrays fed to this generator were already divided by 255
# in an earlier cell; confirm brightness_range behaves as intended on that
# value range.
train_augmentation_options = dict(
    shear_range=model_shear_range,
    zoom_range=model_zoom_range,
    brightness_range=model_brightness_range,
    rotation_range=model_rotation_range,
    width_shift_range=model_width_shift_range,
    height_shift_range=model_height_shift_range,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="constant",
    cval=0.0,  # blank areas left after an image shift are filled with black
)
train_gen = ImageDataGenerator(**train_augmentation_options)

# Image generator for validation images: default settings, no augmentation options
valid_gen = ImageDataGenerator()

### Data Augmentation

In [21]:
# No ImageDataGenerator.fit(...) call is made here. fit only computes dataset
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening; neither generator enables any of them, so fitting would only
# spend time and memory on the full image arrays.

# Creating iter variables for model compile flow
train_iter = train_gen.flow(model_train_x, model_train_y, batch_size = 32)
# shuffle = False keeps validation batches in the stored array order
valid_iter = valid_gen.flow(model_valid_x, model_valid_y, batch_size = 8, shuffle = False)