# CNN Model Building and Training Notebook

### This code is at a developmental stage. It will later be adapted for an Azure Cloud implementation.

### To use this code, the dataset must be stored in Google Drive, and the Drive path is mounted for use in the notebook.

In [1]:
import os, sys
#os and sys give access to the file system and to the module search path
from google.colab import drive
#drive is a module that allows us use Python to interact with google drive
drive.mount('/content/gdrive')
#mounting google drive allows us to work with its contents;
#when the drive is already mounted this call only prints a notice
nb_path = '/content/notebooks'
try:
  os.symlink('/content/gdrive/My Drive/Colab Notebooks', nb_path)
except FileExistsError:
  #the link is left over from an earlier run of this cell, so there is nothing to create;
  #any other failure (for example a mount problem) is allowed to surface
  print("Drive already mounted and ready to use!")
if nb_path not in sys.path:
  #done outside the try block so the path is also registered when the link already existed
  sys.path.insert(0, nb_path)  # or append(nb_path)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Drive already mounted and ready to use!


### If the cell above shows an error message, don't worry — just continue running from the cell below. It only means that the drive was already mounted and the mount cell was run again.

In [2]:
# Mount Google Drive at /content/gdrive. This repeats the mount done in cell In [1];
# when the drive is already mounted, Colab only prints a notice (see the output below).
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import sys
# Make modules stored in the Drive notebooks folder importable.
# The membership check keeps sys.path free of duplicate entries when this cell is re-run.
colab_notebooks_dir = '/content/gdrive/My Drive/Colab Notebooks'
if colab_notebooks_dir not in sys.path:
  sys.path.append(colab_notebooks_dir)

In [4]:
# Storing all required paths for later use

# Root folder on the mounted Drive; every other path is derived from it.
main_cwd = r'/content/gdrive/My Drive/Colab Notebooks'

# Top-level folders for saved models and for the image datasets.
model_cwd, dataset_cwd = (
  os.path.join(main_cwd, folder_name) for folder_name in ("Models", "Datasets")
)

# Train / validation splits inside the dataset folder.
train_dataset_cwd, validation_dataset_cwd = (
  os.path.join(dataset_cwd, split_name) for split_name in ("train", "validation")
)

In [5]:
# Importing required libraries

import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import numpy as np
from tensorflow import keras
from keras.preprocessing.image import load_img, img_to_array, array_to_img
import warnings
import re
import os


In [6]:
# Silence DeprecationWarning messages specifically.
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Silence every other warning category as well. This blanket filter already covers
# the DeprecationWarning filter above, so real problems may go unnoticed while it is active.
warnings.filterwarnings("ignore")

## Helper functions

In [7]:
def get_cwd_files(path):
  """Return the names of the entries in ``path``, minus Git placeholder files.

  Entries called ``.gitkeep`` or ``.gitignore`` are dropped; every other name
  (file or folder) is returned in the order ``os.listdir`` reports it.
  """
  skipped_names = [".gitkeep", ".gitignore"]

  kept_entries = []
  for entry_name in os.listdir(path):
    if entry_name in skipped_names:
      continue
    kept_entries.append(entry_name)

  return kept_entries

In [8]:
def _iter_patient_image_paths(class_directory):
  """Yield the path of every image file found beneath one class directory.

  The walk follows the fixed folder layout used by the loader:
  bone category / patient record / record folder / image file.
  Entries are listed with get_cwd_files, so Git placeholder files are skipped.
  """
  for bone_category in get_cwd_files(class_directory):
    bone_category_img_path = os.path.join(class_directory, bone_category)
    for patient_record in get_cwd_files(bone_category_img_path):
      patient_record_img_path = os.path.join(bone_category_img_path, patient_record)
      for patient_record_file in get_cwd_files(patient_record_img_path):
        patient_record_file_path = os.path.join(patient_record_img_path, patient_record_file)
        for patient_image in get_cwd_files(patient_record_file_path):
          yield os.path.join(patient_record_file_path, patient_image)


def get_image_data(image_data_process_directory, image_resize_value):
  """Load every image under a dataset directory together with its class label.

  Parameters
  ----------
  image_data_process_directory : str
    Directory that contains one sub-folder per class ("healthy_bones" and
    "fractured_bones").
  image_resize_value : int
    Each image is loaded with target_size=(image_resize_value, image_resize_value).

  Returns
  -------
  numpy.ndarray
    Object array with one row per successfully loaded image: column 0 holds the
    array returned by img_to_array, column 1 the integer label
    (0 = "healthy_bones", 1 = "fractured_bones").

  Notes
  -----
  An image that cannot be loaded is reported and skipped, so one bad file does
  not abort the whole run. Errors while listing directories are not caught.
  """
  # Classes for our prediction; the position in this list is the numeric label
  classification_classes = ["healthy_bones", "fractured_bones"]

  # Processing image to array
  data = []
  for class_category_number, each_class in enumerate(classification_classes):
    bone_class_img_path = os.path.join(image_data_process_directory, each_class)
    for patient_image_path in _iter_patient_image_paths(bone_class_img_path):
      try:
        x_ray_image = load_img(patient_image_path, target_size=(image_resize_value, image_resize_value))
        x_ray_image = img_to_array(x_ray_image)
      except Exception as load_error:
        # Name the offending file and the reason instead of hiding them
        print("Some error occured in fetching data!", patient_image_path, load_error)
        continue
      data.append([x_ray_image, class_category_number])

  # Each row pairs an image array with an int, which is a ragged sequence for NumPy;
  # dtype=object is required on NumPy >= 1.24, where the implicit conversion raises ValueError
  return np.array(data, dtype=object)


## Main Program

In [9]:
# Processing train data images
train_data = get_image_data(train_dataset_cwd, image_resize_value = 224)

In [10]:
train_data.shape

(5543, 2)