<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/Step_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook handles the data processing and the training of the model.**

In [1]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# other imports to handle files
import os
import pickle
import csv

# deep learning imports
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D, Convolution1D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils, to_categorical
from keras.layers import LeakyReLU
from keras import regularizers

# to split our dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# to mount our drive
from google.colab import drive

Using TensorFlow backend.


In [2]:
# mount google drive into google colab
# (on first use in a session this prompts for an authorization code)
drive.mount('/content/gdrive')

# start from the Colab root so the relative chdir below resolves correctly
os.chdir('/content')

# go to where we will be working
print (os.listdir())
# relative paths used later in the notebook are resolved against this folder
os.chdir('gdrive/My Drive/msc_project/all_mprage_grappa')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
['.config', 'gdrive', 'sample_data']


**Load our batches and build the model**

In [0]:
# gets our y-values and converts to keras, one hot encoded outputs
def get_y_values(total_slices_info):
  '''Build the one-hot encoded label array for a batch.

  The label of each record is read from index 2 of the record and
  encoded over 2 classes with keras' to_categorical.

  total_slices_info: iterable of indexable records (label at position 2)
  returns: numpy array of one-hot encoded labels
  '''
  labels = []
  for info in total_slices_info:
    labels.append(info[2])

  one_hot = to_categorical(labels, 2)
  return np.array(one_hot)

In [0]:
def initialise_model():
  '''Build and compile the 3D convolutional network used for 2-class classification.

  Architecture (input volumes of shape 160x160x160x1):
    * 4 stages of Conv3D -> LeakyReLU -> BatchNormalization -> MaxPooling3D
    * Flatten
    * Dense(512) -> LeakyReLU -> Dropout(0.40)
    * Dense(64)  -> LeakyReLU -> Dropout(0.45)
    * Dense(2, softmax)

  Every Conv3D kernel/bias and every hidden Dense kernel carries an L2 penalty.
  The model is compiled with Adam and categorical cross-entropy.

  returns: the compiled keras Sequential model
  '''
  l2_strength = 0.005
  leaky_slope = 0.2

  # one entry per convolutional stage:
  # (filters, conv stride, pool stride, extra Conv3D keyword arguments)
  # only the first stage declares the input shape; note its pooling uses stride 1
  conv_stages = [
      (32,  1, 1, {'input_shape': (160,160,160,1)}),
      (64,  1, 2, {}),
      (128, 2, 2, {}),
      (256, 2, 2, {}),
  ]

  # (units, dropout rate) for the fully connected stages
  dense_stages = [(512, 0.40), (64, 0.45)]

  model = Sequential()

  for n_filters, conv_stride, pool_stride, extra_kwargs in conv_stages:
    model.add(Convolution3D(filters=n_filters, kernel_size=3, padding='same', strides=conv_stride,
                            kernel_regularizer=regularizers.l2(l2_strength),
                            bias_regularizer=regularizers.l2(l2_strength),
                            **extra_kwargs))
    model.add(LeakyReLU(alpha=leaky_slope))
    model.add(BatchNormalization(momentum=0.5))
    model.add(MaxPooling3D(pool_size=2, strides=pool_stride, padding='same'))

  model.add(Flatten())

  for n_units, drop_rate in dense_stages:
    model.add(Dense(n_units, kernel_regularizer=regularizers.l2(l2_strength)))
    model.add(LeakyReLU(alpha=leaky_slope))
    model.add(Dropout(drop_rate)) # dropout to limit overfitting

  # two-way softmax output
  model.add(Dense(2, activation='softmax'))

  model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy',metrics = ['categorical_accuracy'])

  return model

In [20]:
# Initialise Model!
# builds a fresh, compiled network and prints its layer-by-layer summary
# (the training cell further down calls initialise_model() again before fitting)
model = initialise_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_25 (Conv3D)           (None, 160, 160, 160, 32) 896       
_________________________________________________________________
leaky_re_lu_37 (LeakyReLU)   (None, 160, 160, 160, 32) 0         
_________________________________________________________________
batch_normalization_25 (Batc (None, 160, 160, 160, 32) 128       
_________________________________________________________________
max_pooling3d_25 (MaxPooling (None, 160, 160, 160, 32) 0         
_________________________________________________________________
conv3d_26 (Conv3D)           (None, 160, 160, 160, 64) 55360     
_________________________________________________________________
leaky_re_lu_38 (LeakyReLU)   (None, 160, 160, 160, 64) 0         
_________________________________________________________________
batch_normalization_26 (Batc (None, 160, 160, 160, 64) 256       
__________

In [0]:
def get_metrics(hist):
  '''Extract the first-epoch metrics from a keras History object.

  hist: object whose `history` dict holds the keys 'val_loss',
        'val_categorical_accuracy', 'loss' and 'categorical_accuracy'
  returns: list [val_loss, val_acc, train_loss, train_acc]
  '''
  recorded = hist.history

  # order matters: this is the column order written to the metrics csv
  wanted_keys = ('val_loss', 'val_categorical_accuracy', 'loss', 'categorical_accuracy')

  # index 0 -> first epoch recorded
  return [recorded[key][0] for key in wanted_keys]

In [0]:
# https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison
def unison_shuffled_copies(a, b):
    """Return shuffled copies of `a` and `b` that share the same random ordering.

    Both inputs must have the same length and support indexing with an
    integer index array (e.g. numpy arrays). The ordering is drawn from
    numpy's global random state.
    """
    n_items = len(a)
    assert n_items == len(b)

    # a single permutation applied to both keeps the pairs aligned
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [0]:
def past_accuracies_fn(current_list, hist):
  '''Slide the window of recent training accuracies forward by one entry.

  The first-epoch 'categorical_accuracy' from `hist` is appended to the end
  of the window and the oldest (first) entry is dropped, so the length of
  the window stays the same. The caller decides what to do with the result
  (e.g. stop training once every entry is 1).

  current_list: sequence of past accuracies
  hist: object exposing a `history` dict with key 'categorical_accuracy'
  returns: numpy array holding the updated window
  '''
  window = np.array(current_list)
  latest_accuracy = hist.history['categorical_accuracy'][0]

  # append the newest value, then discard the oldest one
  extended = np.append(window, latest_accuracy)
  return extended[1:]

In [0]:
ppath = 'processed_brains/processed_brains_aug' # where to get the batches from
nname = '91_aug_v' # what to save as

# os.listdir() returns names in arbitrary order, so sort before slicing; otherwise the
# batches used for training (and the one left out) can change from run to run.
# train on all, perhaps have different conditions for when we reach our last one
total_slices_train = sorted(os.listdir(ppath))[:4] # train on first 4, test on last one ###Need to update this
if not total_slices_train:
  raise FileNotFoundError("No batch files found in '%s'" % ppath)

# make sure the output folders exist before any training time is spent
os.makedirs('stored_metrics', exist_ok=True)
os.makedirs('stored_models', exist_ok=True)

# Initialise Model!
model = initialise_model()

# set number of repeats and iterations
num_loopz = 1
num_iterations = 100

# stops when training accuracy is 1 across the board:
# rolling window with one slot per training batch, so the early stop only
# triggers once the most recent pass over *every* batch reached accuracy 1
past_accuracies = np.zeros(len(total_slices_train))
break_at_100 = 'YES' # basic switch, 'YES' enables the early stop

# NOTE(review): with num_loopz > 1 the model and past_accuracies carry over from one
# repeat to the next (nothing is re-initialised per repeat) - confirm this is intended.
for kk in range(num_loopz):

  file_name = (nname+str(kk)) ###filename, what we are changing
  for iteration in range(num_iterations):

    # check if we have training accuracy of 100, if so, exit training loop
    # (accuracies never exceed 1, so "all equal to 1" is the same as "mean equal to 1")
    if break_at_100 == 'YES' and np.all(past_accuracies == 1):
      print ("Hit 100 on training accuracies")
      break

    print ("--------------------------------------------------------------------")
    print ("Current accuracy on training sets: %.2f " % np.mean(past_accuracies))
    print ("--------------------------------------------------------------------")

    # load and fit our model for our instances
    for tsf in total_slices_train:
      pkl_path = ppath+'/'+tsf

      # load pickle file
      # NOTE: pickle.load can execute arbitrary code - only load batch files you created yourself
      with open(pkl_path, 'rb') as f:
        total_slices, total_slices_info = pickle.load(f)

        ## total_slices[:, 18:146,18:146,18:146, :] # to cut down to 128,128,128

      # tsf[-5] is the character just before a 4-character extension such as '.pkl'
      print ("Iteration: %d, batch %s" % (iteration, tsf[-5])) # print up here instead

      # convert to numpy array
      total_slices = np.array(total_slices)
      # process y-values
      y_values = get_y_values(total_slices_info)

      # shuffle both of these
      # total_slices, y_values = unison_shuffled_copies(total_slices, y_values)

      # Run our model with validation of 5%
      # (keras takes the validation samples from the end of the arrays, before shuffling)
      hist = model.fit(x=total_slices, y=y_values, batch_size = 1, epochs=1, verbose=1, shuffle=True,validation_split=0.05)
      # add to running totals
      past_accuracies = past_accuracies_fn(past_accuracies, hist)
      # get metrics
      metrics = get_metrics(hist)

      # write to csv (append instead of overwrite); newline='' is what the csv module
      # expects for files handed to csv.writer, and the with-block closes the file for us
      with open('stored_metrics/metrics'+file_name+'.csv', 'a', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(metrics)

  # save the model as reference, incase we need the brain heatmap
  model.save('stored_models/model'+file_name+'.h5', overwrite=True)  # saves as a hd5 file

--------------------------------------------------------------------
Current accuracy on training sets: 0.00 
--------------------------------------------------------------------
Iteration: 0, batch 0
Train on 95 samples, validate on 5 samples
Epoch 1/1
Iteration: 0, batch 1
Train on 95 samples, validate on 5 samples
Epoch 1/1
Iteration: 0, batch 2
Train on 95 samples, validate on 5 samples
Epoch 1/1