<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/Step_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook loads the already-processed data batches and trains the model on them.**

In [1]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# other imports to handle files
import os
import pickle
import csv

# deep learning imports
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D, Convolution1D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils, to_categorical
from keras.layers import LeakyReLU
from keras import regularizers

# to split our dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# to mount our drive
from google.colab import drive

Using TensorFlow backend.


In [2]:
# Make Google Drive available inside the Colab runtime
drive.mount('/content/gdrive')

# Start from the Colab root and show what it contains
os.chdir('/content')
print(os.listdir())

# Move into the project folder on Drive; later cells use paths relative to it
os.chdir('gdrive/My Drive/msc_project/all_mprage_grappa')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
['.config', 'gdrive', 'sample_data']


**Load our batches and build the model**

In [0]:
def get_y_values(total_slices_info):
  """Build one-hot encoded labels for a batch of samples.

  Args:
    total_slices_info: iterable of per-sample records; the element at index 2
      of each record is used as the class label (assumed to be 0 or 1, since
      the encoding is over 2 classes -- TODO confirm against the pickles).

  Returns:
    NumPy array of the labels one-hot encoded over 2 classes.
  """
  labels = []
  for info in total_slices_info:
    labels.append(info[2])

  one_hot = to_categorical(labels, 2)
  return np.array(one_hot)

In [0]:
def initialise_model():
  """Build and compile the 3D convolutional classifier.

  The network stacks six blocks of
  Conv3D -> LeakyReLU -> BatchNormalization -> MaxPooling3D
  (16, 32, 64, 128, 256, 512 filters), flattens, then applies a 512-unit
  dense layer with LeakyReLU and dropout, and a 2-way softmax output.

  Returns:
    The compiled Sequential model (Adam with lr=0.00005, categorical
    cross-entropy loss, categorical accuracy metric).
  """
  # One entry per conv block:
  # (filters, extra Conv3D kwargs, LeakyReLU kwargs, pool_size)
  block_specs = [
      # only the first layer declares the input volume shape
      (16, {'input_shape': (160, 160, 160, 1)}, {'alpha': 0.01}, 2),
      (32, {}, {'alpha': 0.01}, 3),
      (64, {}, {'alpha': 0.01}, 3),
      # NOTE(review): the remaining blocks build LeakyReLU() without an
      # explicit alpha, so they use the library default instead of 0.01 --
      # confirm this difference is intended.
      (128, {}, {}, 3),
      (256, {}, {}, 3),
      (512, {}, {}, 3),
  ]

  model = Sequential()

  # Ideas noted but not enabled: L2 kernel regularisation (0.0005) on the
  # conv layers, and a he_normal kernel initialiser.
  for filters, conv_extra, relu_kwargs, pool_size in block_specs:
    model.add(Convolution3D(filters=filters, kernel_size=3, strides=1,
                            padding='same', **conv_extra))
    model.add(LeakyReLU(**relu_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling3D(pool_size=pool_size, strides=2))

  # classifier head
  model.add(Flatten())
  model.add(Dense(512))
  model.add(LeakyReLU(alpha=0.01))
  model.add(Dropout(0.35))  # dropout to reduce overfitting
  model.add(Dense(2, activation='softmax'))

  # TODO: hyper-parameter search over optimisers / learning rates
  optimiser = Adam(lr=0.00005)
  model.compile(optimizer=optimiser,
                loss='categorical_crossentropy',
                metrics=['categorical_accuracy'])

  return model

In [9]:
# Build a freshly compiled network and print its layer-by-layer summary
model = initialise_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_9 (Conv3D)            (None, 160, 160, 160, 16) 448       
_________________________________________________________________
leaky_re_lu_11 (LeakyReLU)   (None, 160, 160, 160, 16) 0         
_________________________________________________________________
batch_normalization_9 (Batch (None, 160, 160, 160, 16) 64        
_________________________________________________________________
max_pooling3d_9 (MaxPooling3 (None, 80, 80, 80, 16)    0         
_________________________________________________________________
conv3d_10 (Conv3D)           (None, 80, 80, 80, 32)    13856     
_________________________________________________________________
leaky_re_lu_12 (LeakyReLU)   (None, 80, 80, 80, 32)    0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 80, 80, 80, 32)    128       
__________

In [0]:
def get_metrics(hist):
  """Collect the first recorded loss/accuracy values from a fit history.

  Args:
    hist: object whose `history` attribute maps metric names to per-epoch
      sequences (e.g. the value returned by `model.fit`).

  Returns:
    List `[val_loss, val_acc, train_loss, train_acc]`, each taken from
    index 0 of the corresponding series.
  """
  # order here fixes the column order of the returned row
  wanted_keys = (
      'val_loss',
      'val_categorical_accuracy',
      'loss',
      'categorical_accuracy',
  )
  recorded = hist.history
  return [recorded[key][0] for key in wanted_keys]

In [7]:
ppath = 'processed_brains' # directory holding the pickled batches
nname = '11_v' # run tag used in the metrics / model file names

# os.listdir returns entries in arbitrary order, so sort before slicing;
# otherwise the train / held-out split can differ from run to run.
total_slices_train = sorted(os.listdir(ppath))[:3] # train on first 3, test on last one

# make sure the output folders exist before the (long) training run starts
os.makedirs('stored_metrics', exist_ok=True)
os.makedirs('stored_models', exist_ok=True)

# Initialise Model!
model = initialise_model()

# Load Model (If not this, then must initialise)
# model = load_model('my_model_1.h5')

num_loopz = 1 # number of repeats we want
num_iterations = 30 # passes over the training batches per repeat

for kk in range(num_loopz):

  file_name = (nname+str(kk)) # suffix shared by this repeat's metrics and model files
  for iteration in range(num_iterations):

    # fit on each training batch in turn
    for tsf in total_slices_train:
      pkl_path = ppath+'/'+tsf

      # NOTE: pickle.load can execute arbitrary code; only load trusted files
      with open(pkl_path, 'rb') as f:
        total_slices, total_slices_info = pickle.load(f)

      # convert to numpy array
      total_slices = np.array(total_slices)
      # process y-values
      y_values = get_y_values(total_slices_info)

      # one epoch on this batch, holding out 10% of it for validation
      hist = model.fit(x=total_slices, y=y_values, batch_size=1, epochs=1, verbose=1, validation_split=0.10, shuffle=True)
      # get metrics
      metrics = get_metrics(hist)

      # append (not overwrite) one row per fit; newline='' is what the csv
      # module expects for files it writes. The with-block closes the file.
      with open('stored_metrics/metrics'+file_name+'.csv', 'a', newline='') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(metrics)

      # tsf[-5] is the character just before a 4-character extension
      # (presumably the batch number in '<name>N.pkl' -- verify file naming)
      print ("Iteration: %d, batch %s" % (iteration, tsf[-5]))

  # save the model as reference, in case we need the brain heatmap
  model.save('stored_models/model'+file_name+'.h5', overwrite=True)  # saves as an HDF5 file

Instructions for updating:
Use tf.cast instead.
Train on 45 samples, validate on 5 samples
Epoch 1/1
Iteration: 0, batch 0
Train on 45 samples, validate on 5 samples
Epoch 1/1
Iteration: 0, batch 1
Train on 45 samples, validate on 5 samples
Epoch 1/1
Iteration: 0, batch 2
Train on 45 samples, validate on 5 samples
Epoch 1/1
Iteration: 1, batch 0
Train on 45 samples, validate on 5 samples
Epoch 1/1
Iteration: 1, batch 1
Train on 45 samples, validate on 5 samples
Epoch 1/1

KeyboardInterrupt: ignored