<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/p3_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook aims to modularise some of the input functions in P3 to suit the database**

In [0]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# other imports to handle files
import os
import pickle
import csv

# deep learning imports
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D, Convolution1D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils, to_categorical
from keras.layers import LeakyReLU
from keras import regularizers

# to split our dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# to mount our drive
from google.colab import drive

In [19]:
# attach Google Drive to the Colab filesystem
drive_mount_point = '/content/gdrive'
drive.mount(drive_mount_point)

# start from the Colab root and show what is available there
os.chdir('/content')
print(os.listdir())

# move into the project folder on the mounted drive (where the data is)
os.chdir('gdrive/My Drive/msc_project')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
['.config', 'gdrive', 'sample_data']


**Load our dataframe**

In [0]:
# pickled patient dataframe, resolved relative to the current working directory
patient_df_pickle = 'processed_patient_df_TRY2.pkl'
patient_df = pd.read_pickle(patient_df_pickle)

In [14]:
# split the patients on the Group column: 0 -> control dataframe, 1 -> PD dataframe
df_control = patient_df.loc[patient_df['Group'] == 0]
df_pd = patient_df.loc[patient_df['Group'] == 1]

# preview the first rows of the full dataframe
patient_df.head()

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
0,1130198,75422,0,M,73,1,MRI,MPRAGE GRAPPA,Original,11/13/2018,DCM,5/07/2019
2,1130191,75414,0,F,73,1,MRI,Sag MPRAGE GRAPPA,Original,12/13/2018,DCM,4/24/2019
3,1125041,74375,0,F,59,1,MRI,MPRAGE_GRAPPA,Original,9/06/2018,DCM,4/24/2019
4,1003469,72138,0,F,55,1,MRI,MPRAGE GRAPPA,Original,2/19/2018,DCM,4/24/2019
5,1119693,71935,1,M,66,1,MRI,MPRAGE GRAPPA,Original,4/03/2018,DCM,4/24/2019


**Load our file**

In [0]:
# for each set of training data in our pickle
# load
# process
# train our model in this
# 100 Test, 15% of 600 Validation, 85% of 600 Training. 700 Slices in total
# Save our model, tweak and evaluate etc..

In [0]:
def get_y_values(total_slices_info):
  '''Build the one-hot encoded targets that Keras expects.

  total_slices_info: iterable of records; the element at index 2 of each
                     record is used as that record's class label.

  Returns a numpy array holding the labels encoded over 2 classes by
  to_categorical, one row per record.
  '''
  labels = []
  for slice_info in total_slices_info:
    labels.append(slice_info[2])

  one_hot = to_categorical(labels, 2)
  return np.array(one_hot)

In [0]:
def _add_conv_block(model, filters, kernel_size, leaky_alpha=0.01, **conv_kwargs):
  '''Append Convolution3D -> LeakyReLU -> BatchNormalization -> MaxPooling3D(pool_size=2) to model.'''
  # kernel_regularizer=regularizers.l2(0.02) can be passed through conv_kwargs;
  # it is left disabled, as it was commented out in every layer originally
  model.add(Convolution3D(filters=filters, kernel_size=kernel_size, **conv_kwargs))
  model.add(LeakyReLU(alpha=leaky_alpha))
  model.add(BatchNormalization())
  model.add(MaxPooling3D(pool_size=2))


def initialise_model(input_shape=(200, 200, 160, 1), learning_rate=0.00005,
                     leaky_alpha=0.01, dropout_rate=0.35):
  '''Build and compile the 3D convolutional classifier.

  The network is six conv blocks (8, 16, 32, 64, 128, 256 filters), each made
  of Convolution3D, LeakyReLU, BatchNormalization and MaxPooling3D, followed
  by Flatten, Dense(512) with LeakyReLU and Dropout, and a 2-unit softmax.

  input_shape:   shape of one input sample passed to the first convolution.
  learning_rate: learning rate given to the Adam optimizer.
  leaky_alpha:   negative slope used by every LeakyReLU. Previously the third
                 conv block omitted alpha and so silently used the Keras
                 default instead of the 0.01 used everywhere else.
  dropout_rate:  dropout applied after the dense layer to limit overfitting.

  Returns the compiled Sequential model (categorical cross-entropy loss,
  accuracy metric).
  '''
  model = Sequential()

  # only the first block uses 'same' padding and declares the input shape
  _add_conv_block(model, 8, 2, leaky_alpha, padding='same', input_shape=input_shape)

  # remaining blocks as (filters, kernel_size), in network order
  for filters, kernel_size in ((16, 2), (32, 3), (64, 3), (128, 2), (256, 2)):
    _add_conv_block(model, filters, kernel_size, leaky_alpha)

  model.add(Flatten())

  model.add(Dense(512))
  model.add(LeakyReLU(alpha=leaky_alpha))
  model.add(Dropout(dropout_rate)) # add dropout to prevent overfitting

  model.add(Dense(2, activation='softmax'))

  model.compile(optimizer=Adam(lr=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])

  return model

In [0]:
def get_metrics(hist, epoch=0):
  ''' Function to get our metrics from the history object returned by model.fit

  hist:  object whose .history attribute is a dict mapping each metric name
         to a list with one value per epoch.
  epoch: index of the epoch to read. Defaults to 0 (the first epoch); pass -1
         to read the last epoch when fitting for more than one epoch.

  Returns [val_loss, val_acc, train_loss, train_acc] for that epoch.
  Raises KeyError when a metric is missing from the history (e.g. no
  validation data was used).
  '''
  history = hist.history

  def first_available(*names):
    # newer Keras releases report accuracy as 'accuracy'/'val_accuracy'
    # instead of 'acc'/'val_acc', so accept either spelling
    for name in names:
      if name in history:
        return history[name][epoch]
    raise KeyError(names[0])

  val_loss = first_available('val_loss')
  val_acc = first_available('val_acc', 'val_accuracy')
  train_loss = first_available('loss')
  train_acc = first_available('acc', 'accuracy')

  # put everything into one array
  return [val_loss, val_acc, train_loss, train_acc]

In [27]:
# train on all, perhaps have different conditions for when we reach our last one
# os.listdir gives no ordering guarantee, so sort before slicing: otherwise which
# batches are trained on (and which are left for testing) can differ between runs
total_slices_train = sorted(os.listdir('stored_batches'))[:6] # 600 exemplars, then test on 100

# create the output folders up front so a missing folder cannot fail the run
# after a batch has already been trained
os.makedirs('stored_metrics_v2', exist_ok=True)
os.makedirs('stored_models_v2', exist_ok=True)

# Initialise Model!
model = initialise_model()

# Load Model (If not this, then must initialise)
# model = load_model('my_model_1.h5')

num_iterations = 3 # number of times we want to loop it
num_loopz = 2 # number of repeats we want

# NOTE(review): the model is created once above, so repeat kk+1 keeps training the
# model left by repeat kk rather than starting fresh - confirm this is intended
for kk in range(num_loopz):

  file_name = ('10_v'+str(kk))
  metrics_path = 'stored_metrics_v2/metrics'+file_name+'.csv'

  for iteration in range(num_iterations):

    # load and fit our model for our instances
    for tsf in total_slices_train:
      pkl_path = 'stored_batches/'+tsf

      # load pickle file
      # NOTE: pickle.load can execute arbitrary code; only load batches we created ourselves
      with open(pkl_path, 'rb') as f:
        total_slices, total_slices_info = pickle.load(f)

      # convert to numpy array
      total_slices = np.array(total_slices)
      # process y-values
      y_values = get_y_values(total_slices_info)

      # Run our model with validation of 15%
      hist = model.fit(x=total_slices, y=y_values, batch_size=1, epochs=1, verbose=1, validation_split=0.15, shuffle=True)
      # get metrics
      metrics = get_metrics(hist)

      # append one row per trained batch; the with-block closes the file for us,
      # and newline='' is what the csv module expects of files it writes to
      with open(metrics_path, 'a', newline='') as csvFile:
        csv.writer(csvFile).writerow(metrics)

      # tsf[-5] is the single character just before a 4-character extension such as '.pkl'
      print ("Iteration: %d, batch %s" % (iteration, tsf[-5]))

  # save the model as reference, incase we need the brain heatmap
  model.save('stored_models_v2/model'+file_name+'.h5', overwrite=True)  # saves as a hd5 file

Train on 93 samples, validate on 17 samples
Epoch 1/1
Iteration: 0, batch 0
Train on 86 samples, validate on 16 samples
Epoch 1/1
Iteration: 0, batch 1
Train on 85 samples, validate on 15 samples
Epoch 1/1
Iteration: 0, batch 2
Train on 90 samples, validate on 16 samples
Epoch 1/1
Iteration: 0, batch 3
Train on 85 samples, validate on 15 samples
Epoch 1/1
Iteration: 0, batch 4
Train on 86 samples, validate on 16 samples
Epoch 1/1
Iteration: 0, batch 5
Train on 93 samples, validate on 17 samples
Epoch 1/1
Iteration: 1, batch 0
Train on 86 samples, validate on 16 samples
Epoch 1/1
Iteration: 1, batch 1
Train on 85 samples, validate on 15 samples
Epoch 1/1
Iteration: 1, batch 2
Train on 90 samples, validate on 16 samples
Epoch 1/1
Iteration: 1, batch 3
Train on 85 samples, validate on 15 samples
Epoch 1/1
Iteration: 1, batch 4
Train on 86 samples, validate on 16 samples
Epoch 1/1
Iteration: 1, batch 5
Train on 93 samples, validate on 17 samples
Epoch 1/1
Iteration: 2, batch 0
Train on 86 