<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/p3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook aims to bring together the df and loaded files to implement them in a Deep Learning Model**

In [0]:
!pip install talos

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pickle
from sklearn.model_selection import train_test_split
import nibabel as nb

In [0]:
# imports for keras model
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils
from keras.layers import LeakyReLU

# imports for automated parameter tuning
import talos as ta
from talos.metrics.keras_metrics import fmeasure_acc
from talos import live

In [19]:
from google.colab import drive

# mount google drive into google colab
drive.mount('/content/gdrive')

os.chdir('/content')

# go to where the data is
print (os.listdir())
os.chdir('gdrive/My Drive/msc_project')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
['.config', 'gdrive', 'sample_data']


**Load Dataframe and Pickle**

In [0]:
patient_df = pd.read_pickle('processed_patient_df.pkl')

with open('total_slices_all.pkl', 'rb') as f:
    total_slices, total_slices_info = pickle.load(f)

**Split data into training and test set**

In [66]:
from keras.utils import to_categorical

def get_y_values(total_slices_info):
  
  y_values = [s[2] for s in total_slices_info]

  # convert to 'keras friendly outputs'
  y_values = np.array(to_categorical(y_values, 2))
  
  return y_values

y_values = get_y_values(total_slices_info)

# print distribution of the array
num_controls = np.unique(y_values, return_counts=True)[1][0]
num_pd = np.unique(y_values, return_counts=True)[1][1]

pct_controls = (num_controls/ np.sum([num_controls, num_pd])) * 100
pct_pd = (num_pd / np.sum([num_controls, num_pd])) * 100
print ("%s%% Control and %s%% PD" % (pct_controls, pct_pd))

50.0% Control and 50.0% PD


In [93]:
from sklearn.model_selection import train_test_split

# split into training and test set
X_train, X_test, y_train, y_test = train_test_split(total_slices, y_values, test_size=0.2, shuffle=True)

np.shape(y_test) # how many y-outputs

(21, 2)

**Model Architecture**
Could potentially split data into chunks of 50 post processing (randomly sample 50 at a time and save that for our data set), train model with one epoch per chunk and save in batches, save, continue etc.. Then once that is done then validate on one chunk that is left out. Saving different parameters in our deep learning framework

In [0]:
# compile our model
# change architecture / layers /parameters, increase filters then decrease again
model = Sequential()

model.add(Convolution3D(filters=8, kernel_size=2, padding='same', input_shape=(200,200,160,1))) # or should activation be linear?
model.add(LeakyReLU(alpha=0.01)) # set to 0.01
model.add(MaxPooling3D(pool_size=2)) # pool_size=2

model.add(Convolution3D(filters=16, kernel_size=2))
model.add(LeakyReLU(alpha=0.01)) 
model.add(MaxPooling3D(pool_size=2))

model.add(Convolution3D(filters=32, kernel_size=3))
model.add(LeakyReLU())
model.add(MaxPooling3D(pool_size=2))

model.add(Convolution3D(filters=64, kernel_size=3))
model.add(LeakyReLU(alpha=0.01))
model.add(MaxPooling3D(pool_size=2))

model.add(Convolution3D(filters=128, kernel_size=2))
model.add(LeakyReLU(alpha=0.01))
model.add(MaxPooling3D(pool_size=2))

model.add(Convolution3D(filters=256, kernel_size=2))
model.add(LeakyReLU(alpha=0.01))
model.add(MaxPooling3D(pool_size=2))

model.add(Flatten())
model.add(Dense(512))
model.add(LeakyReLU(alpha=0.01))

model.add(Dense(2, activation='softmax'))

model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy',metrics = ['accuracy']) # metrics=['categorical_accuracy']

In [76]:
# investigate what possible 'good' combinations are first..
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_73 (Conv3D)           (None, 200, 200, 160, 8)  72        
_________________________________________________________________
leaky_re_lu_76 (LeakyReLU)   (None, 200, 200, 160, 8)  0         
_________________________________________________________________
max_pooling3d_70 (MaxPooling (None, 100, 100, 80, 8)   0         
_________________________________________________________________
conv3d_74 (Conv3D)           (None, 99, 99, 79, 16)    1040      
_________________________________________________________________
leaky_re_lu_77 (LeakyReLU)   (None, 99, 99, 79, 16)    0         
_________________________________________________________________
max_pooling3d_71 (MaxPooling (None, 49, 49, 39, 16)    0         
_________________________________________________________________
conv3d_75 (Conv3D)           (None, 47, 47, 37, 32)    13856     
__________

**RUNNING THE MODEL!**

In [98]:
model.fit(x=X_train, y=y_train, batch_size=5, epochs=3, verbose=1,
          validation_split=0.2, shuffle=True)

Train on 64 samples, validate on 17 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f98a07cfbe0>

In [101]:
score = model.evaluate(X_test[:20], y_test[:20], verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.10864386707544327
Test accuracy: 0.949999988079071


In [0]:
model.save('my_model.h5', overwrite=True)  # creates a HDF5 file 'my_model.h5'

Next Steps:
-  Add age and gender to FC layers
-  Add Normalisation
-  Add Bias and Kernel Regularisation (coefficient 0.001)
-  Add Dropout layers to last two layers (Keep coefficient of 0.45 and 0.5)

**Put into one super class which we can load our data (overwrite), fit and continue**

In [0]:
from keras.models import load_model
num_epochs = 3
batch_size = 10

batches_path = os.getcwd() + '/stored_batches'

for batch_pkl in os.listdir(batches_path):
  batch_pkl_path = batches_path + '/' + batch_pkl
  
  # load up the batch
  with open(batch_pkl_path, 'rb') as f:
    total_slices, total_slices_info = pickle.load(f)
    
  # get the corresponding y-values
  y_values = get_y_values(total_slices_info)
  
  # split into training and test set
  X_train, X_test, y_train, y_test = train_test_split(total_slices, y_values, test_size=0.2, shuffle=True)
  
  # fit to our existing model
  model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=num_epochs, verbose=1, # can modify batch size and epochs
          validation_data=(X_test, y_test), shuffle=True)

# creates a file and saves it (architecture, weights, optimizer)
model.save('my_model.h5', overwrite=True)  # creates a HDF5 file 'my_model.h5'
# model = load_model('my_model.h5')

**Attempt at auto tuning the parameters to our model**

In [0]:
def many_layers_architecture(x_train, y_train, x_val, y_val, params):
  '''This model has many layers of smaller kernel sizes which we can adjust'''
  
  model = Sequential()

  model.add(Convolution3D(filters=params['c1_filter'], kernel_size=params['k1_size']
                          , padding='same', input_shape=(200,200,160,1)))
  model.add(LeakyReLU(alpha=0.01)) # set to 0.01
  model.add(MaxPooling3D(pool_size=2, strides=1))

  model.add(Convolution3D(filters=params['c2_filter'], kernel_size=params['k1_size']))
  model.add(LeakyReLU(alpha=0.01)) 
  model.add(MaxPooling3D(pool_size=2, strides=1))

  model.add(Convolution3D(filters=params['c3_filter'], kernel_size=params['k2_size']))
  model.add(LeakyReLU(alpha=0.01))
  model.add(MaxPooling3D(pool_size=2, strides=1))

  model.add(Convolution3D(filters=params['c4_filter'], kernel_size=params['k2_size']))
  model.add(LeakyReLU(alpha=0.01))
  model.add(MaxPooling3D(pool_size=2, strides=1))

  model.add(Convolution3D(filters=params['c5_filter'], kernel_size=params['k3_size']))
  model.add(LeakyReLU(alpha=0.01))
  model.add(MaxPooling3D(pool_size=2, strides=1))

  model.add(Convolution3D(filters=params['c6_filter'], kernel_size=params['k3_size']))
  model.add(LeakyReLU(alpha=0.01))
  model.add(MaxPooling3D(pool_size=2, strides=1))

  model.add(Flatten())
  model.add(Dense(512))
  model.add(LeakyReLU(alpha=0.01))

  model.add(Dense(2, activation='softmax'))

  model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy',metrics = ['accuracy']) # metrics=['categorical_accuracy']
  
  # https://github.com/MinhazPalasara/keras/blob/master/examples/shapes_3d_cnn.py
  history = model.fit(x=X_train, y=y_train, batch_size=10, epochs=10, verbose=1,
          validation_data=(X_test, y_test), shuffle=True)
  
  return history, model

p = {'c1_filter':[4,8],
    'c2_filter':[6,16],
    'c3_filter':[16,32],
    'c4_filter':[32,64],
    'c5_filter':[64,128],
    'c6_filter':[128,256],
    'd1_filter':[512,1024],
    'k1_size':[2,3],
    'k2_size':[3,4],
    'k3_size':[2]}

In [7]:
# https://github.com/autonomio/talos/blob/master/examples/Hyperparameter%20Optimization%20on%20Keras%20with%20Breast%20Cancer%20Data.ipynb
def breast_cancer_model(x_train, y_train, x_val, y_val, params):
    '''This model has many layers of smaller kernel sizes which we can adjust'''
    
    model = Sequential()
    model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1],
                    activation=params['activation'],
                    kernel_initializer=params['kernel_initializer']))
    
    model.add(Dropout(params['dropout']))

    model.add(Dense(1, activation=params['last_activation'],
                    kernel_initializer=params['kernel_initializer']))
    
    model.compile(loss=params['losses'],
                  optimizer=params['optimizer'](),
                  metrics=['acc', fmeasure_acc])
    
    history = model.fit(x_train, y_train, 
                        validation_data=[x_val, y_val],
                        batch_size=params['batch_size'],
                        callbacks=[live()],
                        epochs=params['epochs'],
                        verbose=0)

    return history, model
  
# then we can go ahead and set the parameter space
p = {'first_neuron':[9,10,11],
     'hidden_layers':[0, 1, 2],
     'batch_size': [30],
     'epochs': [100],
     'dropout': [0],
     'kernel_initializer': ['uniform','normal'],
     'optimizer': [Nadam, Adam],
     'losses': [binary_crossentropy],
     'activation':[relu, elu],
     'last_activation': ['sigmoid']}

SyntaxError: ignored