<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/p3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook brings together the patient DataFrame and the previously saved scan data, and uses them to train a deep learning model**

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pydicom
import pickle
from deepbrain import Extractor
from sklearn.model_selection import train_test_split
import nibabel as nb

In [0]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils
from keras.layers import LeakyReLU

In [36]:
from google.colab import drive

# Make Google Drive visible inside the Colab VM.
drive.mount('/content/gdrive')

# Start from the Colab root and list it, so the 'gdrive' mount can be confirmed.
os.chdir('/content')
colab_root_entries = os.listdir()
print (colab_root_entries)

# Move into the project folder on Drive; the data files are opened
# with paths relative to this directory.
os.chdir('gdrive/My Drive/msc_project')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
['.config', 'gdrive', 'sample_data']


**Load Dataframe and Pickle**

In [0]:
# Patient-level table, read back from its pickle (presumably a DataFrame).
patient_df = pd.read_pickle('processed_patient_df.pkl')

# The second pickle unpacks into two objects: the scan data and the
# per-sample info records that the labels are later read from.
with open('total_slices_all.pkl', 'rb') as slices_file:
    total_slices, total_slices_info = pickle.load(slices_file)

**Split data into training and test set**

In [38]:
from keras.utils import to_categorical

def get_y_values(total_slices_info, num_classes=2):
  """Build one-hot encoded labels from the per-sample info records.

  Args:
    total_slices_info: sequence of per-sample records. The element at
      index 2 of each record is used as the class label (assumed to be an
      integer in [0, num_classes) -- TODO confirm against the notebook
      that writes these records).
    num_classes: number of label classes to encode. Defaults to 2, which
      is what this function previously hard-coded.

  Returns:
    A numpy array with one one-hot row per record and `num_classes` columns.
  """
  labels = [info[2] for info in total_slices_info]

  # convert to 'keras friendly outputs' (one-hot rows)
  return np.array(to_categorical(labels, num_classes))

y_values = get_y_values(total_slices_info)

# Print the class distribution.
# y_values is one-hot encoded, so calling np.unique on the whole array only
# counts the 0s and 1s of the encoding itself (for two classes that is always
# an even split, whatever the real class balance is). Summing each column
# gives the actual number of samples per class.
class_counts = y_values.sum(axis=0).astype(int)

# Column 0 is reported as Control and column 1 as PD, as before
# (assumes label 0 = control, 1 = PD -- TODO confirm against preprocessing).
num_controls, num_pd = class_counts[0], class_counts[1]
num_total = num_controls + num_pd

pct_controls = (num_controls / num_total) * 100
pct_pd = (num_pd / num_total) * 100
print ("%s%% Control and %s%% PD" % (pct_controls, pct_pd))

50.0% Control and 50.0% PD


In [39]:
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffled train/test partition is identical on every run;
# without it the split (and every metric computed from it) changes per run.
SPLIT_SEED = 42

# split into training (80%) and test (20%) set
X_train, X_test, y_train, y_test = train_test_split(
    total_slices, y_values, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

np.shape(X_train)

(8, 200, 200, 160, 1)

**Model Architecture**
One option is to split the data, after preprocessing, into chunks of 50 samples (randomly sampling 50 at a time and saving each chunk as part of our data set). The model would then be trained for one epoch per chunk, saved, and training continued on the next chunk, and so on. Once that is done, the model is validated on one chunk that was left out. The different model parameters would be saved along the way using our deep learning framework.

In [0]:
# (filters, kernel_size) of each convolutional stage, in network order.
conv_stages = [(8, 2), (16, 2), (32, 3), (64, 3), (128, 4), (256, 4)]

model = Sequential()

# Every stage is: 3D convolution -> LeakyReLU -> max pooling (default pool size).
for stage_idx, (n_filters, k_size) in enumerate(conv_stages):
  # Only the first layer of the Sequential model declares the input shape.
  first_layer_kwargs = {'input_shape': (200,200,160,1)} if stage_idx == 0 else {}
  model.add(Convolution3D(filters=n_filters, kernel_size=k_size, strides=1, padding='same',
                          **first_layer_kwargs)) # open question from the author: should the activation be linear?
  model.add(LeakyReLU(alpha=0.01)) # negative slope set to 0.01
  model.add(MaxPooling3D())

# Classifier head: flatten, one hidden dense layer, then a 2-way softmax.
model.add(Flatten())
model.add(Dense(512))
model.add(LeakyReLU(alpha=0.01))

model.add(Dense(2, activation='softmax'))

# Alternative metric considered by the author: metrics=['categorical_accuracy']
model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])

In [33]:
# Print the layer-by-layer summary of the model (layer type, output shape, parameter count).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_7 (Conv3D)            (None, 200, 200, 160, 8)  72        
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 200, 200, 160, 8)  0         
_________________________________________________________________
max_pooling3d_7 (MaxPooling3 (None, 100, 100, 80, 8)   0         
_________________________________________________________________
conv3d_8 (Conv3D)            (None, 100, 100, 80, 16)  1040      
_________________________________________________________________
leaky_re_lu_9 (LeakyReLU)    (None, 100, 100, 80, 16)  0         
_________________________________________________________________
max_pooling3d_8 (MaxPooling3 (None, 50, 50, 40, 16)    0         
_________________________________________________________________
conv3d_9 (Conv3D)            (None, 50, 50, 40, 32)    13856     
__________

**RUNNING THE MODEL!**

In [0]:
# Reference example: https://github.com/MinhazPalasara/keras/blob/master/examples/shapes_3d_cnn.py
# Train one sample at a time (batch_size=1) for 3 epochs.
# Note: the test split is also passed as validation data during training.
model.fit(x=X_train, y=y_train, batch_size=1, epochs=3, verbose=1,
          validation_data=(X_test, y_test), shuffle=True)

# Evaluate on the test split. This previously referenced `x_test` (lowercase),
# a name that is never defined, so the cell raised NameError after training.
score = model.evaluate(X_test, y_test, verbose=1)

**Put everything into one routine that loads each stored batch of data (overwriting the previous one), fits the model on it, and continues with the next batch**

In [0]:
from keras.models import load_model
num_epochs = 3
batch_size = 10
SPLIT_SEED = 42  # fixed seed so each batch's train/test split is reproducible

batches_path = os.path.join(os.getcwd(), 'stored_batches')

# os.listdir returns entries in arbitrary order and also lists sub-directories
# (which cannot be opened as a pickle), so keep regular files only and process
# them in sorted order to make runs repeatable.
batch_files = sorted(
    name for name in os.listdir(batches_path)
    if os.path.isfile(os.path.join(batches_path, name)))

for batch_pkl in batch_files:
  batch_pkl_path = os.path.join(batches_path, batch_pkl)

  # load up the batch (overwrites the previously loaded data)
  with open(batch_pkl_path, 'rb') as f:
    total_slices, total_slices_info = pickle.load(f)

  # get the corresponding y-values
  y_values = get_y_values(total_slices_info)

  # split into training and test set
  X_train, X_test, y_train, y_test = train_test_split(
      total_slices, y_values, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

  # continue fitting our existing model on this batch
  model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=num_epochs, verbose=1, # can modify batch size and epochs
          validation_data=(X_test, y_test), shuffle=True)

  # checkpoint after every batch so an interrupted session does not lose
  # all training done so far
  model.save('my_model.h5', overwrite=True)

# creates a file and saves it (architecture, weights, optimizer);
# kept after the loop so the model is written even if no batch file was found
model.save('my_model.h5', overwrite=True)  # creates a HDF5 file 'my_model.h5'
# To resume later: model = load_model('my_model.h5')