<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/p3_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook aims to modularise some of the input functions in P3 to suit the database**

In [0]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pickle
import seaborn as sns

# deep learning imports
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D, Convolution1D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils, to_categorical
from keras.layers import LeakyReLU
from keras import regularizers

# to split our dataset
from sklearn.model_selection import train_test_split

# to mount our drive
from google.colab import drive

In [3]:
# mount google drive so the project files are reachable from the Colab runtime
drive.mount('/content/gdrive')

# start from the Colab root so the relative path below resolves
os.chdir('/content')

# go to where the data is (the root listing is printed first as a sanity check)
print (os.listdir())
os.chdir('gdrive/My Drive/msc_project')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
['.config', 'gdrive', 'sample_data']


**Load our dataframe**

In [0]:
# load the pickled patient table; the relative path resolves against the
# current working directory (the project folder chosen in the mount cell)
patient_df = pd.read_pickle('processed_patient_df_TRY2.pkl')

In [5]:
# Split the patient table on the Group column: 0 -> control, 1 -> PD
df_control = patient_df.loc[patient_df['Group'] == 0]
df_pd = patient_df.loc[patient_df['Group'] == 1]

# Preview the first rows of the full table
patient_df.head()

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
0,1130198,75422,0,M,73,1,MRI,MPRAGE GRAPPA,Original,11/13/2018,DCM,5/07/2019
2,1130191,75414,0,F,73,1,MRI,Sag MPRAGE GRAPPA,Original,12/13/2018,DCM,4/24/2019
3,1125041,74375,0,F,59,1,MRI,MPRAGE_GRAPPA,Original,9/06/2018,DCM,4/24/2019
4,1003469,72138,0,F,55,1,MRI,MPRAGE GRAPPA,Original,2/19/2018,DCM,4/24/2019
5,1119693,71935,1,M,66,1,MRI,MPRAGE GRAPPA,Original,4/03/2018,DCM,4/24/2019


**Load our file**

In [0]:
# Plan: for each set of training data stored in our pickle:
#   1. load it
#   2. process it
#   3. train our model on it
# Data split: 700 slices in total -> 100 for testing; of the remaining 600,
# 15% are used for validation and 85% for training.
# Afterwards: save our model, tweak, evaluate, etc.

In [0]:
def get_y_values(total_slices_info):
  """Extract the labels from the slice metadata and one-hot encode them for Keras.

  Args:
    total_slices_info: iterable of indexable records; the element at
      index 2 of each record is used as the class label
      (presumably the patient Group, 0 or 1 -- TODO confirm against the
      code that writes the batches).

  Returns:
    A numpy array holding the two-class one-hot encoding of the labels,
    one row per record.
  """
  labels = []
  for slice_info in total_slices_info:
    labels.append(slice_info[2])

  one_hot = to_categorical(labels, num_classes=2)

  return np.array(one_hot)

In [0]:
def initialise_model():
  """Build and compile the 3D convolutional classifier.

  The network is six Convolution3D -> LeakyReLU -> MaxPooling3D stages
  (8, 16, 32, 64, 128 and 256 filters), followed by Flatten, a 512-unit
  Dense layer with LeakyReLU and a 2-way softmax output.

  Options that were tried or considered but are currently disabled:
    * BatchNormalization(momentum=0.99) after each pooling layer
    * Dropout(0.45) before the 512-unit Dense layer
    * Dropout(0.2) before the softmax output
    * metrics=['categorical_accuracy'] instead of ['accuracy']
  Open questions: should only the first convolution use 'same' padding?
  Hyperparameters and optimisers could be explored with a random search.

  Returns:
    A compiled Sequential model taking inputs of shape (200, 200, 160, 1),
    optimised with Adam (lr=0.00005) on categorical cross-entropy and
    reporting accuracy.
  """
  # Layers in the exact order they are stacked
  layer_stack = [
      # stage 1: the only padded and L2-regularised convolution; carries the input shape
      Convolution3D(filters=8, kernel_size=2, padding='same',
                    input_shape=(200,200,160,1),
                    kernel_regularizer=regularizers.l2(0.05), use_bias=True),
      LeakyReLU(alpha=0.01),
      MaxPooling3D(pool_size=2),

      # stage 2
      Convolution3D(filters=16, kernel_size=2),
      LeakyReLU(alpha=0.01),
      MaxPooling3D(pool_size=2),

      # stage 3
      Convolution3D(filters=32, kernel_size=3),
      # NOTE(review): this activation uses the library's default alpha
      # rather than the 0.01 used everywhere else -- confirm intentional
      LeakyReLU(),
      MaxPooling3D(pool_size=2),

      # stage 4
      Convolution3D(filters=64, kernel_size=3),
      LeakyReLU(alpha=0.01),
      MaxPooling3D(pool_size=2),

      # stage 5
      Convolution3D(filters=128, kernel_size=2),
      LeakyReLU(alpha=0.01),
      MaxPooling3D(pool_size=2),

      # stage 6
      Convolution3D(filters=256, kernel_size=2),
      LeakyReLU(alpha=0.01),
      MaxPooling3D(pool_size=2),

      # classifier head
      Flatten(),
      Dense(512),
      LeakyReLU(alpha=0.01),
      Dense(2, activation='softmax'),
  ]

  model = Sequential()
  for layer in layer_stack:
    model.add(layer)

  model.compile(optimizer=Adam(lr=0.00005),
                loss='categorical_crossentropy',
                metrics=['accuracy'])

  return model

In [8]:
# use 6 files to train, 7th we test our model on
# os.listdir returns entries in arbitrary order, so sort the listing to keep
# the train/test split identical between runs
batch_files = sorted(os.listdir('stored_batches'))
total_slices_train = batch_files[:6]
total_slices_test = batch_files[6]

# Initialise Model!
model = initialise_model()

# load each training batch in turn and continue fitting the same model on it
for tsf in total_slices_train:
  # path of this batch's pickle inside the stored_batches folder
  # (previously the loop re-opened 'total_slices_all.pkl' on every iteration,
  # so the individual batches were never used)
  pkl_path = os.path.join('stored_batches', tsf)

  # load pickle file: a (slices, slice info) pair
  with open(pkl_path, 'rb') as f:
    total_slices, total_slices_info = pickle.load(f)

  # process y-values into one-hot encoded labels
  y_values = get_y_values(total_slices_info)

  # fit our model ## can play with batch size
  # 15% of each batch is held out for validation
  model.fit(x=total_slices, y=y_values, batch_size=5, epochs=1, verbose=1,
            validation_split=0.15, shuffle=True)

['total_slices_batch0.pkl',
 'total_slices_batch1.pkl',
 'total_slices_batch2.pkl',
 'total_slices_batch3.pkl',
 'total_slices_batch4.pkl',
 'total_slices_batch5.pkl',
 'total_slices_batch6.pkl']