In [7]:
# standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import pickle
from sklearn.model_selection import train_test_split
import seaborn as sns

# imports for keras model
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D, Convolution1D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils
from keras.layers import LeakyReLU
from keras import regularizers

##### Change the working directory to the Downloads folder

In [8]:
# Directory that holds the pickled inputs read by the following cells.
# Set the PD_DATA_DIR environment variable to point somewhere else on another
# machine; when it is unset, the original author's Downloads folder is used.
DATA_DIR = os.environ.get('PD_DATA_DIR', 'C:\\Users\\steve\\Downloads')
os.chdir(DATA_DIR)

##### Load the dataframe and slices

In [9]:
# Patient metadata table, read from a pickle in the current working directory
patient_df = pd.read_pickle('processed_patient_df.pkl')

# The second pickle stores a 2-tuple: the slice data and a per-entry info record.
# NOTE: unpickling runs arbitrary code, so only load files you created yourself.
with open('total_slices_all.pkl', 'rb') as slices_file:
    total_slices, total_slices_info = pickle.load(slices_file)

In [10]:
# Show the dimensions of the loaded slice data (the recorded output is a 5-tuple)
np.shape(total_slices)

(102, 200, 200, 160, 1)

In [11]:
# Split the metadata by the Group code: 0 goes to the control frame, 1 to the PD frame
df_control = patient_df.loc[patient_df['Group'] == 0]
df_pd = patient_df.loc[patient_df['Group'] == 1]

# Preview the first rows of the full table
patient_df.head()

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
197,353477,3750,0,M,54,5,MRI,MPRAGE GRAPPA,Original,10/10/2012,DCM,
198,368584,3638,1,M,66,1,MRI,SAG T1 3D MPRAGE,Original,4/02/2013,DCM,
199,363979,3637,0,M,57,1,MRI,SAG T1 3D MPRAGE,Original,2/27/2013,DCM,
200,362046,3636,0,M,64,1,MRI,SAG T1 3D MPRAGE,Original,2/22/2013,DCM,
201,362045,3635,0,M,57,1,MRI,SAG T1 3D MPRAGE,Original,2/07/2013,DCM,


##### Split into training and test set

In [12]:
from keras.utils import to_categorical

def get_y_values(total_slices_info):
    """Build the one-hot target matrix for the network.

    Parameters
    ----------
    total_slices_info : iterable of indexable records
        The element at index 2 of each record is taken as that entry's
        integer class label.

    Returns
    -------
    numpy.ndarray
        The labels passed through ``to_categorical`` with 2 classes,
        one row per record.
    """
    labels = []
    for info in total_slices_info:
        labels.append(info[2])

    # Keras' categorical cross-entropy expects one-hot rows rather than raw class ids
    return np.array(to_categorical(labels, 2))

y_values = get_y_values(total_slices_info)

# Print the class balance of the targets.
# y_values is one-hot (one row per sample, one column per class), so summing
# down the rows gives the number of samples in each class. Counting the unique
# 0/1 entries of the whole matrix instead would see exactly one 0 and one 1 per
# row and therefore always report 50/50, whatever the real balance is.
class_counts = y_values.sum(axis=0)
num_controls = int(class_counts[0])  # column 0 <-> label 0 (assumed control, as in patient_df.Group -- TODO confirm)
num_pd = int(class_counts[1])        # column 1 <-> label 1 (assumed PD -- TODO confirm)

total_samples = num_controls + num_pd
pct_controls = (num_controls / total_samples) * 100
pct_pd = (num_pd / total_samples) * 100
print ("%s%% Control and %s%% PD" % (pct_controls, pct_pd))

50.0% Control and 50.0% PD


In [13]:
from sklearn.model_selection import train_test_split

# Split into training and test sets.
# A fixed seed makes the split (and every result derived from it) repeatable
# after a kernel restart. Stratifying on the class index keeps the class ratio
# the same in both partitions, which matters with only ~100 samples.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    total_slices, y_values,
    test_size=0.2, shuffle=True,
    random_state=SPLIT_SEED,
    stratify=np.argmax(y_values, axis=1),  # one-hot rows -> class index
)

np.shape(total_slices) # shape of the full (un-split) input array

(102, 200, 200, 160, 1)

##### Create model architecture

In [14]:
# Build and compile the 3D convolutional classifier.
#
# Layout: six [Conv3D -> LeakyReLU -> MaxPooling3D] blocks, then
# Flatten -> Dense(512) -> LeakyReLU -> Dense(2, softmax).
#
# Every LeakyReLU now shares one negative slope. Previously the 32-filter block
# used LeakyReLU() with the Keras default alpha instead of the 0.01 used by
# every other activation in the network.
LEAKY_ALPHA = 0.01

model = Sequential()

# First block: takes the (200, 200, 160, 1) input volume. It is the only
# convolution with 'same' padding and the only one carrying an L2 weight penalty.
# NOTE(review): original author's open question -- padding on the first layer only?
model.add(Convolution3D(filters=8, kernel_size=2, padding='same',
                        input_shape=(200, 200, 160, 1),
                        kernel_regularizer=regularizers.l2(0.05),
                        use_bias=True))
model.add(LeakyReLU(alpha=LEAKY_ALPHA))
model.add(MaxPooling3D(pool_size=2))

# Remaining blocks: (filters, kernel_size) per block, default ('valid') padding.
for n_filters, kernel in [(16, 2), (32, 3), (64, 3), (128, 2), (256, 2)]:
    model.add(Convolution3D(filters=n_filters, kernel_size=kernel))
    model.add(LeakyReLU(alpha=LEAKY_ALPHA))
    model.add(MaxPooling3D(pool_size=2))

# Ideas noted by the author but not enabled:
#   - BatchNormalization(momentum=0.99) after each pooling step
#   - Dropout(0.45) before the 512-unit layer and Dropout(0.2) before the output layer

# Classification head
model.add(Flatten())
model.add(Dense(512))
model.add(LeakyReLU(alpha=LEAKY_ALPHA))
model.add(Dense(2, activation='softmax'))  # two-class probabilities, matching the one-hot targets

# Alternative metric considered by the author: metrics=['categorical_accuracy']
model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])
          
# TODO: experiment more widely with the hyperparameters, e.g. a random search over optimisers

##### Running the model

In [None]:
# Train the compiled model on the training partition. Keras holds out 20% of
# X_train for validation. The one-pass outer loop is kept so that both `run`
# and `hist` (the History object of the last fit) remain defined afterwards.
# NOTE(review): the saved output reports "Epoch 1/12" while epochs=1 here, so it
# appears to come from an earlier version of this cell -- confirm the intended value.
for run in range(1):
    hist = model.fit(
        x=X_train,
        y=y_train,
        batch_size=5,
        epochs=1,
        verbose=1,
        validation_split=0.2,
        shuffle=True,
    )

Instructions for updating:
Use tf.cast instead.
Train on 64 samples, validate on 17 samples
Epoch 1/12
 5/64 [=>............................] - ETA: 31:18 - loss: 0.0760 - acc: 1.0000