<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/p3_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook aims to modularise some of the input functions in P3 to suit the database**

In [0]:
# Standard imports
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

# deep learning imports
from keras import regularizers
from keras.layers import LeakyReLU
from keras.layers.convolutional import Convolution3D, MaxPooling3D, Convolution1D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, load_model
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils, to_categorical

# to split our dataset and score predictions
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# to mount our drive
from google.colab import drive

In [0]:
# Make Google Drive available to this Colab session under /content/gdrive
drive.mount('/content/gdrive')

# Show what sits in /content, then move into the project folder that holds the data
print(os.listdir('/content'))
os.chdir('/content/gdrive/My Drive/msc_project')

**Load our dataframe**

In [0]:
# Read the pickled patient table from the current working directory (the project folder).
# NOTE: unpickling can execute arbitrary code, so only load files you created yourself.
patient_df = pd.read_pickle('processed_patient_df_TRY2.pkl')

In [0]:
# Split the patients on the Group column: 0 -> control, 1 -> PD
is_control = patient_df['Group'] == 0
is_pd = patient_df['Group'] == 1

df_control = patient_df[is_control]
df_pd = patient_df[is_pd]

# preview the first rows of the full table
patient_df.head()

**Load our file**

In [0]:
# Plan -- for each set of training data stored in our pickle files:
#   1. load it
#   2. process it
#   3. train our model on it
# Data split: 100 Test, 15% of 600 Validation, 85% of 600 Training (700 slices in total)
# Afterwards: save our model, tweak it, evaluate it, etc.

In [0]:
# gets our y-values and converts to keras, one hot encoded outputs
def get_y_values(total_slices_info):
  """Build one-hot encoded labels for Keras from the per-slice info records.

  Args:
    total_slices_info: iterable of indexable records; the element at index 2
      of each record is taken as that slice's integer class label.

  Returns:
    numpy array with one row per record and 2 columns (one-hot encoding
    produced by keras' to_categorical with 2 classes).
  """
  labels = []
  for slice_info in total_slices_info:
    labels.append(slice_info[2])

  one_hot = to_categorical(labels, 2)
  return np.array(one_hot)

In [0]:
def initialise_model():
  """Build and compile the 3D convolutional network used for two-class prediction.

  Architecture: six blocks of Convolution3D -> LeakyReLU -> MaxPooling3D
  (8, 16, 32, 64, 128 and 256 filters), followed by Flatten, a 512-unit
  dense layer with LeakyReLU and a 2-unit softmax output.

  Every LeakyReLU uses alpha=0.01. Previously the third block relied on the
  Keras default (alpha=0.3), which was inconsistent with all other blocks.

  Returns:
    A compiled keras Sequential model taking inputs of shape
    (200, 200, 160, 1), optimised with Adam (lr=0.00005) on categorical
    cross-entropy and reporting accuracy.
  """
  leaky_alpha = 0.01  # negative-side slope shared by every LeakyReLU layer

  model = Sequential()

  # First block: 'same' padding, and the input shape is declared here
  # (only the first layer of a Sequential model needs it).
  # Ideas noted for later: kernel_regularizer=regularizers.l2(0.05), use_bias=True
  model.add(Convolution3D(filters=8, kernel_size=2, padding='same', input_shape=(200,200,160,1)))
  model.add(LeakyReLU(alpha=leaky_alpha))
  model.add(MaxPooling3D(pool_size=2))

  # Remaining blocks as (filters, kernel_size), using the layer's default padding.
  # Idea noted for later: BatchNormalization(momentum=0.99) after each pooling layer.
  for n_filters, k_size in [(16, 2), (32, 3), (64, 3), (128, 2), (256, 2)]:
    model.add(Convolution3D(filters=n_filters, kernel_size=k_size))
    model.add(LeakyReLU(alpha=leaky_alpha))
    model.add(MaxPooling3D(pool_size=2))

  # Classifier head. Dropout (0.45 before / 0.2 after the dense layer) was
  # considered against overfitting but is currently not used.
  model.add(Flatten())
  model.add(Dense(512))
  model.add(LeakyReLU(alpha=leaky_alpha))
  model.add(Dense(2, activation='softmax'))

  # Alternative metric considered: metrics=['categorical_accuracy']
  model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])

  return model

In [9]:
# Batch files live in 'stored_batches'. os.listdir() returns names in arbitrary
# order, so sort them to make the train/test assignment reproducible.
batch_files = sorted(os.listdir('stored_batches'))

# The evaluation further down tests on the batch at index 6, so keep that position
# out of training; otherwise the model is scored on data it has already been fitted to.
TEST_BATCH_INDEX = 6
total_slices_train = [name for idx, name in enumerate(batch_files) if idx != TEST_BATCH_INDEX]

# Initialise Model!
model = initialise_model()

# load each training batch in turn and continue fitting the same model on it
for tsf in total_slices_train:
  pkl_path = os.path.join('stored_batches', tsf)

  # load pickle file
  # NOTE: pickle.load can execute arbitrary code; only load batch files you created yourself.
  with open(pkl_path, 'rb') as f:
    total_slices, total_slices_info = pickle.load(f)

  # convert to numpy array
  total_slices = np.array(total_slices)

  # one-hot encoded targets for this batch
  y_values = get_y_values(total_slices_info)

  # one pass over this batch, 10% of it held back for validation ## can play with batch size
  model.fit(x=total_slices, y=y_values, batch_size=1, epochs=1, verbose=1, validation_split=0.1, shuffle=True)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 99 samples, validate on 11 samples
Epoch 1/1
Train on 91 samples, validate on 11 samples
Epoch 1/1
Train on 90 samples, validate on 10 samples
Epoch 1/1
Train on 95 samples, validate on 11 samples
Epoch 1/1
Train on 90 samples, validate on 10 samples
Epoch 1/1
Train on 91 samples, validate on 11 samples
Epoch 1/1


In [0]:
# Save the fitted model to disk so it can be reloaded later without retraining
model.save('my_model_1.h5', overwrite=True)  # creates the HDF5 file 'my_model_1.h5', replacing any existing one

In [0]:
# Reload the model saved in 'my_model_1.h5' so evaluation can run without retraining
# (load_model is imported with the other Keras imports at the top of the notebook)
model = load_model('my_model_1.h5')

In [0]:
# Pick the test batch. os.listdir() gives no ordering guarantee, so sort first
# to make index 6 refer to the same file on every run.
test_batch_file = sorted(os.listdir('stored_batches'))[6]
pkl_path_test = os.path.join('stored_batches', test_batch_file)

# load pickle file
# NOTE: pickle.load can execute arbitrary code; only load batch files you created yourself.
with open(pkl_path_test, 'rb') as f:
  total_slices, total_slices_info = pickle.load(f)

# convert to numpy array
total_slices_test = np.array(total_slices)

# one-hot encoded ground truth for the test batch
y_true_test = get_y_values(total_slices_info)

# predict in 10 chunks rather than on the whole array at once
total_slices_test_chunks = np.array_split(total_slices_test, 10)

# NOTE (from the original author): predictions were observed to come out as class 1 for everything
y_predictions = []
for test_chunk in total_slices_test_chunks:
  # array_split yields empty chunks when there are fewer than 10 samples
  if len(test_chunk) == 0:
    continue

  # argmax over the softmax output gives the predicted class index; this is what
  # Sequential.predict_classes returned, without depending on that removed helper
  y_chunk_pred = np.argmax(model.predict(test_chunk), axis=-1)
  y_predictions.append(y_chunk_pred)

In [0]:
# Join the per-chunk class indices into one vector and one-hot encode it (2 classes)
# so it has the same layout as y_true_test
y_predictions = np.array(to_categorical(np.concatenate(y_predictions), 2))

In [0]:
# Fraction of test samples whose one-hot prediction matches the one-hot truth
# (normalize and sample_weight are left at their defaults)
accuracy_score(y_true_test, y_predictions)

In [0]:
# accuracy_score is the sklearn function itself, so call it on the labels and
# print the resulting score rather than formatting the function object
print ("Accuracy Score: %s" % accuracy_score(y_true_test, y_predictions))
