<a href="https://colab.research.google.com/github/StevenVuong/MSc_Project/blob/master/v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install dependencies (Colab shell escapes, so these run in the VM's shell):
#   deepbrain - provides the Extractor imported further down for skull stripping
#   pydicom   - used further down to read the DICOM slice files
!pip install deepbrain; # semi-colon to hide the output
!pip install pydicom;



In [2]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/gdrive; the cell
# that follows changes into a project folder underneath this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
import os

# Absolute location of the project folder on the mounted Drive. The original
# relative path ('gdrive/My Drive/msc_project') only resolves while the kernel
# is still in /content, so re-running the cell raised FileNotFoundError; an
# absolute path makes the cell safe to run any number of times.
PROJECT_DIR = '/content/gdrive/My Drive/msc_project'

# show where we start from, then go to where the data is
print (os.listdir())
os.chdir(PROJECT_DIR)
os.listdir()

['.config', 'gdrive', 'sample_data']


['t1_scan',
 'T1_SAG_SIEMEN_3T_CLEAN_5_29_2019.csv',
 'T1_SAG_SIEMEN_3T_CLEAN_1',
 'T1_SAG_SIEMEN_3T_CLEAN_5_29_2019.gsheet',
 'T1_SAG_SIEMEN_3T_CLEAN',
 'loaded_slices',
 'processed_augmented_slices']

In [4]:
import pandas as pd

# Approach adapted from:
# https://www.kaggle.com/sentdex/first-pass-through-data-w-3d-convnet

# Folder holding one sub-directory per patient
patients_file_dir = 'T1_SAG_SIEMEN_3T_CLEAN'

# Metadata table, used later to look up each image's sex / group / age
patient_df = pd.read_csv('T1_SAG_SIEMEN_3T_CLEAN_5_29_2019.csv')

# Names of every entry in the scan folder (the per-patient directories)
patients = os.listdir(patients_file_dir)

# Preview the first few metadata rows
patient_df.head()

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
0,1130198,75422,GenCohort Unaff,M,73,1,MRI,MPRAGE GRAPPA,Original,11/13/2018,DCM,5/07/2019
1,1130190,75414,GenCohort Unaff,F,73,1,MRI,Sag MPRAGE GRAPPA,Original,12/13/2018,DCM,4/24/2019
2,1130191,75414,GenCohort Unaff,F,73,1,MRI,Sag MPRAGE GRAPPA,Original,12/13/2018,DCM,4/24/2019
3,1125041,74375,GenCohort Unaff,F,59,1,MRI,MPRAGE_GRAPPA,Original,9/06/2018,DCM,4/24/2019
4,1003469,72138,GenCohort Unaff,F,55,1,MRI,MPRAGE GRAPPA,Original,2/19/2018,DCM,4/24/2019


In [0]:
# Fold the genetic-cohort labels into the plain 'PD' / 'Control' labels that
# the label-building cell later checks for.
group_aliases = {'GenCohort PD':'PD', 'GenCohort Unaff':'Control'}
patient_df['Group'] = patient_df['Group'].replace(group_aliases)

In [0]:
def get_grappa_dir(path):
  """Return the first entry of `path` whose last '_'-separated token is 'GRAPPA'.

  Entries are examined in os.listdir order. The match is case-sensitive.
  Returns None when no entry matches.
  """
  grappa_entries = (entry for entry in os.listdir(path)
                    if entry.split("_")[-1] == 'GRAPPA')
  return next(grappa_entries, None)

In [0]:
def get_dcm_s(path):
  """Return the first entry of `path` whose name begins with 'S'.

  The 'S' prefix is what distinguishes the wanted folder from the GZ file
  that can sit beside it. Returns None when nothing matches.
  """
  return next((entry for entry in os.listdir(path) if entry.startswith('S')), None)

In [0]:
def get_img_no(path):
  """Return the image identification number encoded in a filename under `path`.

  The ID is read from the last '_'-separated token of a filename, after
  dropping its first character and its last four characters (presumably a
  one-letter prefix and the '.dcm' extension - confirm against the data).

  Any image will do, so the first filename that parses is used. Files that do
  not follow the naming scheme are skipped instead of aborting the lookup.

  Returns:
    The ID as an int, or None when no filename in `path` yields one.
  """
  for image_file in os.listdir(path):
    id_token = image_file.split("_")[-1]
    try:
      return int(id_token[1:-4]) # index to get the ID
    except ValueError:
      continue # stray file that does not carry a numeric ID

  return None

In [0]:
def filename_sort(filename):
    """Sort key: the third-from-last '_'-separated field of `filename`, as an int."""
    return int(filename.split("_")[-3])

In [10]:
import pydicom
import numpy as np

cwd = os.getcwd()
print ("Current Working Dir: %s " % cwd)

# Slice window kept from every scan, and the slice count a scan must have
# after trimming to be usable.
SLICE_START, SLICE_STOP = 15, 175
EXPECTED_SLICES = SLICE_STOP - SLICE_START

total_slices = [] # one [volume, image_info] entry per successfully loaded scan
counter = 0

for patient in patients[:150]:
  try:
    # Walk down the folder tree: <patient>/<*_GRAPPA>/<scan folder>/<S...>
    path = cwd + '/' + patients_file_dir +'/' + patient # get to the GRAPPA
    path = path + '/' + get_grappa_dir(path)
    # os.listdir returns entries in arbitrary order, so sort before taking the
    # last one. Assumes the scan folder names sort chronologically
    # (e.g. date-stamped) so that the last one is the most recent - TODO confirm.
    path = path + '/' + sorted(os.listdir(path))[-1]
    path = path + '/' + get_dcm_s(path)

    # get information related around the image
    image_number = get_img_no(path)
    image_row = patient_df.loc[patient_df['Image Data ID'] == image_number] # relate to df
    if image_row.empty:
      raise LookupError("image %s is not listed in the metadata CSV" % image_number)
    image_sex = image_row.Sex.values[0]
    image_group = image_row.Group.values[0]
    image_age = image_row.Age.values[0]

    # create image object and append to total info
    image_info = [image_number, image_sex, image_group, image_age]

    # get files and sort them into slice order
    dcm_files = sorted(os.listdir(path), key=filename_sort)

    # read every slice and stack them into one (slice, row, col) volume;
    # dcmread is the current name of the deprecated pydicom.read_file
    slices = [pydicom.dcmread(path + '/' + dcm_file).pixel_array
              for dcm_file in dcm_files]
    slices = np.array(slices)[SLICE_START:SLICE_STOP, :, :]

    if (np.shape(slices)[0] == EXPECTED_SLICES): # only add if the trimmed scan has the full window
      total_slices.append([slices, image_info])

      # print for counter
      counter = counter+1
      print ("%d slices loaded" % counter)

      print (np.shape(slices)) # each patient has different number of slices, trim it to [15:175, 30:230, 30:230]

  except Exception as e:
    # Best effort: skip this patient, but report which one and the real
    # reason (missing folder, unparsable filename, unreadable DICOM, ...).
    print ("Skipping patient %s: %s: %s" % (patient, type(e).__name__, e))

Current Working Dir: /content/gdrive/My Drive/msc_project 
1 slices loaded
(160, 256, 240)
2 slices loaded
(160, 256, 240)
3 slices loaded
(160, 256, 240)
4 slices loaded
(160, 256, 240)
5 slices loaded
(160, 256, 240)
6 slices loaded
(160, 256, 240)
7 slices loaded
(160, 256, 240)
8 slices loaded
(160, 256, 240)
9 slices loaded
(160, 256, 240)
10 slices loaded
(160, 256, 240)
11 slices loaded
(160, 256, 240)
12 slices loaded
(160, 256, 240)
13 slices loaded
(160, 256, 240)
14 slices loaded
(160, 256, 240)
15 slices loaded
(160, 256, 240)
16 slices loaded
(160, 256, 240)
17 slices loaded
(160, 256, 240)
18 slices loaded
(160, 256, 240)
19 slices loaded
(160, 256, 240)
20 slices loaded
(160, 256, 240)
21 slices loaded
(160, 256, 240)
22 slices loaded
(160, 256, 240)
23 slices loaded
(160, 256, 240)
24 slices loaded
(160, 256, 240)
25 slices loaded
(160, 256, 240)
26 slices loaded
(160, 256, 240)
27 slices loaded
(160, 256, 240)
28 slices loaded
(160, 256, 240)
29 slices loaded
(160, 256

In [0]:
import pickle

In [0]:
# Cache the loaded volumes (with their metadata) so the slow DICOM loading
# cell does not have to be re-run every session.
with open('loaded_slices', 'wb') as slices_file:
    pickle.dump(total_slices, slices_file)

In [0]:
# load our slices
# The context manager closes the file handle, which the bare open() call
# inside pickle.load() never did.
# NOTE: unpickling can execute arbitrary code - only load a file that this
# notebook wrote itself.
with open("loaded_slices", "rb") as slices_file:
    total_slices = pickle.load(slices_file)

In [14]:
from deepbrain import Extractor
import nibabel as nb

total_slices_processed = [] # volumes: original, flipped, original, flipped, ...
slice_info = []             # metadata, one entry per volume above (same order)

# initialise skull stripper once; it is loop-invariant, and constructing it
# per volume repeated the setup for every scan
ext = Extractor()

for total_slice in total_slices:
  # deal with mixed slice information
  slices, info = total_slice[0], total_slice[1]

  # transform into axial view: (slice, row, col) -> (row, col, slice)
  slice_axial = slices.transpose((1,2,0))

  # get probability of part of image being brain tissue or not
  prob = ext.run(slice_axial)
  mask = prob > 1e-3 # mask can be obtained as:
  # NOTE: transpose returns a view, so this also zeroes the matching voxels
  # of the volume held in total_slices.
  slice_axial[~mask] = 0 # apply mask

  slice_axial = slice_axial[30:230, 30:230, :] # trim blank ones
  total_slices_processed.append(slice_axial) # add original
  slice_info.append(info)

  # flip the volume along its last axis and add it as an augmented sample
  # (same result as flipping axis 1 of every 2-D row slice, in one call)
  flipped_slices = np.flip(slice_axial, axis=2)
  total_slices_processed.append(flipped_slices)
  slice_info.append(info) # add info twice: once per volume

  print ("Regular Shape: %s " % (np.shape(slice_axial), ))
  print ("Flipped Shape: %s " % (np.shape(flipped_slices), ))

Instructions for updating:
Use tf.gfile.GFile.
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200, 160) 
Regular Shape: (200, 200, 160) 
Flipped Shape: (200, 200,

In [15]:
# Stack the list of volumes into one array and append a trailing singleton
# axis (position 4), matching the (200,200,160,1) input shape the model declares.
total_slices_processed = np.expand_dims(np.array(total_slices_processed), axis=4)

total_slices_processed.shape

(238, 200, 200, 160, 1)

In [0]:
# need to build the y-data set (PD or Healthy)
# then split into test and training set
# then try run through a basic model

In [17]:
from keras.utils import to_categorical

# build y-outputs: element 2 of each info record is the group label;
# 'PD' becomes 1, 'Control' becomes 0, anything else is passed through unchanged
diagnosis = [1 if group == 'PD' else 0 if group == 'Control' else group
             for group in (info[2] for info in slice_info)]

# one-hot encode into two columns with the keras util
y_output = np.array(to_categorical(diagnosis, 2))

print (y_output)

[[0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 

Using TensorFlow backend.


In [18]:
from sklearn.model_selection import train_test_split

SPLIT_SEED = 42 # fixed seed so the split is reproducible across runs

# The volumes were appended as (original, flipped) pairs, so indices 2k and
# 2k+1 come from the same scan. Splitting individual volumes at random could
# put a scan in the training set and its mirror image in the test set, which
# leaks test data into training. Split on the pair index instead and expand
# each chosen pair back into its two volume indices.
assert len(total_slices_processed) == len(y_output), "volumes and labels are misaligned"
assert len(total_slices_processed) % 2 == 0, "expected (original, flipped) pairs"

pair_ids = np.arange(len(total_slices_processed) // 2)
train_pairs, test_pairs = train_test_split(pair_ids, test_size=0.2, shuffle=True,
                                           random_state=SPLIT_SEED)

train_idx = np.concatenate([2 * train_pairs, 2 * train_pairs + 1])
test_idx = np.concatenate([2 * test_pairs, 2 * test_pairs + 1])

# shuffle within each side so a scan and its mirror are not always adjacent
split_rng = np.random.RandomState(SPLIT_SEED)
split_rng.shuffle(train_idx)
split_rng.shuffle(test_idx)

# split into training and test set
X_train, X_test = total_slices_processed[train_idx], total_slices_processed[test_idx]
y_train, y_test = y_output[train_idx], y_output[test_idx]

np.shape(X_train)

(190, 200, 200, 160, 1)

In [0]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils, generic_utils
from keras.layers import LeakyReLU

In [20]:
model = Sequential()

# (filters, kernel_size) of each convolutional stage. Every stage is
# Conv3D -> LeakyReLU -> MaxPooling3D (default pool size).
conv_stages = [(8, 2), (16, 2), (32, 3), (64, 3), (128, 4), (256, 4)]

for stage_index, (n_filters, kernel) in enumerate(conv_stages):
  # only the first layer of the Sequential model declares the input shape
  first_layer_kwargs = {'input_shape': (200,200,160,1)} if stage_index == 0 else {}
  model.add(Convolution3D(filters=n_filters, kernel_size=kernel, strides=1,
                          padding='same', **first_layer_kwargs))
  model.add(LeakyReLU(alpha=0.01)) # set to 0.01
  model.add(MaxPooling3D())

model.add(Flatten())

model.add(Dense(1024))
model.add(LeakyReLU(alpha=0.01))

# The output layer needs softmax: categorical_crossentropy (as compiled below)
# expects class probabilities, and the bare linear Dense(2) used before fed it
# unnormalised scores.
model.add(Dense(2, activation='softmax'))

model.compile(optimizer=Adam(lr=0.00005), loss='categorical_crossentropy',metrics = ['categorical_accuracy']) # metrics=['categorical_accuracy']

Instructions for updating:
Colocations handled automatically by placer.


In [21]:
# optimising with: https://towardsdatascience.com/hyperparameter-optimization-with-keras-b82e6364ca53
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_1 (Conv3D)            (None, 200, 200, 160, 8)  72        
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 200, 200, 160, 8)  0         
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 100, 100, 80, 8)   0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 100, 100, 80, 16)  1040      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 100, 100, 80, 16)  0         
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 50, 50, 40, 16)    0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 50, 50, 40, 32)    13856     
__________

In [2]:
# Train for 5 epochs in mini-batches of 10 volumes, reshuffling the training
# set every epoch and scoring on the held-out split after each one.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=10,
    epochs=5,
    verbose=1,
    validation_data=(X_test, y_test),
    shuffle=True,
)

NameError: ignored

In [0]:
# Persist the trained model so it can be evaluated later (with a validation set?)
# NOTE(review): pickling a Keras model is not guaranteed to work on every
# Keras version; model.save() is the supported route, but switching formats
# must be done together with the loading cell.
with open('trained_model', 'wb') as model_file:
    pickle.dump(model, model_file)

In [0]:
# load our model
# The result is bound to `model` (it used to overwrite `total_slices`, which
# destroyed the loaded scans and left `model` undefined on a fresh kernel),
# and the file handle is now closed by the context manager.
# NOTE: unpickling can execute arbitrary code - only load a file that this
# notebook wrote itself.
with open("trained_model", "rb") as model_file:
    model = pickle.load(model_file)

In [1]:
# Pattern borrowed from:
# https://github.com/MinhazPalasara/keras/blob/master/examples/shapes_3d_cnn.py
# Element 0 of the result is reported as the score, element 1 as the accuracy.
score = model.evaluate(X_test, y_test, batch_size=None)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

NameError: ignored

In [0]:
# Shape of the data once the singleton axis 4 is dropped (the array itself is left untouched)
np.squeeze(total_slices_processed, axis=4).shape

In [0]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets # interactive plots
import matplotlib.pyplot as plt
%matplotlib inline

total_slices_processed = np.squeeze(total_slices_processed, axis=4)
def g(i): # basic slideshow plot to get an idea of the effectiveness of the mask itself
    plt.figure(figsize=(15,8)) # make plot larger
    plt.imshow(total_slices_processed[1][i])
    plt.show()
    return None
  
interact(g, i=widgets.IntSlider(min=0,max=(len(total_slices_processed[1])-1),step=1,value=65)); # plots our axial view, this is it