<a href="https://colab.research.google.com/github/StayFrostea/LearningML/blob/main/Roszell_Thesis_CT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading the images from Google Drive

In [None]:
## Mount Google Drive, where the MOSMEDDATA files are stored, at /content/drive
## so the data paths used in the following cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Paths for the data
## NOTE(review): normal_path and abnormal_path are the same directory, and so
## are the two *_output paths below. Any cell that loads "normal" and
## "abnormal" scans from these paths reads the same files, so the class labels
## cannot differ by content -- confirm whether each class should point at its
## own sub-folder.

normal_path = '/content/drive/MyDrive/Colab Notebooks/Data/Keras CT'
abnormal_path = '/content/drive/MyDrive/Colab Notebooks/Data/Keras CT'

## Output folders of the split step; later cells read '/train/class1' and
## '/val/class1' beneath them.
normal_path_output = '/content/drive/MyDrive/Colab Notebooks/Data/Keras CT/output'
abnormal_path_output = '/content/drive/MyDrive/Colab Notebooks/Data/Keras CT/output'
len(normal_path)

52

## Splitting the files into separate train and validation folders

In [None]:
## A tool for spliting the image files before processing

!pip install split-folders 

Collecting split-folders
  Downloading split_folders-0.4.3-py3-none-any.whl (7.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.4.3


In [None]:
## Next up is to sort the scan files into separate folders.
## I want 80% of them to be involved in the train/validate split (ratio=(0.8, 0.2) below).
## Then I want the remaining 20% to never be touched by the model until I predict.
## I also want to split them before I slice them down into 2D chunks.

##import splitfolders

##splitfolders.ratio(normal_path, output=normal_path_output, seed=1337, ratio=(0.8, 0.2))
##splitfolders.ratio(abnormal_path, output=abnormal_path_output, seed=1337, ratio=(0.8, 0.2))

In [None]:
## Count the files that ended up in the validation folder
import os

val_class_dir = abnormal_path_output + '/val/class1'

## The first item yielded by os.walk describes the folder itself
path, dirs, files = next(os.walk(val_class_dir))
file_count = len(files)
file_count

20

In [None]:
## Same count for the training folder
train_class_dir = abnormal_path_output + '/train/class1'

path, dirs, files = next(os.walk(train_class_dir))
file_count = len(files)
file_count

80

## Now we are ready to load the files into the notebook as NIfTI images

In [None]:
import numpy as np
import nibabel as nib
from nibabel.testing import data_path
from scipy import ndimage

In [None]:
def read_NifTi(fp):
    """Load the image file at ``fp`` with nibabel and return its data array.

    The file is opened with ``nib.load`` and the result of the image's
    ``get_fdata()`` call is returned.
    """
    return nib.load(fp).get_fdata()

In [None]:
def resizeVolume(vol, d_depth=64, d_width=128, d_height=128):
    """Rotate a volume by 90 degrees and rescale it to a fixed size.

    Parameters
    ----------
    vol : numpy.ndarray
        Volume whose axis 0 is treated as width, axis 1 as height and last
        axis as depth.
    d_depth, d_width, d_height : int, optional
        Desired size along the depth, width and height axes. The defaults
        (64, 128, 128) are the sizes that were previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The rotated volume, resampled with linear interpolation (order=1)
        by the per-axis factors desired/current.
    """
    ## current size
    c_width, c_height = vol.shape[0], vol.shape[1]
    c_depth = vol.shape[-1]

    ## factor to change by along each axis
    w_factor = d_width / c_width
    h_factor = d_height / c_height
    d_factor = d_depth / c_depth

    ## Adjust proper rotation. reshape=False keeps the array shape unchanged,
    ## so the factors computed above are still valid afterwards.
    vol = ndimage.rotate(vol, 90, reshape=False)

    ## apply the factors
    vol = ndimage.zoom(vol, (w_factor, h_factor, d_factor), order=1)

    return vol

In [None]:
def normalizeVolume(vol, lower=-1000, upper=400):
    """Clip a volume to ``[lower, upper]`` and rescale it to ``[0, 1]``.

    Parameters
    ----------
    vol : numpy.ndarray
        Input intensities. The array is NOT modified; a new array is returned.
    lower, upper : number, optional
        Clipping bounds. Defaults are the previously hard-coded -1000 and 400
        (presumably a Hounsfield-unit window for CT -- confirm).

    Returns
    -------
    numpy.ndarray
        float32 array where ``lower`` maps to 0.0 and ``upper`` maps to 1.0.
    """
    ## np.clip builds a new array, so the caller's data is left untouched
    ## (the earlier masked assignments overwrote the input in place).
    clipped = np.clip(vol, lower, upper)
    scaled = (clipped - lower) / (upper - lower)
    return scaled.astype("float32")

In [None]:
def processVolume(path):
    """Read the scan at ``path``, then normalize and resize it.

    The steps run in this order: read_NifTi -> normalizeVolume -> resizeVolume.
    Returns the array produced by resizeVolume.
    """
    raw = read_NifTi(path)
    normalized = normalizeVolume(raw)
    return resizeVolume(normalized)

In [None]:
def listScanPaths(folder):
    """Return the full path of every entry in ``folder``, sorted by name.

    os.listdir gives entries in arbitrary order; sorting makes the list (and
    the index-based train/validation split built from it) the same on every
    run.
    """
    return [
        os.path.join(os.getcwd(), folder, name)
        for name in sorted(os.listdir(folder))
    ]


normal_scan_paths = listScanPaths(normal_path_output + '/train/class1')

abnormal_scan_paths = listScanPaths(abnormal_path_output + '/train/class1')

In [None]:
## Normal scans: process every file and give each volume the label 0
normal_volumes = np.array(list(map(processVolume, normal_scan_paths)))
normal_volume_labels = np.array([0] * len(normal_volumes))

In [None]:
## Abnormal scans: process every file and give each volume the label 1
abnormal_volumes = np.array(list(map(processVolume, abnormal_scan_paths)))
abnormal_volume_labels = np.array([1] * len(abnormal_volumes))

In [None]:
## Report how many scan paths were collected per class
print(f"CT scans with normal lung tissue: {len(normal_scan_paths)}")
print(f"CT scans with abnormal lung tissue: {len(abnormal_scan_paths)}")

CT scans with normal lung tissue: 80
CT scans with abnormal lung tissue: 80


## Now we can split the images into training and validation in order to train the model

In [None]:
## Manual split
## [:train_per_class] keeps the first 60 volumes of each class for training,
## [train_per_class:] keeps the remainder of each class for validation.
train_per_class = 60

X_train = np.concatenate(
    (abnormal_volumes[:train_per_class], normal_volumes[:train_per_class]),
    axis=0,
)
y_train = np.concatenate(
    (abnormal_volume_labels[:train_per_class], normal_volume_labels[:train_per_class]),
    axis=0,
)

X_val = np.concatenate(
    (abnormal_volumes[train_per_class:], normal_volumes[train_per_class:]),
    axis=0,
)
y_val = np.concatenate(
    (abnormal_volume_labels[train_per_class:], normal_volume_labels[train_per_class:]),
    axis=0,
)

## scikit-learn alternative (not used). Note that train_test_split returns the
## pieces in the order X_train, X_val, y_train, y_val.

##from sklearn.model_selection import train_test_split

##vol_data, vol_labels = np.arange(10).reshape((5, 2)), range(5)

##X_train, X_val, y_train, y_val = train_test_split(vol_data, vol_labels, test_size=0.20, random_state=42)
print(
    "Number of samples in train and validation are %d and %d."
    % (X_train.shape[0], X_val.shape[0])
)

Number of samples in train and validation are 120 and 40.


## This is where I will put preprocessing.

In [None]:
import random
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

def rotate(volume):
    """Rotate a volume by a randomly chosen small angle.

    The rotation itself runs in scipy via ``tf.numpy_function`` so it can be
    used inside a ``tf.data`` ``map``. The angle is drawn from
    [-20, -10, -5, 5, 10, 20] degrees and the result is clipped to [0, 1].

    Parameters
    ----------
    volume : tensor or array of float32
        The volume to augment; the wrapped function is declared to return
        ``tf.float32``.

    Returns
    -------
    tf.Tensor
        The rotated volume, with the same static shape as ``volume``.
    """

    def scipy_rotate(volume):
        # define some rotation angles
        angles = [-20, -10, -5, 5, 10, 20]
        # pick angles at random
        angle = random.choice(angles)
        # rotate volume; reshape=False keeps the array shape unchanged
        rotated = ndimage.rotate(volume, angle, reshape=False)
        # interpolation can leave values slightly outside [0, 1]; clamp them
        return np.clip(rotated, 0, 1)

    augmented_volume = tf.numpy_function(scipy_rotate, [volume], tf.float32)
    # tf.numpy_function does not carry static shape information through, so
    # restore it from the input (the rotation preserves the shape).
    augmented_volume.set_shape(volume.shape)
    return augmented_volume

## This is to expand dimensions by adding size 1 onto the added dimension
##def expandDims(volume):
  ##volume = tf.expand_dims(volume, axis=3)
  ##return volume

## Training volumes get a random rotation so the model is less tied to orientation
def train_preprocess(volume, label):
  """Randomly rotate ``volume``, add a size-1 axis at index 3, keep ``label``."""
  rotated = rotate(volume)
  return tf.expand_dims(rotated, axis=3), label

## Validation volumes are left unrotated
def valid_preprocess(volume, label):
  """Add a size-1 axis at index 3 to ``volume`` and pass ``label`` through."""
  return tf.expand_dims(volume, axis=3), label

## Using TensorFlow's automated data loaders

In [None]:
## Build the loaders with tf.data.Dataset.from_tensor_slices, which yields one
## (volume, label) pair per element

train_pairs = (X_train, y_train)
valid_pairs = (X_val, y_val)

train_loader = tf.data.Dataset.from_tensor_slices(train_pairs)
valid_loader = tf.data.Dataset.from_tensor_slices(valid_pairs)
X_val.shape

(40, 128, 128, 64)

In [None]:
batch_size = 3

## Both pipelines follow the same steps:
## shuffle over the whole set -> preprocess each element -> batch -> prefetch
train_dataset = train_loader.shuffle(len(X_train)).map(train_preprocess)
train_dataset = train_dataset.batch(batch_size).prefetch(3)

validation_dataset = valid_loader.shuffle(len(X_val)).map(valid_preprocess)
validation_dataset = validation_dataset.batch(batch_size).prefetch(3)

## Finally the model

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv3D, MaxPooling3D, Dropout, BatchNormalization
import matplotlib.pyplot as plt

In [None]:
def buildModel(width=128, height=128, depth=64):
  """Build the 3D convolutional binary classifier.

  The network is four Conv3D(kernel 3, relu) -> MaxPool3D(2) ->
  BatchNormalization stages with 64, 64, 128 and 256 filters, followed by
  global average pooling, Dense(512, relu), Dropout(0.3) and a single sigmoid
  output unit.

  Args:
    width, height, depth: size of the single-channel input volume. The
      defaults (128, 128, 64) are the values that were previously hard-coded.
      With four unpadded conv + pool stages each dimension must be at
      least 46.

  Returns:
    An uncompiled keras.Model named "CT_CNN_3D".
  """
  inputs = keras.Input((width, height, depth, 1))

  ## The four feature-extraction stages differ only in their filter count
  x = inputs
  for n_filters in (64, 64, 128, 256):
    x = layers.Conv3D(filters=n_filters, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

  x = layers.GlobalAveragePooling3D()(x)
  x = layers.Dense(units=512, activation="relu")(x)
  x = layers.Dropout(0.3)(x)

  outputs = layers.Dense(units=1, activation="sigmoid")(x)

  model = keras.Model(inputs, outputs, name="CT_CNN_3D")
  return model

model = buildModel()
model.summary()

Model: "CT_CNN_3D"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 64, 1)] 0         
_________________________________________________________________
conv3d (Conv3D)              (None, 126, 126, 62, 64)  1792      
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 63, 63, 31, 64)    0         
_________________________________________________________________
batch_normalization (BatchNo (None, 63, 63, 31, 64)    256       
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 61, 61, 29, 64)    110656    
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 30, 30, 14, 64)    0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 30, 30, 14, 64)    25

In [None]:
## Learning rate starts at 1e-4 and is multiplied by 0.96 every 100000
## optimizer steps (staircase=True makes the change happen in discrete jumps).
## NOTE(review): 120 training samples at batch size 3 for 20 epochs is about
## 800 steps, so this schedule looks like it never decays -- confirm intended.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.0001, decay_steps=100000, decay_rate=0.96, staircase=True
)

## `metrics` is passed as a list, which is the documented form of the argument.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    metrics=[keras.metrics.BinaryAccuracy()],
)

In [None]:
## Defining the training callbacks
## Both callbacks watch the validation loss: the checkpoint keeps only the
## best-scoring weights, and training stops once val_loss has not improved for
## 15 epochs. (Early stopping previously watched the training loss, which does
## not reflect how the model does on unseen data.)
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "3D_CT_classification.h5", monitor="val_loss", save_best_only=True
)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_loss", patience=15)

In [None]:
## How many passes over the training set to run
epochs = 20

In [None]:
## Training!!!!
## Fits on train_dataset, evaluates on validation_dataset after each epoch and
## runs the checkpoint / early-stopping callbacks.
## NOTE(review): the Keras docs say `shuffle` is ignored when the input is a
## tf.data.Dataset -- confirm; ordering would then come from the dataset's own
## .shuffle() call.

model.fit(train_dataset,
          validation_data=validation_dataset,
          epochs=epochs,
          shuffle=True,
          verbose='auto',
          callbacks = [ checkpoint_cb , early_stopping_cb],
          )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20


<keras.callbacks.History at 0x7f833029f390>