# CNN-based Brain Tumour Segmentation Network
## Import packages
Please make sure you have all the required packages installed. 

In [3]:
import os
import random
import shutil
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import tensorflow
import cv2
import keras
import ipywidgets as widgets
import keras_tuner as kt


from matplotlib.widgets import Slider
from ipywidgets import interact, interactive, fixed, interact_manual

from keras import layers
from keras.layers import *
from keras.models import *
# from keras.applications.vgg16 import VGG16
from keras.applications.xception import Xception
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import EarlyStopping
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

## Visualise MRI Volume Slices and Segmentation Maps
Each MRI image contains information about a three-dimensional (3D) volume of space. An MRI image is composed of a number of voxels, which are the 3D equivalent of pixels in 2D images. Here we visualise the axial plane (which usually has a higher resolution) of some of the volumes, together with the corresponding segmentation maps.

In [4]:
# Data Visualization
# Pick a data set in the dropdown, then move through its slices with the slider.

# Dropdown entry shown while no data set has been chosen.
no_selection_hint = "Nothing Selected"

# Dataset locations.
img_path = 'dataset_segmentation/'
train_path = os.path.join(img_path, "train")

# State shared between the widget callbacks; filled in once a set is selected.
view_pla_path = view_seg_path = None    # file paths of the selected volumes
view_pla_load = view_seg_load = None    # loaded volume data
slices = None                           # shape of the loaded image volume
sliders = None                          # currently displayed slice slider

def update_slice(val):
    """Show slice ``val`` of the loaded image and segmentation side by side.

    Reads the module-level ``view_pla_load`` and ``view_seg_load`` arrays,
    indexes both along their third axis and draws them in a 1x2 grey-scale
    figure titled 'FLA' and 'SEG'. Returns an empty tuple.
    """
    # Slice both volumes up front so an indexing error surfaces before any
    # figure is created.
    panels = (
        (1, view_pla_load[:, :, val], 'FLA'),
        (2, view_seg_load[:, :, val], 'SEG'),
    )

    plt.figure(figsize=(12, 6))
    for position, image, title in panels:
        plt.subplot(1, 2, position)
        plt.imshow(image, cmap='gray')
        plt.title(title)
    plt.show()
    return ()

def update_set(strval):
    """Dropdown observer: load the selected case and show a slice slider.

    Args:
        strval: change event passed by ``dropdown.observe``; the keys
            ``'type'``, ``'name'`` and ``'new'`` are read.

    Side effects:
        Closes the previously displayed slider (if any), then updates the
        module-level ``view_pla_path``, ``view_seg_path``, ``view_pla_load``,
        ``view_seg_load``, ``slices`` and ``sliders``, and displays the new
        slider.

    Returns:
        An empty tuple in every case (the observer's return value is unused).
    """
    global view_pla_path, view_seg_path
    global view_pla_load, view_seg_load
    global slices, sliders

    # Remove the slider belonging to the previously selected set.
    if sliders is not None:
        sliders.close()
        sliders = None

    # Only react to an actual change of the dropdown's value; any other event
    # carries no usable selection.
    if strval['type'] != 'change' or strval['name'] != 'value':
        return ()
    set_str = strval['new']
    if set_str == no_selection_hint:
        return ()

    view_pla_path = os.path.join(train_path, set_str, set_str + "_fla.nii.gz")
    view_seg_path = os.path.join(train_path, set_str, set_str + "_seg.nii.gz")

    # Both volumes must be loaded: update_slice indexes the image and the
    # segmentation map with the same slice number.
    view_pla_load = nib.load(view_pla_path).get_fdata()
    view_seg_load = nib.load(view_seg_path).get_fdata()

    slices = view_pla_load.shape
    sliders = interactive(
        update_slice,
        val=widgets.IntSlider(value=0, min=0, max=slices[2] - 1, step=1),
    )
    display(sliders)
    return ()

# Dropdown options: the non-hidden entries of the training folder in sorted
# order, preceded by the "nothing selected" placeholder.
dataset_subfolder = sorted(
    entry for entry in os.listdir(train_path) if not entry.startswith('.')
)
dataset_subfolder.insert(0, no_selection_hint)

dropdown = widgets.Dropdown(
    options=dataset_subfolder,
    value=no_selection_hint,
    description='Data Select',
)

# Reload the viewer whenever a different entry is picked.
dropdown.observe(update_set, names='value')
display(dropdown)








Dropdown(description='Data Select', options=('Nothing Selected', '001', '002', '003', '004', '005', '006', '00…

In [5]:
# Split the dataset into training and validation subsets.
#
# Each non-hidden entry of `train_path` is treated as a case folder holding
# "<id>_fla.nii.gz" (image) and "<id>_seg.nii.gz" (segmentation map). The
# cases are shuffled and copied into the flat folders Train/ and Val/.

# Start from empty output folders. Plain Python is used instead of shell
# commands so the cell runs unchanged on Linux, macOS and Windows.
for output_dir in ('Train', 'Val'):
    shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

# data segmentation lists
path_list = [
    str(CLASS) + "_fla.nii.gz"
    for CLASS in os.listdir(train_path)
    if not CLASS.startswith('.')
]
train_list = []   # paths of the image volumes copied into Train/
val_list = []     # paths of the image volumes copied into Val/

random.shuffle(path_list)
pla_num = len(path_list)
for n, file_name in enumerate(path_list):
    case_id = file_name.split('_')[0]
    seg_name = case_id + '_seg.nii.gz'
    img = os.path.join(train_path, case_id, file_name)
    seg = os.path.join(train_path, case_id, seg_name)

    # 80% of images will be used for training, change the number here
    # to use different number of images for training your model.
    if n < 0.8 * pla_num:
        target_dir, target_list = 'Train/', train_list
    else:
        target_dir, target_list = 'Val/', val_list

    # The image and its segmentation always go to the same subset.
    shutil.copy(img, os.path.join(target_dir, file_name))
    target_list.append(os.path.join(target_dir, file_name))
    shutil.copy(seg, os.path.join(target_dir, seg_name))

# print("Finish Segmentation")

## Data preprocessing (Optional)

Images in the original dataset are usually of different sizes, so we sometimes need to resize and normalise them (z-score normalisation is commonly used when preprocessing MRI images) to fit the CNN model. Depending on the images you choose to use for training your model, some other preprocessing methods may also be needed. If preprocessing methods such as cropping are applied, remember to convert the segmentation result back to its original size.

In [6]:
class DataPreProcessor:
    """Holds preprocessing settings and extracts 2D slices from the dataset.

    The constructor only stores its arguments; no data is read until
    ``DataExtract`` is called.
    """

    def __init__(self, list_IDs, batch_size=32, dim=(240,240), n_channels=3,
                 n_classes=2, shuffle=True):
        """Store the configuration values on the instance."""
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        # This class defines no on_epoch_end(), so it must not be called here
        # (doing so made every instantiation fail with AttributeError).

    def DataExtract(self, data_path):
        """Collect every (image, segmentation) slice pair of the training set.

        Args:
            data_path: dataset root; case folders are read from
                ``<data_path>/train/<id>/``, each providing
                ``<id>_fla.nii.gz`` and ``<id>_seg.nii.gz``.

        Returns:
            A list of ``(img_pla, img_seg)`` tuples, one per index along the
            third axis of each volume, with cases visited in sorted order.
        """
        train_dir = os.path.join(data_path, "train")
        slice_pairs = []
        for CLASS in sorted(os.listdir(train_dir)):
            if CLASS.startswith("."):
                continue  # skip hidden entries such as .DS_Store
            vimg_pla_path = os.path.join(train_dir, str(CLASS), str(CLASS) + "_fla.nii.gz")
            vimg_seg_path = os.path.join(train_dir, str(CLASS), str(CLASS) + "_seg.nii.gz")
            vimg_pla_load = nib.load(vimg_pla_path).get_fdata()
            vimg_seg_load = nib.load(vimg_seg_path).get_fdata()
            # shape[2] is an int: iterate over the slice indices, not over it.
            for i in range(vimg_pla_load.shape[2]):
                img_pla = vimg_pla_load[:, :, i]
                img_seg = vimg_seg_load[:, :, i]
                slice_pairs.append((img_pla, img_seg))
        return slice_pairs
    # def ImgProcess(self, data_path):
        
                    
                    
           


## Train-time data augmentation
Generalizability is crucial to a deep learning model and it refers to the performance difference of a model when evaluated on the seen data (training data) versus the unseen data (testing data). Improving the generalizability of these models has always been a difficult challenge. 

**Data Augmentation** is an effective way of improving generalizability, because the augmented data represent a more comprehensive set of possible data samples and thereby reduce the distance between the training set and the validation/testing sets.

There are many data augmentation methods you can choose from in this project, including rotation, shifting, flipping, etc.

You are encouraged to try different augmentation methods to get the best segmentation result.


## Get the data generator ready

In [7]:
class DataGenerator(tensorflow.keras.utils.Sequence):
    """Keras ``Sequence`` that builds batches from ``.npy`` files.

    Each entry of ``list_IDs`` is a path accepted by ``np.load``; the matching
    segmentation file is found by replacing the part after the last
    underscore with ``seg.npy``.

    NOTE(review): the targets are still one class label per sample (1 if the
    segmentation array has a positive sum, else 0), not segmentation masks.
    The sample/label loading below still has to be adapted to the task.
    """

    def __init__(self, list_IDs, batch_size=32, dim=(240,240), n_channels=3,
                 n_classes=2, shuffle=True):
        """Store the configuration and build the first index order."""
        # Let the Keras base class set up its own state.
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` pair."""
        # Positions of this batch inside the (possibly shuffled) index array.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate the positions into sample IDs.
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Rebuild the index array and reshuffle it when shuffling is on."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the samples in ``list_IDs_temp`` and build their labels.

        Returns:
            ``X`` with shape ``(batch_size, *dim, n_channels)`` and the
            one-hot encoded labels with ``n_classes`` columns.
        """
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        for i, ID in enumerate(list_IDs_temp):  # TODO: still requires a fix
            # Store sample
            # Add data augmentation here
            X[i,] = np.load(ID)

            # Store class. Split on the LAST underscore so that underscores
            # in directory names do not truncate the path.
            seg_path = ID.rsplit('_', 1)[0] + '_seg.npy'
            y[i] = min(1, np.sum(np.load(seg_path)))

        return X, tensorflow.keras.utils.to_categorical(y, num_classes=self.n_classes)

In [8]:
# Two-element image size. get_model() takes an `img_size` tuple and appends
# the channel count (3) to it to form the model input shape.
# NOTE(review): DataGenerator defaults to dim=(240, 240) - confirm that the
# two sizes are meant to differ.
img_size = (250, 250)

## Define a metric for the performance of the model
Dice score is used here to evaluate the performance of your model.
More details about the Dice score and other metrics can be found at 
https://towardsdatascience.com/metrics-to-evaluate-your-semantic-segmentation-model-6bcb99639aa2. The Dice score can also be used as the loss function for training your model.

## Build your own model here
The U-Net (https://link.springer.com/chapter/10.1007/978-3-319-24574-4_28) structure is widely used for the medical image segmentation task. You can build your own model or modify the UNet by changing the hyperparameters for our task. If you choose to use Keras, more information about the Keras layers including Conv2D, MaxPooling and Dropout can be found at https://keras.io/api/layers/.

In [10]:
def get_model(img_size, num_classes):
    """Build the segmentation model (unfinished stub).

    At present this only creates the Keras input tensor with shape
    ``img_size + (3,)``. No layers are added and there is no return
    statement, so a call yields None. ``num_classes`` is not used yet.
    """
    inputs = keras.Input(shape=img_size+(3,))
    
    # todo: add the network layers and return the resulting model


Model: "xception"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 124, 124, 32  864         ['input_2[0][0]']                
                                )                                                                 
                                                                                                  
 block1_conv1_bn (BatchNormaliz  (None, 124, 124, 32  128        ['block1_conv1[0][0]']           
 ation)                         )                                                          

## Train your model here
Once you have defined the model and the data generator, you can start training your model.

## Save the model
Once your model is trained, remember to save it for testing.

## Run the model on the test set
After your last Q&A session, you will be given the test set. Run your model on the test set to get the segmentation results and submit your results in a .zip file. If the MRI image is named '100_fla.nii.gz', save your segmentation result as '100_seg.nii.gz'. 