# CNN-based Brain Tumour Segmentation Network
## Import packages
Please make sure you have all the required packages installed. 

In [10]:
import os
import random
import shutil
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import tensorflow
import cv2
import keras
import ipywidgets as widgets
import keras_tuner as kt

from PIL import Image
from matplotlib.widgets import Slider
from ipywidgets import interact, interactive, fixed, interact_manual

from keras import layers
from keras.layers import *
from keras.models import *
# from keras.applications.vgg16 import VGG16
from keras.applications.xception import Xception
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import EarlyStopping
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

from tensorflow import data as tf_data
from tensorflow import image as tf_image
from tensorflow import io as tf_io

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

2024-07-11 11:33:31.991103: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-07-11 11:33:31.991672: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-07-11 11:33:31.991881: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-07-11 11:33:31.992067: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-07-11 11:33:31.992240: I tensorflow/compiler/xla/stream_executo

## Visualise MRI Volume Slices and Segmentation Maps
Each MRI image contains information about a three-dimensional (3D) volume of space. An MRI image is composed of a number of voxels, which is like pixels in 2D images. Here try to visualise the axial plane (usually has a higher resolution) of some of the volumes and the corresponding segmentation maps.

In [11]:
# Data Visualization
# Choose set on the selection bar, then use the trackbar for moving up and down

# messages
no_selection_hint = "Nothing Selected"

# path related
img_path = 'dataset_segmentation/'
train_path = os.path.join(img_path, "train")

# global variables 
view_pla_path = None
view_seg_path = None
view_pla_load = None
view_seg_load = None
slices = None
sliders = None

def update_slice(val):
    global view_pla_load
    global view_seg_load
    imgfla = view_pla_load[:,:,val]
    imgseg = view_seg_load[:,:,val]
    
    plt.figure(figsize=(12, 6))
    plt.subplot(1,2,1)
    plt.imshow(imgfla, cmap='gray')
    plt.title('FLA')

    plt.subplot(1,2,2)
    plt.imshow(imgseg, cmap='gray')
    plt.title('SEG')
    plt.show()
    return()

def update_set(strval):
    global view_pla_path 
    global view_seg_path
    global view_pla_load
    global view_seg_load
    global slices
    global sliders
    try:
        if sliders is not None:
            sliders.close()
    except NameError:
        pass
    if strval['type'] == 'change' and strval['name'] == 'value':
        set_str = strval['new']
    # print(set_str)
    if set_str == no_selection_hint:
        return()
    view_pla_path = os.path.join(train_path, set_str, set_str + "_fla.nii.gz")
    view_seg_path = os.path.join(train_path, set_str, set_str + "_seg.nii.gz")

    view_pla_load = nib.load(view_pla_path).get_fdata()
    view_seg_load = nib.load(view_seg_path).get_fdata()

    slices = view_pla_load.shape
    sliders = interactive(update_slice, val=widgets.IntSlider(value=0, min=0, max=slices[2]-1, step=1) )
    display(sliders)
    return()

dataset_subfolder = []

for CLASS in os.listdir(train_path):
    if not CLASS.startswith('.'):
        dataset_subfolder.append(CLASS)

dataset_subfolder.sort()
dataset_subfolder.insert(0, no_selection_hint)

dropdown = widgets.Dropdown(options=dataset_subfolder, value=no_selection_hint, description='Data Select')

dropdown.observe(update_set, names='value')
display(dropdown)








Dropdown(description='Data Select', options=('Nothing Selected', '001', '002', '003', '004', '005', '006', '00…

In [2]:
# Segmentation of DataSets for training and test

#uncomment if using linux/macos
!rm -rf Train Val
!mkdir Train Val 
!mkdir Train/image Train/masks Val/image Val/masks

#uncomment if using windows
# !rmdir Train Val /s /q
# !md Train Val Train\Yes Train\No Val\Yes Val\No

def nii_to_jpg(nii_file, npy_file):
    nii_img = nib.load(nii_file)
    
    nii_data = nii_img.get_fdata()

    num_slices = nii_data.shape[2]
    
    # Save each slice as a separate .npy file
    for i in range(num_slices):
        slice_data = nii_data[:, :, i]
        jpg_file_slice = npy_file + f'_slice_{i}.jpg'
        img = Image.fromarray(slice_data, mode='L')
        img.save(jpg_file_slice)
    return()

# data segmentation lists
train_list = []
val_list = []

dir_list = os.listdir(train_path)
random.shuffle(dir_list)
data_num = len(dir_list)
for (n, file_name) in enumerate(dir_list):
    if not file_name.startswith('.'):
        view_pla_path = os.path.join(train_path, file_name, file_name + "_fla.nii.gz")
        view_seg_path = os.path.join(train_path, file_name, file_name + "_seg.nii.gz")
        if n < 0.8*data_num:  # Train
            nii_to_jpg(view_pla_path, os.path.join("Train", "Image", file_name + "_pla"))
            nii_to_jpg(view_seg_path, os.path.join("Train", "Target", file_name + "_seg"))
        else: # Val
            nii_to_jpg(view_pla_path, os.path.join("Val", "Image", file_name + "_pla"))
            nii_to_jpg(view_seg_path, os.path.join("Val", "Target", file_name + "_seg"))

print("Finish Segmentation")

NameError: name 'os' is not defined

## Data preprocessing (Optional)

Images in the original dataset are usually in different sizes, so sometimes we need to resize and normalise (z-score is commonly used in preprocessing the MRI images) them to fit the CNN model. Depending on the images you choose to use for training your model, some other preprocessing methods. If preprocessing methods like cropping is applied, remember to convert the segmentation result back to its original size. 

In [13]:
# class DataPreProcessor:
#     def __init__(self, list_IDs, batch_size=32, dim=(240,240), n_channels=3,
#                  n_classes=2, shuffle=True):
#         'Initialization'
#         self.dim = dim
#         self.batch_size = batch_size
#         self.list_IDs = list_IDs
#         self.n_channels = n_channels
#         self.n_classes = n_classes
#         self.shuffle = shuffle
#         self.on_epoch_end()


#     def DataExtract(self, data_path):
#         dirs = os.listdir(data_path)
#         dirs.sort()
#         for CLASS in dirs:
#             if not CLASS.startswith("."):
#                 vimg_pla_path = os.path.join(data_path, "train", str(CLASS), str(CLASS) + "_pla.nii.gz")
#                 vimg_seg_path = os.path.join(data_path, "train", str(CLASS), str(CLASS) + "_seg.nii.gz")
#                 vimg_pla_load = nib.load(vimg_pla_path).get_fdata()
#                 vimg_seg_load = nib.load(vimg_seg_path).get_fdata()
#                 for i in vimg_pla_load.shape[2]:
#                     img_pla = vimg_pla_load[:,:,i]
#                     img_seg = vimg_seg_load[:,:,i]

#     # def ImgProcess(self, data_path):
        
                    
                    
           


## Train-time data augmentation
Generalizability is crucial to a deep learning model and it refers to the performance difference of a model when evaluated on the seen data (training data) versus the unseen data (testing data). Improving the generalizability of these models has always been a difficult challenge. 

**Data Augmentation** is an effective way of improving the generalizability, because the augmented data will represent a more comprehensive set of possible data samples and minimizing the distance between the training and validation/testing sets.

There are many data augmentation methods you can choose in this projects including rotation, shifting, flipping, etc.

You are encouraged to try different augmentation method to get the best segmentation result.


## Get the data generator ready

In [14]:
class DataGenerator(tensorflow.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, batch_size=32, dim=(240,240), n_channels=3,
                 n_classes=2, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        # Generate data
        for i, ID in enumerate(list_IDs_temp): # there requires fix
            # Store sample
            # Add data augmentation here
            X[i,] = np.load(ID)

            # Store class
            y[i] = min(1,np.sum(np.load(ID.split('_')[0]+'_seg.npy')))

        return X, tensorflow.keras.utils.to_categorical(y, num_classes=self.n_classes)
    



In [15]:
def get_dataset(
    batch_size,
    img_size,
    input_img_paths,
    target_img_paths,
    max_dataset_len=None,
):
    """Returns a TF Dataset."""

    def load_img_masks(input_img_path, target_img_path):
        input_img = tf_io.read_file(input_img_path)
        input_img = tf_io.decode_png(input_img, channels=3)
        input_img = tf_image.resize(input_img, img_size)
        input_img = tf_image.convert_image_dtype(input_img, "float32")

        target_img = tf_io.read_file(target_img_path)
        target_img = tf_io.decode_png(target_img, channels=1)
        target_img = tf_image.resize(target_img, img_size, method="nearest")
        target_img = tf_image.convert_image_dtype(target_img, "uint8")

        # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
        target_img -= 1
        return input_img, target_img

    # For faster debugging, limit the size of data
    if max_dataset_len:
        input_img_paths = input_img_paths[:max_dataset_len]
        target_img_paths = target_img_paths[:max_dataset_len]
    dataset = tf_data.Dataset.from_tensor_slices((input_img_paths, target_img_paths))
    dataset = dataset.map(load_img_masks, num_parallel_calls=tf_data.AUTOTUNE)
    return dataset.batch(batch_size)

In [29]:
train_input_dir = "Train/Image/"
train_target_dir = "Train/Target/"
val_input_dir = "Val/Image/"
val_target_dir = "Val/Target/"
img_size = (250, 250)
num_classes = 3
batch_size = 1

train_input_img_paths = sorted(
    [
        os.path.join(train_input_dir, fname)
        for fname in os.listdir(train_input_dir)
        if fname.endswith(".jpg")
    ]
)

val_input_img_paths = sorted(
    [
        os.path.join(val_input_dir, fname)
        for fname in os.listdir(val_input_dir)
        if fname.endswith(".jpg")
    ]
)

train_target_img_paths = sorted(
    [
        os.path.join(train_target_dir, fname)
        for fname in os.listdir(train_target_dir)
        if fname.endswith(".jpg") and not fname.startswith(".")
    ]
)

val_target_img_paths = sorted(
    [
        os.path.join(val_target_dir, fname)
        for fname in os.listdir(val_target_dir)
        if fname.endswith(".jpg")
    ]
)

print("Number of Train samples:", len(train_input_img_paths))

# for input_path, target_path in zip(train_input_img_paths[:10], train_target_img_paths[:10]):
#     print(input_path, "|", target_path)

print("Number of Val samples:", len(val_input_img_paths))

# for input_path, target_path in zip(val_input_img_paths[:10], val_target_img_paths[:10]):
#     print(input_path, "|", target_path)



Number of Train samples: 26040
Number of Val samples: 6510


In [30]:
train_input_img_paths_ = train_input_img_paths[:]
train_target_img_paths_ = train_target_img_paths[:]

val_input_img_paths_ = val_input_img_paths[:]
val_target_img_paths_ = val_target_img_paths[:]

train_dataset = get_dataset(
    batch_size,
    img_size,
    train_input_img_paths,
    train_target_img_paths,
    max_dataset_len=1000,
)

valid_dataset = get_dataset(
    batch_size, img_size, val_input_img_paths, val_target_img_paths
)


## Define a metric for the performance of the model
Dice score is used here to evaluate the performance of your model.
More details about the Dice score and other metrics can be found at 
https://towardsdatascience.com/metrics-to-evaluate-your-semantic-segmentation-model-6bcb99639aa2. Dice score can be also used as the loss function for training your model.

## Build your own model here
The U-Net (https://link.springer.com/chapter/10.1007/978-3-319-24574-4_28) structure is widely used for the medical image segmentation task. You can build your own model or modify the UNet by changing the hyperparameters for our task. If you choose to use Keras, more information about the Keras layers including Conv2D, MaxPooling and Dropout can be found at https://keras.io/api/layers/.

In [35]:
def get_model(img_size, num_classes):
    inputs = keras.Input(shape=img_size + (3,))

    ### [First half of the network: downsampling inputs] ###

    # Entry block
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    # Blocks 1, 2, 3 are identical apart from the feature depth.
    for filters in [64, 128, 256]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = layers.Conv2D(filters, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    ### [Second half of the network: upsampling inputs] ###

    for filters in [256, 128, 64, 32]:
        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.UpSampling2D(2)(x)

        # Project residual
        residual = layers.UpSampling2D(2)(previous_block_activation)
        residual = layers.Conv2D(filters, 1, padding="same")(residual)
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    # Add a per-pixel classification layer
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

    # Define the model
    model = keras.Model(inputs, outputs)
    return model


# Build model
model = get_model(img_size, num_classes)
model.summary()


Model: "xception"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 124, 124, 32  864         ['input_5[0][0]']                
                                )                                                                 
                                                                                                  
 block1_conv1_bn (BatchNormaliz  (None, 124, 124, 32  128        ['block1_conv1[0][0]']           
 ation)                         )                                                          

## Train your model here
Once you defined the model and data generator, you can start training your model.

In [36]:
model.compile(
    optimizer=keras.optimizers.Adam(1e-4), loss="sparse_categorical_crossentropy"
)

callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.keras", save_best_only=True)
]

# Train the model, doing validation at the end of each epoch.
epochs = 50
model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=valid_dataset,
    callbacks=callbacks,
    verbose=2
)

Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
      app.start()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 739, in start
      self.io_loop.start()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell
      await result
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3048, in run_cell
      result = self._run_cell(
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3103, in _run_cell
      result = runner(coro)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3308, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3490, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_11742/307275434.py", line 11, in <module>
      model.fit(
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/training.py", line 1024, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/training.py", line 1082, in compute_loss
      return self.compiled_loss(
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/losses.py", line 284, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/losses.py", line 2098, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "/home/terence/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/keras/backend.py", line 5633, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [64,2048] and labels shape [62500]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_43983]

## Save the model
Once your model is trained, remember to save it for testing.

model.save("Win.h5")

## Run the model on the test set
After your last Q&A session, you will be given the test set. Run your model on the test set to get the segmentation results and submit your results in a .zip file. If the MRI image is named '100_fla.nii.gz', save your segmentation result as '100_seg.nii.gz'. 