# Pancreas Predictions on Test Set with 2D model.
2D-UNET approach.
The data are expected to be stored on Google Drive.

## Test Setup
- Image resolution: 128x128x128


## Environment Setup
Here we set up the environment. This step is required to access the data stored on Google Drive.

In [None]:
# SETUP FOR DRIVE ENVIRONMENT
# Mount Google Drive inside the Colab runtime. ROOT_PATH is the mount point
# and is reused below to build MAIN_PATH and the ground-truth label paths.
from google.colab import drive
ROOT_PATH = '/content/gdrive'
drive.mount(ROOT_PATH)

In [None]:
!pip install tensorflow_io

In [None]:
import glob
import tensorflow as tf
import os
import numpy as np
import nibabel as nib
import pandas as pd
import re
import tensorflow_io as tfio
import matplotlib.pyplot as plt

# FOLDER TO LOAD DATA FROM
# Every project folder is located under
# "<ROOT_PATH>/My Drive/Colab Notebooks/PANCREAS_2.5D".

MAIN_PATH = os.path.join(ROOT_PATH, 'My Drive', 'Colab Notebooks')
# Notebooks folder of the project.
CODE_PATH = os.path.join(MAIN_PATH, 'PANCREAS_2.5D', 'Notebooks')
# Test-set data folder.
DATA_PATH = os.path.join(MAIN_PATH, 'PANCREAS_2.5D', 'Data', 'Test')
# Model folder of the project.
MODEL_PATH = os.path.join(MAIN_PATH, 'PANCREAS_2.5D', 'Model')
# Folder holding the "<rows>_<columns>_<slices>_reduced.hdf5" file read below.
DATA_HDF5_PATH = os.path.join(MAIN_PATH, 'PANCREAS_2.5D', 'HDF5')

# Collect the candidate test folders: sub-directories of ROOT_FOLDER whose
# name contains 'DATA', sorted alphabetically.
# NOTE(review): ROOT_FOLDER is not defined in the visible part of this
# notebook (only ROOT_PATH and MAIN_PATH are) - confirm where it is set.
printed_test_folders = sorted(
    entry for entry in os.listdir(ROOT_FOLDER)
    if 'DATA' in entry and os.path.isdir(os.path.join(ROOT_FOLDER, entry))
)
if not printed_test_folders:
    # Without any candidate the prompt below could never be satisfied and
    # the loop would run forever.
    raise FileNotFoundError(
        'No test folder found in {}'.format(ROOT_FOLDER))

# Ask for test folder
test_folders = list(printed_test_folders)
test_folder_found = False
while not test_folder_found:
    for test_index, test_folder in enumerate(test_folders):
        print('{}\t{}'.format(test_index, test_folder))
    test_value = input('Please select test folder:\n')
    try:
        selected_index = int(test_value)
        # Reject negative numbers explicitly: Python would otherwise treat
        # them as indices counted from the end of the list.
        if selected_index < 0:
            raise IndexError('list index out of range')
        selected_name = test_folders[selected_index]
    except (ValueError, IndexError) as e:
        print(e)
        print('Please enter a valid number')
        continue
    # Make sure the folder still exists before accepting the choice.
    if selected_name in os.listdir(ROOT_FOLDER):
        test_folder_found = True
        test_folder = os.path.join(ROOT_FOLDER, selected_name)
        print('Folder: {}'.format(test_folder))
    else:
        print('Could not find folder with test value {:02d}'.format(
            selected_index))

# Folders of the selected test: the model(s) and the data.
model_folder = os.path.join(test_folder, 'MODEL')
data_folder = os.path.join(test_folder, 'DATA')
if 'Test_2D_00' in test_folder:
    # Test_2D_00 reads its data from the shared test-set folder instead.
    data_folder = os.path.join(
        ROOT_PATH, 'My Drive', 'Colab Notebooks', 'PANCREAS_2.5D', 'Data', 'Test')

model_filename = ''
# Check existence of model in model_folder.
# The folder is listed once so that the indices printed to the user and the
# index used for the lookup always refer to the same ordering.
model_files = sorted(os.listdir(model_folder))
if len(model_files) > 1:
    print('Multiple models exist, please pick one')
    for model_index, model_file in enumerate(model_files):
        print('{}\t{}'.format(model_index, model_file))
    model_selected = False
    while not model_selected:
        try:
            model_index = int(input('Select model:\n'))
        except ValueError:
            # Non-numeric input: ask again instead of aborting the cell.
            print('Please enter a valid number')
            continue
        if 0 <= model_index < len(model_files):
            model_filename = os.path.join(
                model_folder, model_files[model_index])
            model_selected = True
elif len(model_files) == 1:
    h5_files = glob.glob(os.path.join(model_folder, '*.h5'))
    if not h5_files:
        # The only entry of the folder is not a .h5 file.
        raise FileNotFoundError(
            'No .h5 model in folder: {}'.format(model_folder))
    model_filename = h5_files[0]
else:
    print('No model in folder!')
    # A bare `raise` has no active exception to re-raise here, so raise an
    # explicit, descriptive error instead.
    raise FileNotFoundError('No model in folder: {}'.format(model_folder))

# Load model without compiling it
# The volume size is parsed from the model file name, which must look like
# "model_<d>_<RRR>x<CCC>x<SSS>.h5" (rows x columns x slices).
try:
    print('Loading model: {}'.format(model_filename))
    model = tf.keras.models.load_model(model_filename, compile=False)
    # Extract rows, columns, and slices from model filename
    model_string = model_filename[model_filename.find('model'):]
    # Raw string so that \d is a regex class and not a string escape; the
    # dot before "h5" is escaped so it only matches a literal dot.
    pattern = re.compile(r"model_\d{1}_(\d{3})x(\d{3})x(\d{3})\.h5")
    name_match = pattern.match(model_string)
    if name_match is None:
        print('Model filename does not match pattern')
        raise ValueError(
            'Model filename does not match pattern: {}'.format(model_string))
    N_ROWS, N_COLUMNS, N_SLICES = (int(size) for size in name_match.groups())
except Exception as e:
    print(e)
    print('Couldn\'t load any model.')
    # Nothing below can run without `model` and the N_* sizes: stop here
    # rather than failing later with an unrelated NameError.
    raise

# Type
# Naming conventions of the volume/label files.
# NOTE(review): SHAPE_OF_INTEREST, VOLUME_TYPE and the two templates are not
# referenced anywhere else in the visible notebook - confirm they are needed.
SHAPE_OF_INTEREST = 'reshape_knee'
VOLUME_TYPE = 'nii'
# Format-string template of a CT volume path (fields: path, patient, shape,
# rows, columns, slices, volume).
VOLUME_TEMPLATE = ("{path}/Patient{patient}/volumeCT_{shape}_"
                   "{rows}_{columns}_{slices}_{patient}.{volume}")

# Format-string template of a label volume path (same fields as above).
LABEL_TEMPLATE = ("{path}/Patient{patient}/volumeLabel_{shape}_"
                  "{rows}_{columns}_{slices}_{patient}.{volume}")
########################
#     SEGMENTATION     #
########################
# Number of segmentation classes; the metric functions loop over
# range(N_CLASSES).
N_CLASSES = 3
'''
C0: background      (value 0 in the original label volume)
C1: 'pancreas'      (value 1 in the original label volume)
C2: pancreas lesion (value 2 in the original label volume)
'''

## Perform Predictions
Here we use our generator to perform predictions on the test set.

In [None]:
def expand_dataset(volume, label):
    '''
    Append a trailing axis of size one to `volume` and return it together
    with the untouched `label`.
    '''
    return tf.expand_dims(volume, axis=-1), label


# HDF5 file holding the test images and labels for the current volume size.
hdf5_filepath = os.path.join(
    DATA_HDF5_PATH, f'{N_ROWS}_{N_COLUMNS}_{N_SLICES}_reduced.hdf5')

# Zip together samples and corresponding labels
x_test = tfio.IODataset.from_hdf5(hdf5_filepath, dataset='/image_test')
y_test = tfio.IODataset.from_hdf5(hdf5_filepath, dataset='/label_test')

# Batches of 128 samples (incomplete last batch dropped); the trailing axis
# is added to the images after batching.
test = (
    tf.data.Dataset.zip((x_test, y_test))
    .repeat(1)
    .batch(128, drop_remainder=True)
    .map(expand_dataset, num_parallel_calls=tf.data.experimental.AUTOTUNE)
)

# Predict data
predictions = model.predict(test)

In [None]:
import h5py


def read_image_label_hdf5(patient, slice_val):
    '''
    Return the (image, label) pair stored at row
    `patient*128 + slice_val` of the 'image_test' and 'label_test'
    datasets of the "<rows>_<columns>_<slices>_reduced.hdf5" file.

    Both `patient` and `slice_val` are used as given (the callers in
    this notebook pass 0-based values).
    '''
    file_name = f'{N_ROWS}_{N_COLUMNS}_{N_SLICES}_reduced.hdf5'
    flat_index = patient*128+slice_val
    with h5py.File(os.path.join(DATA_HDF5_PATH, file_name), 'r') as hdf5_file:
        image = hdf5_file['image_test'][flat_index]
        label = hdf5_file['label_test'][flat_index]
    return image, label


def plot_label_prediction(label, prediction):
    '''
    Draw a 2x3 grid of grayscale images: the three channels of `label`
    on the first row and the three channels of `prediction` on the
    second row.
    '''
    # NOTE(review): the titles mention Femur/Tibia while the class list of
    # this notebook describes pancreas / pancreas lesion - confirm wording.
    class_names = ('Background', 'Femur', 'Tibia')
    plt.figure(figsize=(12, 8))
    plt.subplots_adjust(wspace=0.2)
    rows = (('True', label), ('Predicted', prediction))
    for row_index, (prefix, volume) in enumerate(rows):
        for channel, class_name in enumerate(class_names):
            plt.subplot(2, 3, row_index * 3 + channel + 1)
            plt.imshow(volume[:, :, channel], cmap='gray')
            plt.title('{} {}'.format(prefix, class_name))


def plot_prediction_remapped(prediction, remapped):
    '''
    Draw a 1x5 grid: the raw `prediction`, the `remapped` label image,
    and the three channels of `prediction` (the last four in grayscale).
    '''
    panels = (
        ('Predicted', prediction, {}),
        ('Pred Remap', remapped, {'cmap': 'gray'}),
        ('Pred Back', prediction[:, :, 0], {'cmap': 'gray'}),
        ('Pred Femur', prediction[:, :, 1], {'cmap': 'gray'}),
        ('Pred Tibia', prediction[:, :, 2], {'cmap': 'gray'}),
    )
    plt.figure(figsize=(16, 8))
    for position, (title, image, imshow_kwargs) in enumerate(panels, start=1):
        plt.subplot(1, 5, position)
        plt.title(title)
        plt.imshow(image, **imshow_kwargs)


def remapLabelsSingleDimension(labels3D):
    '''
    Collapse a (rows, columns, channels) score map into a
    (rows, columns) uint8 label image.

    A pixel gets value 1 where channel 1 exceeds 0.5 and value 2 where
    channel 2 exceeds 0.5 (channel 2 wins when both do); every other
    pixel is 0.
    '''
    labels = np.zeros(labels3D.shape[:2], dtype=np.uint8)
    # Later classes overwrite earlier ones, so the order matters.
    for class_value in (1, 2):
        labels[labels3D[:, :, class_value] > 0.5] = class_value
    return labels


def remap3DLabelsSingleDimension(labels3D):
    '''
    Collapse a 4-D score volume (three leading axes plus a trailing
    channel axis) into a uint8 label volume without the channel axis.

    A voxel gets value 1 where channel 1 exceeds 0.5 and value 2 where
    channel 2 exceeds 0.5 (channel 2 wins when both do); every other
    voxel is 0.
    '''
    labels = np.zeros(labels3D.shape[:3], dtype=np.uint8)
    # Later classes overwrite earlier ones, so the order matters.
    for class_value in (1, 2):
        labels[labels3D[:, :, :, class_value] > 0.5] = class_value
    return labels

In [None]:
patient = 2  # @param {type:"slider", min:1, max:20, step:1}
slice_val = 44  # @param {type:"slider", min:1, max:128, step:1}

# The sliders are 1-based; shift both values to 0-based indices.
patient -= 1
slice_val -= 1

image, label = read_image_label_hdf5(patient, slice_val)
# Row of `predictions` for this patient/slice pair.
flat_index = patient*128+slice_val
plot_label_prediction(label, predictions[flat_index, :, :])

### Store predictions in .nii format
We first test the remapping of the predictions, then we convert the predictions to .nii format and store them on Google Drive.

In [None]:
patient = 1  # @param {type:"slider", min:1, max:20, step:1}
slice_val = 39  # @param {type:"slider", min:1, max:128, step:1}

# The sliders are 1-based; shift both values to 0-based indices.
patient -= 1
slice_val -= 1

# Row of `predictions` for this patient/slice pair.
flat_index = patient*128+slice_val
remapped = remapLabelsSingleDimension(predictions[flat_index])

plot_prediction_remapped(predictions[flat_index, :, :], remapped)

In [None]:
from pathlib import Path
from tqdm.auto import tqdm


def loadTrueLabel(patient_index):
    '''
    Load the ground-truth label volume of patient `patient_index`.

    Returns a tuple (nibabel image, numpy array with the voxel data).
    '''
    # NOTE(review): the labels are read from a hard-coded
    # 'MEDACTA/Test_00/Data' folder rather than from DATA_PATH or
    # data_folder - confirm this is the intended location.
    data_root = Path(ROOT_PATH) / 'My Drive' / 'Colab Notebooks' / \
        'MEDACTA' / 'Test_00' / 'Data'
    label_name = (f"volumeLabel_reshape_knee_"
                  f"{N_ROWS}_{N_COLUMNS}_{N_SLICES}_{patient_index}.nii")
    true_label = nib.load(data_root / f"Patient{patient_index}" / label_name)
    return true_label, np.asarray(true_label.get_fdata())


# Extract test value (text after the last underscore of the folder path)
test_number = int(test_folder.rsplit('_', 1)[-1])

# Create directory
predictions_folder = Path(ROOT_FOLDER) / \
    test_folders[int(test_value)] / 'Predictions'
predictions_folder.mkdir(exist_ok=True, parents=True)

# Get model value for filename: the text between "model_" and the
# "_<N_ROWS>" part of the model file name. It does not depend on the patient.
model_string = model_filename[model_filename.find('model_'):]
model_value = model_string[model_string.find(
    '_')+1:model_string.find(str(N_ROWS))-1]

for patient_index in tqdm(range(181, 201), total=20):
    # Get header from label
    true_label, arr = loadTrueLabel(patient_index)
    # Rows of `predictions` belonging to this patient (128 per patient).
    first_row = (patient_index-181)*128
    prediction = predictions[first_row:first_row+128, :, :, :]
    # Swap axes: slices go from the first axis to the last one, and the
    # class scores are remapped to a single label value per pixel.
    predictionCorrect = np.zeros(shape=(N_ROWS, N_COLUMNS, N_SLICES))
    for i in range(N_SLICES):
        predictionCorrect[:, :, i] = remapLabelsSingleDimension(
            prediction[i, :, :, :])
    # Create image, reusing the header of the ground-truth label
    img = nib.Nifti1Image(predictionCorrect, None, header=true_label.header)
    # Get patient id
    patient_id = str(patient_index)
    # Set up filename
    file_name = 'volumeLabel_predicted_2D_{}_{}_{}_t{:02d}_m{:02d}.nii'.format(
        N_ROWS,
        N_COLUMNS,
        patient_id,
        int(test_number),
        int(model_value)
    )
    patient_folder = Path(predictions_folder) / f'Patient{patient_id}'
    patient_folder.mkdir(exist_ok=True, parents=True)
    nib.save(img, os.path.join(patient_folder, file_name))

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




### Compute and save metrics

Here, for each predicted volume, we compute the following metrics:

- Sørensen-Dice index, defined as: $D=\dfrac{2|X \cap Y|}{|X| + |Y|}=\dfrac{2TP}{2TP+FP+FN}$

- Jaccard index, defined as: $J = \dfrac{D}{2-D}$

We compute these values for each volume and for each class; we then also compute the average across classes and the average across all subjects.
The data are stored in a Pandas DataFrame and then saved to a *csv* file.

In [None]:
def count_tp(cl, trueLabel, predictedLabel):
    '''
    Return the number of positions where both the predicted and the
    true label are equal to class `cl` (true positives).
    '''
    both_cl = (predictedLabel == cl) & (trueLabel == cl)
    return int(np.count_nonzero(both_cl))


def count_fp(cl, trueLabel, predictedLabel):
    '''
    Return total number of false positives for the specified class, given
    the true and predicted labels.

    A false positive is a position predicted as `cl` whose true label is
    not `cl`.
    '''
    wrongly_predicted = (predictedLabel == cl) & (trueLabel != cl)
    return int(np.count_nonzero(wrongly_predicted))


def count_tn(cl, trueLabel, predictedLabel):
    '''
    Return total number of true negatives for the specified class, given
    the true and predicted labels.

    A true negative is a position where neither the predicted nor the
    true label is `cl`.
    '''
    neither_cl = (predictedLabel != cl) & (trueLabel != cl)
    return int(np.count_nonzero(neither_cl))


def count_fn(cl, trueLabel, predictedLabel):
    '''
    Return total number of false negatives for the specified class, given
    the true and predicted labels.

    A false negative is a position whose true label is `cl` but that was
    not predicted as `cl`.
    '''
    missed = (predictedLabel != cl) & (trueLabel == cl)
    return int(np.count_nonzero(missed))


def compute_ppv_class(cl, trueLabel, predictedLabel):
    '''
    Return the positive predictive value tp / (tp + fp) of class `cl`,
    with the counts taken from count_tp and count_fp.

    Returns 0 when tp + fp is zero, matching the convention used by
    compute_precision.
    '''
    tp = count_tp(cl, trueLabel, predictedLabel)
    fp = count_fp(cl, trueLabel, predictedLabel)
    try:
        ppv = tp/(tp+fp)
    except ZeroDivisionError:
        ppv = 0
    return ppv


def compute_dice_class(cl, trueLabel, predictedLabel):
    '''
    Return 2*tp / (2*tp + fp + fn) for class `cl`, with the counts taken
    from count_tp, count_fp and count_fn.

    Returns 0 when the denominator is zero.
    '''
    true_positives = count_tp(cl, trueLabel, predictedLabel)
    false_positives = count_fp(cl, trueLabel, predictedLabel)
    false_negatives = count_fn(cl, trueLabel, predictedLabel)
    denominator = 2*true_positives + false_positives + false_negatives
    if denominator == 0:
        return 0
    return 2*true_positives/denominator


def compute_dice(trueLabel, predictedLabel, return_average=True):
    '''
    Return the per-class values of compute_dice_class for the classes
    0 .. N_CLASSES-1 as a float array.

    When `return_average` is true the result is the tuple
    (per-class values, their mean); otherwise only the per-class values.
    '''
    dice_values = np.array(
        [compute_dice_class(cl_value, trueLabel, predictedLabel)
         for cl_value in range(N_CLASSES)],
        dtype=float)
    if not return_average:
        return dice_values
    return dice_values, np.mean(dice_values)


def compute_jaccard(trueLabel, predictedLabel, return_average=True):
    '''
    Return the per-class Jaccard values D / (2 - D), where D is the value
    of compute_dice_class for each class 0 .. N_CLASSES-1.

    When `return_average` is true the result is the tuple
    (per-class values, their mean); otherwise only the per-class values.
    '''
    per_class_dice = [compute_dice_class(cl_value, trueLabel, predictedLabel)
                      for cl_value in range(N_CLASSES)]
    jaccard_values = np.array(
        [dice/(2-dice) for dice in per_class_dice], dtype=float)
    if not return_average:
        return jaccard_values
    return jaccard_values, np.mean(jaccard_values)


def compute_sensitivity(trueLabel, predictedLabel):
    '''
    Return, for each class 0 .. N_CLASSES-1, the value tp / (tp + fn) as
    a float array, with the counts taken from count_tp and count_fn.

    A class for which tp + fn is zero gets the value 0.
    '''
    sensitivity_values = np.zeros(N_CLASSES)
    for cl_value in range(N_CLASSES):
        tp = count_tp(cl_value, trueLabel, predictedLabel)
        fn = count_fn(cl_value, trueLabel, predictedLabel)
        try:
            sensitivity_values[cl_value] = tp/(tp+fn)
        except ZeroDivisionError:
            # Only the empty-denominator case is expected here; any other
            # error must not be hidden.
            sensitivity_values[cl_value] = 0
    return sensitivity_values


def compute_precision(trueLabel, predictedLabel):
    '''
    Return, for each class 0 .. N_CLASSES-1, the value tp / (tp + fp) as
    a float array, with the counts taken from count_tp and count_fp.

    A class for which tp + fp is zero gets the value 0.
    '''
    precision_values = np.zeros(N_CLASSES)
    for cl_value in range(N_CLASSES):
        true_positives = count_tp(cl_value, trueLabel, predictedLabel)
        false_positives = count_fp(cl_value, trueLabel, predictedLabel)
        total = true_positives + false_positives
        precision_values[cl_value] = true_positives/total if total != 0 else 0
    return precision_values


def compute_for(trueLabel, predictedLabel):
    '''
    Return, for each class 0 .. N_CLASSES-1, the value fn / (tn + fn) as
    a float array, with the counts taken from count_tn and count_fn.

    A class for which tn + fn is zero gets the value 0.
    '''
    for_values = np.zeros(N_CLASSES)
    for cl_value in range(N_CLASSES):
        true_negatives = count_tn(cl_value, trueLabel, predictedLabel)
        false_negatives = count_fn(cl_value, trueLabel, predictedLabel)
        total = true_negatives + false_negatives
        for_values[cl_value] = false_negatives/total if total != 0 else 0
    return for_values


# Set up dataframe to store metrics: one row per patient id (181..200) and
# one column per metric/class combination.
metric_columns = ['Patient_Id']
# Dice and Jaccard have one column per class plus the class average.
for prefix in ('Dice', 'Jaccard'):
    metric_columns += [f'{prefix}_{cl}' for cl in range(3)]
    metric_columns.append(f'{prefix}_Mean')
# Sensitivity, precision and FOR only have the per-class columns.
for prefix in ('Sens', 'Prec', 'For'):
    metric_columns += [f'{prefix}_{cl}' for cl in range(3)]
metrics_df = pd.DataFrame(index=range(181, 201), columns=metric_columns)


# Compute every metric for each test patient and store it in metrics_df.
for patient_index in range(181, 201):
    # Get header from label
    true_label_nii, trueLabel = loadTrueLabel(patient_index)
    # Rows of `predictions` belonging to this patient (128 per patient).
    prediction = predictions[(patient_index-181) *
                             128:(patient_index-180)*128, :, :, :]
    # Rebuild the predicted label volume with the slices on the last axis.
    predictedLabel = np.zeros(shape=(N_ROWS, N_COLUMNS, N_SLICES))
    for i in range(N_SLICES):
        predictedLabel[:, :, i] = remapLabelsSingleDimension(
            prediction[i, :, :, :])
    # Compute all the metrics
    dice_values, dice_mean = compute_dice(trueLabel, predictedLabel)
    jaccard_values, jaccard_mean = compute_jaccard(trueLabel, predictedLabel)
    sens_values = compute_sensitivity(trueLabel, predictedLabel)
    prec_values = compute_precision(trueLabel, predictedLabel)
    for_values = compute_for(trueLabel, predictedLabel)

    patient_id = patient_index

    row_values = {
        'Patient_Id': patient_id,
        'Dice_Mean': dice_mean,
        'Jaccard_Mean': jaccard_mean,
    }
    for cl_value in range(N_CLASSES):
        row_values[f'Dice_{cl_value}'] = dice_values[cl_value]
        row_values[f'Jaccard_{cl_value}'] = jaccard_values[cl_value]
        row_values[f'Sens_{cl_value}'] = sens_values[cl_value]
        row_values[f'Prec_{cl_value}'] = prec_values[cl_value]
        row_values[f'For_{cl_value}'] = for_values[cl_value]
    # Write through a single .loc[row, column] call: the chained form
    # `metrics_df.<column>.loc[row] = value` may assign to a temporary copy
    # and leave the dataframe unchanged.
    for column_name, value in row_values.items():
        metrics_df.loc[patient_index, column_name] = value

summary_path = predictions_folder / 'Summary'
summary_path.mkdir(exist_ok=True, parents=True)
# Save dataframe
metrics_df.to_csv(
    summary_path / f"summary_t{int(test_number):02d}_m{int(model_value):02d}.csv")
print('Summary saved')