# Use the DECiSION model to predict segmentation maps
Use a trained U-Net model created by `DECiSION_train.ipynb` to produce segmentation maps for MRI images.

# Import packages

In [None]:
import DECiSION_settings as settings
from thesis_common import read_images, read_groundtruths, convert_pred_to_img, group_images,\
    show_image, create_hdf5_db, convert_img_to_pred
from thesis_metric_loss import evaluate_model, dice_coef,\
    weighted_pixelwise_crossentropy_loss

from dltoolkit.utils.generic import list_images
from dltoolkit.nn.segment import UNet_NN
from dltoolkit.utils.visual import plot_roc_curve, plot_precision_recall_curve,\
    print_confusion_matrix, print_classification_report

from keras.optimizers import Adam
    
import os, cv2, shutil, time
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

# Determine test/inference settings
Set `IS_INFERENCE` to `False` to create segmentation maps for test MRI images for which ground truths **are** available. This is typically used to obtain a final test error estimate at the very end of the training process. Set it to `True` to use a trained model for MRI images for which ground truths are **not** available, i.e. simulating a production scenario.

During development set `IS_DEVELOPMENT` to `True` to always use the training data set. 

In [None]:
# True: apply the model to test images WITHOUT ground truths; False: use test images WITH ground truths
IS_INFERENCE = False
# True: skip the test set conversion and reuse the training data (checked before IS_INFERENCE)
IS_DEVELOPMENT = True

# Path to the trained model to load
Enter the full path to the saved trained model here.

In [None]:
# File passed to model.load_weights(); it must belong to the architecture that is built
# with the build_model_* call further down in this notebook
TRAINED_MODEL_NAME = "../savedmodels/DECiSION_UNet_brain_3layer_BN_ep100.model"
# TRAINED_MODEL_NAME = "../savedmodels/DECiSION_UNet_brain_4layer_BN_ep100.model"

# Convert test data to HDF5
The first function `perform_hdf5_conversion_test()` converts data in the `test` folder to HDF5. Data comprises MRI images as well as their ground truths. This function is used to apply a trained model to MRI images for which ground truths are available so that various performance metrics can be calculated.

The second function `perform_hdf5_conversion_inference()` does the same but does not include ground truths. It is used to apply a trained model to MRI images for which ground truths are not available, e.g. in a production environment.

**Note**: during development and pipeline testing neither function is used. Instead, the training/validation set is used, the same sets used to train the model.

In [None]:
def _list_patient_folders(root):
    """Return the sorted paths of all sub-folders found directly inside `root`."""
    return sorted(os.path.join(root, entry.name)
                  for entry in os.scandir(root) if entry.is_dir())


def _collect_patient_slices(patient_folders, settings):
    """Return the image paths of all patient folders, sorted and slice-limited per folder."""
    paths = []
    for folder in patient_folders:
        found = sorted(list_images(basePath=folder, validExts=settings.IMG_EXTENSION))
        # Only keep the configured slice range of each individual patient
        paths.extend(found[settings.SLICE_START:settings.SLICE_END])
    return paths


def perform_hdf5_conversion_test(settings):
    """Convert the TEST images AND their ground truths to HDF5. The data is NOT shuffled.

    Patient folders are read from `settings.TEST_PATH`/`settings.FLDR_IMAGES` and
    `settings.TEST_PATH`/`settings.FLDR_GROUND_TRUTH`. Per patient only the slices
    `settings.SLICE_START:settings.SLICE_END` of the sorted file list are used.

    :param settings: settings module providing the paths, slice range, image dimensions
        and HDF5 key/extension
    :return: list holding the two values returned by `create_hdf5_db()`: the one for the
        images first, the one for the ground truths second
    :raises ValueError: if the number of patient folders with images differs from the
        number of patient folders with ground truths
    """
    img_path = os.path.join(settings.TEST_PATH, settings.FLDR_IMAGES)
    msk_path = os.path.join(settings.TEST_PATH, settings.FLDR_GROUND_TRUTH)

    # One sub-folder per patient, sorted so that images and ground truths line up
    patient_fld_imgs = _list_patient_folders(img_path)
    patient_fld_masks = _list_patient_folders(msk_path)

    # Pairing folders of unequal number would silently drop patients and misalign
    # images and ground truths, so fail loudly instead
    if len(patient_fld_imgs) != len(patient_fld_masks):
        raise ValueError("Found {} patient folders with images but {} with ground truths".format(
            len(patient_fld_imgs), len(patient_fld_masks)))

    # Paths to the test images and ground truths of all patients
    img_list = _collect_patient_slices(patient_fld_imgs, settings)
    msk_list = _collect_patient_slices(patient_fld_masks, settings)

    img_shape = (settings.IMG_HEIGHT, settings.IMG_WIDTH, settings.IMG_CHANNELS)

    # Create the HDF5 data sets: test images first, test ground truths second
    output_paths = [
        create_hdf5_db(img_list, "test", img_path, img_shape,
                       key=settings.HDF5_KEY,
                       ext=settings.HDF5_EXT,
                       settings=settings),
        create_hdf5_db(msk_list, "test", msk_path, img_shape,
                       key=settings.HDF5_KEY,
                       ext=settings.HDF5_EXT,
                       settings=settings,
                       is_mask=True),
    ]

    return output_paths

def perform_hdf5_conversion_inference(settings):
    """Convert the TEST images to HDF5, WITHOUT ground truths because none are available.

    :param settings: settings module providing the paths, slice range, image dimensions
        and HDF5 key/extension
    :return: list holding the single value returned by `create_hdf5_db()` for the images
    """
    images_root = os.path.join(settings.TEST_PATH, settings.FLDR_IMAGES)

    # Sort the image paths returned by list_images() for the test image folder and keep
    # the configured slice range.
    # NOTE(review): the slice is applied once to the complete list here, whereas
    # perform_hdf5_conversion_test() applies it per patient folder - confirm this is intended.
    found = list(list_images(basePath=images_root, validExts=settings.IMG_EXTENSION))
    found.sort()
    selected = found[settings.SLICE_START:settings.SLICE_END]

    # Only a single data set is created because there are no ground truths to convert
    img_shape = (settings.IMG_HEIGHT, settings.IMG_WIDTH, settings.IMG_CHANNELS)
    hdf5_path = create_hdf5_db(selected, "test", images_root, img_shape,
                               key=settings.HDF5_KEY,
                               ext=settings.HDF5_EXT,
                               settings=settings)

    return [hdf5_path]

In [None]:
# Development runs reuse the training data; only convert the test folder otherwise
if IS_DEVELOPMENT:
    print("Development mode, no test set created. Using the training/validation set.")
elif IS_INFERENCE:
    # Images only, no ground truths available
    output_paths = perform_hdf5_conversion_inference(settings)
    print("Converted test images WITHOUT ground truths: {}".format(output_paths))
else:
    # Images and ground truths
    output_paths = perform_hdf5_conversion_test(settings)
    print("Converted test images WITH ground truths: {}".format(output_paths))

# Load data
The cell below performs the actual loading of data. The assumption is that all test data fits into memory, generators are NOT used.

In [None]:
# Read the images (and the ground truths when they exist) into memory
if IS_DEVELOPMENT:
    # Pipeline development: fall back to the training data set
    print("WARNING: using TRAINING images, NOT TEST images for PIPELINE DEVELOPMENT")
    test_imgs = read_images("../data/MSC8002/training/train_images.h5",
                            settings.HDF5_KEY)
    test_ground_truths = read_groundtruths("../data/MSC8002/training/train_groundtruths.h5",
                                           settings.HDF5_KEY)
elif IS_INFERENCE:
    # Inference: test images only, test_ground_truths is NOT defined in this mode
    print("--- Pre-processing test images without ground truths for INFERENCE")
    test_imgs = read_images(output_paths[0], settings.HDF5_KEY)
else:
    # Final model evaluation: test images plus their ground truths
    print("WARNING: using TEST images, NOT TRAINING images for MODEL EVALUATION")
    test_imgs = read_images(output_paths[0], settings.HDF5_KEY)
    test_ground_truths = read_groundtruths(output_paths[1], settings.HDF5_KEY)

# Show one image to check
Show a single image and, if available, its ground truth just as a check.

In [None]:
# Index of the sample that is displayed as a sanity check
IX = 0

show_image(np.squeeze(test_imgs[IX]), 'Example image')
img_stats = (np.max(test_imgs[IX]), test_imgs.dtype, test_imgs.shape)
print("       Max image intensity: {} - {} - {}".format(*img_stats))

# Ground truths are only loaded in development and test mode
if IS_DEVELOPMENT or not IS_INFERENCE:
    show_image(np.squeeze(test_ground_truths[IX]), 'Example ground truth')
    gt_stats = (np.max(test_ground_truths[IX]), test_ground_truths.dtype, test_ground_truths.shape)
    print("Max ground truth intensity: {} - {} - {}".format(*gt_stats))

# Load the trained model
Load the saved model from the path specified by `TRAINED_MODEL_NAME`. Use the same `build_model_XXX()` method that was used by `DECiSION_train.ipynb` to train the model. Keras cannot load the saved weights into a model that has a different architecture.

In [None]:
# Create the UNet model using the image dimensions and number of classes from the settings
unet = UNet_NN(img_height=settings.IMG_HEIGHT,
               img_width=settings.IMG_WIDTH,
               img_channels=settings.IMG_CHANNELS,
               num_classes=settings.NUM_CLASSES)
# Build the architecture that belongs to TRAINED_MODEL_NAME; switch to the commented-out
# line when the 4 layer model file is selected instead
model = unet.build_model_BRAIN_3layer(use_bn=True, use_dropout=False)
# model = unet.build_model_BRAIN_4layer(use_bn=True, use_dropout=False)
# Load the saved weights into the freshly built model and print its layers
model.load_weights(TRAINED_MODEL_NAME)
model.summary()

# Produce segmentation maps
Predict segmentation maps for all MRI images in the `test_imgs` array and calculate overall loss and metric values.

In [None]:
# Time the prediction with a monotonic, high resolution clock so the elapsed time
# is not affected by system clock adjustments
start_time = time.perf_counter()
# Report the actual sample count in addition to the full array shape
print("Number of samples: {} (shape: {})".format(len(test_imgs), test_imgs.shape))
predictions = model.predict(test_imgs, batch_size=settings.TRN_BATCH_SIZE, verbose=2)
print("Elapsed time: {:.2f}s".format(time.perf_counter() - start_time))

In [None]:
# Evaluation requires ground truths. They are only loaded in development and test mode,
# so skip this step for pure inference instead of failing on the undefined
# test_ground_truths variable.
if not IS_INFERENCE or IS_DEVELOPMENT:
    eval_list = evaluate_model(model, test_imgs, test_ground_truths,
                               Adam(amsgrad=True), weighted_pixelwise_crossentropy_loss,
                               dice_coef, convert_img_to_pred, settings)

    # Print each loss/metric value next to its name
    for name, val in zip(model.metrics_names, eval_list):
        print("{} = {:.4f}".format(name, val))
else:
    print("Inference mode: no ground truths available, skipping model evaluation.")

# Convert predictions to segmentation map images
Convert the predictions (i.e. scores for both classes for each pixel) to a shape that can be displayed later. A threshold `TRN_PRED_THRESHOLD` is used to determine whether a pixel should be assigned the background or blood vessel class.

In [None]:
# Convert the raw predictions to segmentation map images using the prediction
# threshold from the settings
predictions_imgs = convert_pred_to_img(predictions,
                                       threshold=settings.TRN_PRED_THRESHOLD,
                                       verbose=settings.VERBOSE)

# Show/save segmentation maps
Display a single MRI image, its ground truth (if available) and the predicted segmentation map. In addition, save a number of images, ground truths and segmentation maps into a single image (one for each type) for visualization and save them to disk.

In [None]:
# Display the first image and its predicted segmentation map
MAX_IMAGES = len(predictions_imgs)
show_image(np.squeeze(test_imgs[0]), 'Original image')
show_image(np.squeeze(predictions_imgs[0]), 'Segmentation map')

# At most 16 samples are combined into each grouped image
n_grouped = min(16, MAX_IMAGES)
out_prefix = "../output/DECISION_" + unet.title

# Ground truths are only loaded in development and test mode
if IS_DEVELOPMENT or not IS_INFERENCE:
    show_image(np.squeeze(test_ground_truths[0]), 'Ground truth')

    # Combine a number of ground truths into a single image
    group_images(test_ground_truths[0:n_grouped],
                 4, 1.0, False, out_prefix + "_grp_groundtruths")

    print("  gr truth {} dtype {}".format(np.max(test_ground_truths[0]),
                                          test_ground_truths[0].dtype))

# Combine the images and the segmentation maps into a single image each
group_images(test_imgs[0:n_grouped],
             4, 1.0, False, out_prefix + "_grp_images")
group_images(predictions_imgs[0:n_grouped],
             4, 1.0, False, out_prefix + "_grp_predictions")

print("  original {} dtype {}".format(np.max(test_imgs[0]), test_imgs[0].dtype))
print("prediction {} dtype {}".format(np.max(predictions_imgs[0]), predictions_imgs[0].dtype))

# Show metrics
The cells below produce a number of metrics for the current model and the segmentation maps it produced. This is not possible when ground truths are not available.

## ROC curve
Plot the Receiver Operating Characteristic (ROC) curve.

In [None]:
# The ROC curve needs ground truths, which are only loaded in development and test mode
if IS_DEVELOPMENT or not IS_INFERENCE:
    roc_path = os.path.join(settings.OUTPUT_PATH, "DECiSION_" + unet.title)
    # Only index 1 of the last prediction axis is passed to the ROC plot
    plot_roc_curve(test_ground_truths,
                   predictions[:,:,:,1],
                   show=True,
                   save_path=roc_path)

## Precision/Recall curve
Plot the Precision/Recall curve.

In [None]:
# The precision/recall curve needs ground truths, which are only loaded in development and test mode
if IS_DEVELOPMENT or not IS_INFERENCE:
    prec_path = os.path.join(settings.OUTPUT_PATH, "DECiSION_" + unet.title)
    # The complete predictions array is passed together with the number of classes
    plot_precision_recall_curve(test_ground_truths, predictions, settings.NUM_CLASSES,
                                show=True, save_path=prec_path)

## Confusion matrix
Plot the pixel wise confusion matrix.

In [None]:
# Compare the ground truths with the thresholded segmentation maps (requires ground truths)
if IS_DEVELOPMENT or not IS_INFERENCE:
    conf_ind, conf_mat = print_confusion_matrix(test_ground_truths, predictions_imgs)

## Classification report
Plot the pixel wise classification report.

In [None]:
# The report compares ground truths with the segmentation maps, so ground truths are required
if IS_DEVELOPMENT or not IS_INFERENCE:
    print_classification_report(test_ground_truths,
                                predictions_imgs)

# Write segmentation maps, ground truths and MRI images to disk
Save the predicted segmentation maps, ground truths (after applying the binary mask) and original MRI images to disk.

In [None]:
# Remove existing files. WARNING: this deletes the complete settings.SEGMAP_PATH
# folder tree, including anything else stored in it, before recreating it empty
if os.path.exists(settings.SEGMAP_PATH):
    shutil.rmtree(settings.SEGMAP_PATH)
os.makedirs(settings.SEGMAP_PATH)

In [None]:
# All files are written as JPEG using the highest quality setting
jpeg_params = (cv2.IMWRITE_JPEG_QUALITY, 100)

# Build the file paths with os.path.join so the files end up inside SEGMAP_PATH
# regardless of whether the setting ends with a path separator

# Predicted segmentation maps
for i, segmap in enumerate(predictions_imgs):
    cv2.imwrite(os.path.join(settings.SEGMAP_PATH, "DECiSION_segmap_{}.jpg".format(i)),
                segmap,
                jpeg_params)

# Ground truths are only loaded in development and test mode
if not IS_INFERENCE or IS_DEVELOPMENT:
    for i, ground_truth in enumerate(test_ground_truths):
        cv2.imwrite(os.path.join(settings.SEGMAP_PATH, "DECiSION_groundtruth_{}.jpg".format(i)),
                    ground_truth,
                    jpeg_params)

# Original MRI images, multiplied by 255 before writing
# NOTE(review): presumably the images are scaled to [0, 1] - confirm against read_images()
for i, image in enumerate(test_imgs):
    cv2.imwrite(os.path.join(settings.SEGMAP_PATH, "DECiSION_image_{}.jpg".format(i)),
                image.astype(np.float32)*255,
                jpeg_params)

# Testing/inference complete