In [8]:
# Base name of this notebook. It is passed to nbconvert as the notebook to
# export and is used as the prefix of the archived HTML file name.
notebook_name = "SegmentationEvaluation_Grouped"

In [9]:
from local_vars import root_folder
import os

# Update these names to reflect your data folders and files.

# Folder that receives the timestamped HTML archives of this notebook.
notebook_fullpath = os.path.join(root_folder, "LeaveOneOutNotebooks")

# Subfolder of root_folder that holds the "<name>_segmentation.npy" and
# "<name>_prediction.npy" arrays to evaluate.
test_arrays_folder = "LeaveOneOutTestArrays"

# File-name prefixes of the left-out groups; one evaluation is run per entry.
outList = [r"q000", r"q001", r"q002", r"q003", r"q004", r"q005", r"q006", r"q007"]

# Tolerance around the ground truth. It is converted to a whole number of
# pixels as int(acceptable_margin_mm / mm_per_pixel) and used as the number of
# dilation iterations applied to the ground truth.
acceptable_margin_mm = 1
mm_per_pixel = 1

# Thresholds applied to the predictions when tracing the ROC curve, listed
# from highest to lowest.
# NOTE(review): compute_AUC presumably relies on this descending order so that
# the false positive rate grows along the list -- confirm before reordering.
roc_thresholds = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1,
                  0.08, 0.06, 0.04, 0.02, 0.01,
                  0.008, 0.006, 0.004, 0.002, 0.001,
                  0.0008, 0.0006, 0.0004, 0.0002, 0.0001,
                  0.00001, 0.000001]


In [10]:
import datetime
import matplotlib.pyplot as plt
import numpy as np

import scipy.ndimage

from random import sample

import evaluation_metrics

In [11]:
def read_data(groundtruth_fullname, prediction_fullname):
    """Load a ground-truth stack and a prediction stack from ``.npy`` files.

    Args:
        groundtruth_fullname: Path of the ``.npy`` file with the ground truth.
        prediction_fullname: Path of the ``.npy`` file with the predictions.

    Returns:
        Tuple ``(groundtruth_data, prediction_data, num_groundtruth,
        num_prediction)``; the two counts are the sizes of the first axis of
        the respective arrays.

    Raises:
        RuntimeError: If the two arrays differ in size along their first axis.
    """
    groundtruth_data = np.load(groundtruth_fullname)
    prediction_data = np.load(prediction_fullname)

    num_groundtruth = groundtruth_data.shape[0]
    num_prediction = prediction_data.shape[0]

    print("Found {} ground truth images and {} predictions\n".format(num_groundtruth, num_prediction))

    if num_groundtruth != num_prediction:
        # A bare `raise` outside an `except` block only produces the opaque
        # "RuntimeError: No active exception to reraise". Keep the same
        # exception type for callers, but say what actually went wrong.
        raise RuntimeError(
            "Number of images should be equal! Found {} ground truth images and {} predictions".format(
                num_groundtruth, num_prediction))

    return groundtruth_data, prediction_data, num_groundtruth, num_prediction

def dilate_stack(segmentation_data, iterations):
    """Apply binary dilation to every slice of a stack independently.

    Args:
        segmentation_data: Iterable of arrays; each item is dilated on its own
            with ``scipy.ndimage.binary_dilation``.
        iterations: Number of dilation iterations, forwarded unchanged to
            ``scipy.ndimage.binary_dilation``.

    Returns:
        ``np.array`` built from the dilated slices, in input order.
    """
    dilated_slices = []
    for image in segmentation_data:
        dilated_slices.append(scipy.ndimage.binary_dilation(image, iterations=iterations))
    return np.array(dilated_slices)

def dilate_ground(groundtruth_data, acceptable_margin_mm, mm_per_pixel):
    """Grow the ground-truth mask by the acceptable margin.

    The margin is converted to a whole number of pixels with
    ``int(acceptable_margin_mm / mm_per_pixel)`` (truncating) and channel 0 of
    the ground truth is dilated by that many iterations.

    Args:
        groundtruth_data: 4-D array; only ``[:, :, :, 0]`` is used.
        acceptable_margin_mm: Tolerated distance around the ground truth.
        mm_per_pixel: Pixel size used to convert the margin to pixels.

    Returns:
        Boolean array with the shape of ``groundtruth_data[:, :, :, 0]`` that
        marks the region in which a prediction is considered acceptable.
    """
    acceptable_margin_pixel = int(acceptable_margin_mm / mm_per_pixel)
    if acceptable_margin_pixel < 1:
        # scipy.ndimage.binary_dilation treats iterations < 1 as "repeat until
        # the result stops changing", which would flood the whole image from
        # any foreground pixel. A sub-pixel margin means no growth at all.
        return groundtruth_data[:, :, :, 0] != 0
    acceptable_region = dilate_stack(groundtruth_data[:, :, :, 0], acceptable_margin_pixel)
    return acceptable_region

def compute_regions(groundtruth_data, prediction_data, acceptable_region):
    """Split the prediction into true-positive and false-positive parts.

    Args:
        groundtruth_data: 4-D array; channel 0 (``[:, :, :, 0]``) is used.
        prediction_data: 4-D array; channel 1 (``[:, :, :, 1]``) is used.
        acceptable_region: Mask of the region where predictions are tolerated.

    Returns:
        Tuple ``(true_pos_prediction, false_pos_prediction,
        not_acceptable_region)``: the element-wise minimum of ground truth and
        prediction, the element-wise minimum of the complement of the
        acceptable region and the prediction, and that complement
        (``1 - acceptable_region``).
    """
    groundtruth_channel = groundtruth_data[:, :, :, 0]
    predicted_channel = prediction_data[:, :, :, 1]

    # Prediction that overlaps the ground truth.
    overlap_with_truth = np.minimum(groundtruth_channel, predicted_channel)

    # Prediction that falls outside the tolerated region.
    outside_region = 1 - acceptable_region
    overlap_with_outside = np.minimum(outside_region, predicted_channel)

    return overlap_with_truth, overlap_with_outside, outside_region

def compute_prediction_amounts(groundtruth_data, not_acceptable_region, true_pos_prediction, false_pos_prediction):
    """Print per-image totals of true/false positive prediction and area.

    Args:
        groundtruth_data: 4-D array; its first axis gives the number of images
            and ``[:, :, :, 0]`` is summed as the true positive area.
        not_acceptable_region: Array summed as the true negative area.
        true_pos_prediction: Array summed as the true positive prediction.
        false_pos_prediction: Array summed as the false positive prediction.

    Returns:
        None. The figures are written to standard output.
    """
    # Take the image count from the data itself instead of relying on a
    # notebook-global `num_groundtruth` that only exists after the loop cell ran.
    num_images = groundtruth_data.shape[0]

    fpp = np.sum(false_pos_prediction)
    tna = np.sum(not_acceptable_region)
    tpp = np.sum(true_pos_prediction)
    tpa = np.sum(groundtruth_data[:, :, :, 0])

    print("Total false positive prediction amount per image: {:.2f}".format(fpp / num_images))
    print("Total true negative area per image:               {:.2f}".format(tna / num_images))
    print("  {:.2f}% of the true negative area was correctly predicted".format((tna - fpp) / tna * 100))
    print("")
    print("Total true positive prediction amount per image: {:.2f}".format(tpp / num_images))
    print("Total true positive area per image:              {:.2f}".format(tpa / num_images))
    print("  {:.2f}% of the true positive area was correctly predicted\n".format(tpp / tpa * 100))

def compute_roc(roc_thresholds, prediction_data, groundtruth_data, acceptable_margin_mm, mm_per_pixel):
    """Evaluate the prediction at each threshold and collect ROC coordinates.

    For every threshold a copy of ``prediction_data`` is binarised (values at
    or above the threshold become 1.0, values below become 0.0) and passed to
    ``evaluation_metrics.compute_evaluation_metrics``.

    Args:
        roc_thresholds: Sequence of thresholds to evaluate.
        prediction_data: Prediction array; it is copied, never modified.
        groundtruth_data: Ground-truth array forwarded to the metrics function.
        acceptable_margin_mm: Forwarded to the metrics function.
        mm_per_pixel: Forwarded to the metrics function.

    Returns:
        Tuple ``(true_positives, false_positives)`` of float arrays with one
        entry per threshold. Both are the reported percentages divided by 100;
        the false positive value is the complement of the true negative
        percentage.
    """
    num_thresholds = len(roc_thresholds)
    true_positives = np.zeros(num_thresholds)
    false_positives = np.zeros(num_thresholds)

    for index, cutoff in enumerate(roc_thresholds):
        # Binarise a private copy so the caller's prediction stays untouched.
        binarized = np.copy(prediction_data)
        binarized[binarized >= cutoff] = 1.0
        binarized[binarized < cutoff] = 0.0

        metrics = evaluation_metrics.compute_evaluation_metrics(
            binarized,
            groundtruth_data,
            acceptable_margin_mm=acceptable_margin_mm,
            mm_per_pixel=mm_per_pixel,
        )

        true_negative_percent = metrics[evaluation_metrics.TRUE_NEGATIVE_AREA_PERCENT]
        false_positives[index] = (100 - true_negative_percent) / 100.0
        true_positives[index] = metrics[evaluation_metrics.TRUE_POSITIVE_AREA_PERCENT] / 100.0

    return true_positives, false_positives

def compute_goodness(roc_thresholds, true_positives, false_positives):
    """Print the ROC operating point that lies farthest from the diagonal.

    Goodness is the perpendicular distance of the point
    ``(false_positive, true_positive)`` from the line through ``(0, 0)`` and
    ``(1, 1)``, i.e. ``|true_positive - false_positive| / sqrt(2)``. The
    distance is unsigned, so a point below the diagonal scores the same as its
    mirror image above it.

    Args:
        roc_thresholds: Thresholds, one per ROC point.
        true_positives: True positive rate per threshold.
        false_positives: False positive rate per threshold.

    Returns:
        None. The best threshold and its rates are written to standard output.
    """
    num_points = len(roc_thresholds)
    tpr = np.asarray(true_positives, dtype=float)[:num_points]
    fpr = np.asarray(false_positives, dtype=float)[:num_points]

    # Closed form of |cross((1, 1), (fpr, tpr))| / |(1, 1)|. It avoids np.cross
    # on 2-element vectors, which NumPy 2.0 deprecates.
    goodnesses = np.abs(tpr - fpr) / np.sqrt(2.0)

    best_threshold_index = np.argmax(goodnesses)
    print("Best threshold:           {}".format(roc_thresholds[best_threshold_index]))
    print("Best true positive rate:  {}".format(true_positives[best_threshold_index]))
    print("Best false positive rate: {}\n".format(false_positives[best_threshold_index]))

def compute_AUC(true_positives, false_positives):
    """Compute and print the area under the ROC curve as a step function.

    Each ROC point contributes a rectangle whose height is its true positive
    rate and whose width reaches to the next point's false positive rate; the
    last point's rectangle reaches to 1.0. Nothing is added for the stretch
    between 0 and the first false positive rate.

    Args:
        true_positives: True positive rate per ROC point.
        false_positives: False positive rate per ROC point.
            NOTE(review): presumably expected in ascending order -- confirm
            against the threshold list used by the caller.

    Returns:
        The accumulated area; it is also printed as ``AUC = <area>``.
    """
    area = 0.0
    num_points = len(false_positives)

    for i in range(num_points):
        # The right edge of the rectangle is the next point, or 1.0 for the last one.
        right_edge = false_positives[i + 1] if i + 1 < num_points else 1.0
        area = area + (right_edge - false_positives[i]) * true_positives[i]

    print("AUC = {}\n".format(area))

    return area

In [12]:
# Main evaluation: for each left-out group, load its arrays, build the
# tolerance regions and report amounts, best threshold and AUC.
for out in outList:

    print("#######################################################")
    print("Assessing Predictions on model that left out", out)
    print("#######################################################\n")

    # Leave One Out data set: both arrays share the group prefix and folder.
    test_arrays_fullpath = os.path.join(root_folder, test_arrays_folder)
    groundtruth_fullname = os.path.join(test_arrays_fullpath, out + "_segmentation.npy")
    prediction_fullname = os.path.join(test_arrays_fullpath, out + "_prediction.npy")

    # Children's data set: use these two lines instead of the two above.
    #groundtruth_fullname = r"c:\Data\ChildrensTestArrays\segmentation-test.npy"
    #prediction_fullname=r"c:\Data\ChildrensTestArrays\\" + out + r"_prediction.npy"

    # Load the arrays and derive the regions used by the summary below.
    (groundtruth_data,
     prediction_data,
     num_groundtruth,
     num_prediction) = read_data(groundtruth_fullname, prediction_fullname)

    acceptable_region = dilate_ground(groundtruth_data, acceptable_margin_mm, mm_per_pixel)

    (true_pos_prediction,
     false_pos_prediction,
     not_acceptable_region) = compute_regions(groundtruth_data, prediction_data, acceptable_region)

    # Report the metrics for this group.
    compute_prediction_amounts(
        groundtruth_data, not_acceptable_region, true_pos_prediction, false_pos_prediction)
    true_positives, false_positives = compute_roc(
        roc_thresholds, prediction_data, groundtruth_data, acceptable_margin_mm, mm_per_pixel)
    compute_goodness(roc_thresholds, true_positives, false_positives)
    area = compute_AUC(true_positives, false_positives)

#######################################################
Assessing Predictions on model that left out q000
#######################################################

Found 523 ground truth images and 523 predictions

Total false positive prediction amount per image: 91.65
Total true negative area per image:               16281.60
  99.44% of the true negative area was correctly predicted

Total true positive prediction amount per image: 36.11
Total true positive area per image:              45.86
  78.74% of the true positive area was correctly predicted

Best threshold:           0.001
Best true positive rate:  0.9790685068590251
Best false positive rate: 0.030992528957950752

AUC = 0.9930328046429661

#######################################################
Assessing Predictions on model that left out q001
#######################################################

Found 355 ground truth images and 355 predictions

Total false positive prediction amount per image: 45.36
Total true negative 

In [13]:
# Save notebook
#
# Asks the notebook front end to write a checkpoint by displaying a small
# JavaScript snippet as this cell's output.
# NOTE(review): the `require(["base/js/namespace"], ...)` hook presumably only
# exists in the classic Notebook UI, not in JupyterLab -- confirm.
# NOTE(review): the save runs in the browser, so it may not have finished when
# the following cell starts -- verify before relying on the archived copy.

from IPython.display import Javascript
script = '''
require(["base/js/namespace"],function(Jupyter) {
    Jupyter.notebook.save_checkpoint();
});
'''
# Last expression of the cell: displaying the object is what runs the script.
Javascript(script)

<IPython.core.display.Javascript object>

In [14]:
# Archive the notebook as one HTML file with a unique, timestamp-based name.

import subprocess

timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
if not os.path.exists(notebook_fullpath):
    # exist_ok guards against the folder appearing between the check and the call.
    os.makedirs(notebook_fullpath, exist_ok=True)
    print("Creating folder: {}".format(notebook_fullpath))
notebook_file_name = notebook_name + "_" + timestamp + ".html"
notebook_fullname = os.path.join(notebook_fullpath, notebook_file_name)

# Pass the arguments as a list instead of one concatenated string: no shell is
# involved, so folder names containing spaces or shell metacharacters are safe.
nbconvert_command = [
    "jupyter", "nbconvert", "--to", "html", notebook_name, "--output", notebook_fullname,
]
try:
    nbconvert_exit_code = subprocess.run(nbconvert_command).returncode
except OSError as launch_error:
    # Raised when the executable cannot be started, e.g. jupyter is not on PATH.
    nbconvert_exit_code = None
    print("Could not launch nbconvert: {}".format(launch_error))

# Only claim success when the conversion actually succeeded.
if nbconvert_exit_code == 0:
    print("Notebook saved to: {}".format(notebook_fullname))
elif nbconvert_exit_code is not None:
    print("nbconvert failed with exit code {}; notebook was not saved to: {}".format(
        nbconvert_exit_code, notebook_fullname))

Notebook saved to: d:\Data\LeaveOneOutNotebooks\SegmentationEvaluation_Grouped_2019-10-13_14-47-38.html
