In [9]:
import numpy as np
import os
import cv2

In [10]:
# Number of classes in the dataset; count the background too when the
# network output has a channel for it.
num_of_classes = 6

# Class names in ascending order of the pixel value that encodes each class
# (index 0 -> pixel value 0, index 1 -> pixel value 1, ...).
class_names = ["background", "liver", "bladder", "lungs", "kidneys", "bones"]

assert num_of_classes == len(class_names)

# Per-class ground-truth pixel counters, one slot per class, all starting at 0.
n_pixel_classes = [0] * num_of_classes

In [11]:
def prepare_prediction(pred):
    """Collapse the last axis of `pred` into a label map.

    Takes the index of the maximum along the last axis and returns the
    result as an unsigned 8-bit array.
    """
    return np.argmax(pred, axis=-1).astype(np.uint8)

In [12]:
def explode_img(img, num_classes):
    """One-hot encode a label map along a new trailing axis.

    Parameters
    ----------
    img : array_like
        Label map of any rank whose entries are class indices.
    num_classes : int
        Number of channels to produce; channel ``i`` marks entries equal to ``i``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``img.shape + (num_classes,)`` holding 1 where
        ``img == i`` and 0 elsewhere. Entries outside ``range(num_classes)``
        yield an all-zero channel vector.
    """
    img = np.asarray(img)
    # Broadcasting against the class ids compares every entry with every class
    # at once and works for any input rank, not only 2-D slices.
    class_ids = np.arange(num_classes)
    return (img[..., np.newaxis] == class_ids).astype(np.uint8)

In [13]:
def sensitivity_single(pred, true):
    """Smoothed sensitivity (recall) of a binary prediction mask.

    Returns ``(TP + 1) / (TP + FN + 1)``; the added 1 keeps the ratio defined
    (and equal to 1) when `true` contains no positives.
    """
    missed = 1 - pred  # 1 where the prediction is negative
    true_pos = np.sum(pred * true)
    false_neg = np.sum(true * missed)
    return (true_pos + 1) / (true_pos + false_neg + 1)


def specificity_single(pred, true):
    """Smoothed specificity of a binary prediction mask.

    Returns ``(TN + 1) / (TN + FP + 1)``; the added 1 keeps the ratio defined
    (and equal to 1) when `true` contains no negatives.
    """
    negatives = 1 - true  # 1 where the ground truth is negative
    true_neg = np.sum(negatives * (1 - pred))
    false_pos = np.sum(negatives * pred)
    return (true_neg + 1) / (true_neg + false_pos + 1)


def dice_single(pred, true):
    """Smoothed Dice coefficient of a binary prediction mask.

    Returns ``(2 * |pred AND true| + 1) / (|pred| + |true| + 1)``; the added 1
    keeps the ratio defined (and equal to 1) when both masks are empty.

    The denominator is the sum of the two mask sizes, not a set union. The
    sizes are summed separately because adding two boolean arrays elementwise
    is a logical OR, which would under-count overlapping pixels.
    """
    intersection = np.sum(pred * true)
    total = np.sum(pred) + np.sum(true)
    return (2 * intersection + 1) / (total + 1)

In [14]:
def score_single_slice(pred, true, score):
    """Score one predicted label map against its ground truth, class by class.

    Parameters
    ----------
    pred : numpy.ndarray
        Predicted label map; compared against each class index with ``pred == i``.
    true : numpy.ndarray
        Ground-truth label map; one-hot encoded with ``explode_img`` and indexed
        as ``[:, :, i]``, so it is expected to be 2-D.
    score : callable
        Metric taking ``(binary_pred, binary_true)`` and returning a number.

    Returns
    -------
    slice_score
        Pixel-weighted mean of the per-class scores over classes 1.. (class 0
        is left out), smoothed by adding 1 to numerator and denominator.
    numpy.ndarray
        Per-class scores multiplied by the number of ground-truth pixels of
        each class in this slice (class 0 included).

    Side effects
    ------------
    Adds this slice's per-class ground-truth pixel counts to the module-level
    ``n_pixel_classes`` list (mutated in place).
    """
    # Use the configured class count rather than a literal so the function
    # follows `num_of_classes` when the dataset changes.
    true_onehot = explode_img(true, num_of_classes)

    scores = []     # metric value per class
    classes_w = []  # ground-truth pixel count per class in this slice

    for i in range(num_of_classes):
        true_i = true_onehot[:, :, i]
        scores.append(score(pred == i, true_i))
        classes_w.append(np.sum(true_i))
        n_pixel_classes[i] += classes_w[i]

    # Weighted mean over the non-background classes; the +1 terms keep the
    # ratio defined when the slice contains only class 0.
    weighted_sum = sum(w * s for w, s in zip(classes_w[1:], scores[1:]))
    slice_score = (weighted_sum + 1) / (sum(classes_w[1:]) + 1)

    return slice_score, np.array(scores) * np.array(classes_w)

In [17]:
def evaluate_results(dir_pred='predictions', dir_true='labels'):
    """Print dice, sensitivity and specificity for a directory of predictions.

    Parameters
    ----------
    dir_pred : str
        Directory of predicted label maps, loaded with ``np.load``.
    dir_true : str
        Directory of ground-truth label maps, loaded with ``np.load``.

    Files are paired by sorted order and each pair must share the same name
    once the extension is removed. For every metric the function prints the
    mean and standard deviation of the per-slice scores, a pixel-weighted mean
    over classes 1.. (class 0 excluded), and the pixel-weighted mean of every
    class. Nothing is returned.

    Raises
    ------
    AssertionError
        If the two directories hold a different number of files or a pair of
        file names does not match.

    Side effects
    ------------
    Resets and refills the module-level ``n_pixel_classes`` list for every metric.
    """
    divider = '---------------------------------------'

    list_pred = sorted(os.listdir(dir_pred))
    list_true = sorted(os.listdir(dir_true))

    # zip() stops at the shorter list, so a missing file would otherwise go
    # unnoticed and shift nothing but silently drop the tail.
    assert len(list_pred) == len(list_true), (
        'number of files differs: %d in %s, %d in %s'
        % (len(list_pred), dir_pred, len(list_true), dir_true))
    for pred_name, true_name in zip(list_pred, list_true):
        assert os.path.splitext(pred_name)[0] == os.path.splitext(true_name)[0], (
            'file name mismatch: %s vs %s' % (pred_name, true_name))

    # define metrics to evaluate results
    metrics = [dice_single, sensitivity_single, specificity_single]
    metrics_labels = ['dice', 'sensitivity', 'specificity']
    assert len(metrics) == len(metrics_labels)

    for metric, metrics_label in zip(metrics, metrics_labels):
        # score_single_slice accumulates into n_pixel_classes, so start every
        # metric from zero (in place: the list object is shared module state).
        for i in range(num_of_classes):
            n_pixel_classes[i] = 0

        scores = []  # score of every single slice
        # per class: list of (class score * class pixel count) for every slice
        scores_classes_total = [[] for _ in range(num_of_classes)]

        for pred_name, true_name in zip(list_pred, list_true):
            # NOTE(review): files are used as loaded; presumably they already
            # hold label maps. Raw per-class network outputs would first need
            # prepare_prediction -- confirm against how the files were saved.
            pred = np.load(os.path.join(dir_pred, pred_name))
            true = np.load(os.path.join(dir_true, true_name))
            current_score, current_classes_scores = score_single_slice(pred, true, metric)

            scores.append(current_score)
            for j in range(num_of_classes):
                scores_classes_total[j].append(current_classes_scores[j])

        print(divider)
        print('Global ', metrics_label, ':')
        print("Mean on slices: %.2f +- %.2f" % (np.mean(scores) * 100, np.std(scores) * 100))

        # Aggregate over classes 1.. only (class 0 is excluded).
        n_foreground_pixels = sum(n_pixel_classes[1:])
        std_classes = 0
        mean_on_classes = 0
        for i in range(1, num_of_classes):
            std_classes += np.std(scores_classes_total[i])
            mean_on_classes += np.sum(scores_classes_total[i])

        # NOTE(review): the "+-" values here and in the per-class lines are the
        # std of the pixel-weighted per-slice scores divided by a total pixel
        # count, not the std of a per-slice score -- confirm this is intended.
        std_classes /= n_foreground_pixels
        mean_on_classes /= n_foreground_pixels

        print('Weighted Mean on classes: %.2f +- %.2f' % (mean_on_classes * 100, std_classes * 100))
        print(divider)

        print('Classes ', metrics_label)

        for i in range(num_of_classes):
            print('%s: %.2f +- %.2f' % (class_names[i], np.sum(scores_classes_total[i])/n_pixel_classes[i] * 100,
                 np.std(scores_classes_total[i])/n_pixel_classes[i] * 100))
        print(divider)
        

In [18]:
# Run the evaluation with the default 'predictions' and 'labels' directories.
evaluate_results()

---------------------------------------
Global  dice :
Mean on slices: 87.00 +- 6.72
Weighted Mean on classes: 90.14 +- 0.07
---------------------------------------
Classes  dice
background: 98.66 +- 0.01
liver: 90.89 +- 0.09
bladder: 74.88 +- 0.10
lungs: 93.37 +- 0.09
kidneys: 79.09 +- 0.09
bones: 89.54 +- 0.03
---------------------------------------
---------------------------------------
Global  sensitivity :
Mean on slices: 90.16 +- 5.85
Weighted Mean on classes: 92.04 +- 0.07
---------------------------------------
Classes  sensitivity
background: 98.35 +- 0.01
liver: 91.63 +- 0.09
bladder: 79.99 +- 0.10
lungs: 92.21 +- 0.09
kidneys: 82.52 +- 0.09
bones: 95.29 +- 0.03
---------------------------------------
---------------------------------------
Global  specificity :
Mean on slices: 99.15 +- 0.71
Weighted Mean on classes: 99.07 +- 0.07
---------------------------------------
Classes  specificity
background: 93.59 +- 0.01
liver: 98.72 +- 0.09
bladder: 99.43 +- 0.11
lungs: 99.35 +-