# Test `totalsegmentator` usage / evaluation

**Requires SimpleITK <= 2.1.0 (tested and working with 2.0.2).**

## Predictions on dataset

In [None]:
import os
import torch

from tqdm import tqdm
from totalsegmentator.python_api import totalsegmentator

from datasets import get_dataset

# Hide every CUDA device from this process so that inference runs on the CPU
os.environ['CUDA_VISIBLE_DEVICES'] = ''

dataset = get_dataset('total_segmentator')

models  = ['totalsegmentator_lowres', 'totalsegmentator_fullres']

# Return value of the last `totalsegmentator` call; stays `None` when every
# segmentation already exists on disk (previously `print(res)` raised a NameError)
res = None
for model in models:
    output_dir = os.path.join('evaluations', model, 'segmentations')
    # Harmless if the library also creates it; guarantees the first write cannot fail on a missing folder
    os.makedirs(output_dir, exist_ok = True)

    for _, row in tqdm(dataset.iterrows(), total = len(dataset)):
        # The name of the directory containing the image is used as subject identifier
        subject     = row['images'].split('/')[-2]
        output_file = os.path.join(output_dir, subject + '.nii.gz')
        # Already predicted in a previous (possibly interrupted) run
        if os.path.exists(output_file): continue

        res = totalsegmentator(
            row['images'],
            output  = output_file,
            ml      = True,
            # the "lowres" variant is the one produced with the `fast` option
            fast    = 'lowres' in model,
            verbose = False,
            quiet   = True
        )
print(res)

2023-06-02 16:27:11.716382: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-02 16:27:11.831257: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-02 16:27:11.855172: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading dataset total_segmentator...


100%|█████████████████████████████████████████████████████████████████████████████| 1203/1203 [00:00<00:00, 1771.29it/s]
  0%|                                                                                          | 0/1203 [00:00<?, ?it/s]

No GPU detected. Running on CPU. This can be very slow. The '--fast' option can help to some extend.
Predicting part 1 of 5 ...




Predicting part 2 of 5 ...
Predicting part 3 of 5 ...
Predicting part 4 of 5 ...
Predicting part 5 of 5 ...


  1%|█                                                                              | 16/1203 [03:36<4:28:03, 13.55s/it]

No GPU detected. Running on CPU. This can be very slow. The '--fast' option can help to some extend.
Predicting part 1 of 5 ...


## Evaluation

In [None]:
import os
import importlib
import numpy as np
import pandas as pd
import tensorflow as tf

from tqdm import tqdm
from sklearn.metrics import multilabel_confusion_matrix

from utils import plot_utils
from utils import load_json, dump_json, plot, plot_multiple, plot_volume
from datasets import get_dataset, train_test_split
from utils.med_utils import TOTALSEGMENTATOR_LABELS, load_medical_seg, transform_mask

# Only expose the GPUs after the first one to TensorFlow (the first physical GPU is hidden)
tf.config.set_visible_devices(tf.config.list_physical_devices('GPU')[1:], 'GPU')

models       = ['totalsegmentator_lowres', 'totalsegmentator_fullres']
dataset_name = 'total_segmentator'

dataset = get_dataset(dataset_name)

# Only keep the samples whose segmentation file is a `.npz` archive; the others are set aside in `skipped`
keep_mask = dataset['segmentation'].apply(lambda f: f.endswith('.npz'))
skipped   = dataset[~keep_mask]
dataset   = dataset[keep_mask]

# NOTE(review): `dataset` has already been column-indexed and mask-filtered above, so it is
# presumably never a dict at this point; the branch is kept unchanged for parity
if isinstance(dataset, dict):
    train, valid = dataset['train'], dataset['valid']
else:
    train, valid = train_test_split(
        dataset, train_size = 0.9, shuffle = True, random_state = 10, split_by_unique = True, min_occurence = 0
    )
    #valid = pd.concat([valid, skipped], axis = 0)
    

print('Dataset length ({} data skipped, {} ids) :\n  Train size : {} ({} ids)\n  Valid size : {} ({} ids)'.format(
    len(skipped), len(dataset['id'].unique()), 
    len(train), len(train['id'].unique()), len(valid), len(valid['id'].unique())
))
# Sanity check for leakage between splits : a single vectorized membership test
# instead of one linear scan of `train['id']` per validation id
print('# ids in valid that are also in train : {}'.format(
    np.isin(valid['id'].unique(), train['id'].values).sum()
))

In [None]:
def compute_confusion_matrix(y_true, y_pred, labels):
    """
        Compute the per-class binary confusion counts between two segmentation masks
        
        Arguments :
            - y_true : ground-truth mask, converted to a dense mask (treated as one-hot if it has 4 dimensions)
            - y_pred : predicted mask, same convention as `y_true`
            - labels : sequence of class names; `labels[i]` is assumed to name the class
                       encoded by the value `i` in the dense masks (TODO confirm against `transform_mask`)
        Return :
            - dict `{label : {'tp' : int, 'fp' : int, 'fn' : int, 'tn' : int}}` with one entry per item of `labels`
    """
    y_true = transform_mask(y_true, 'dense', is_one_hot = len(y_true.shape) == 4)
    y_pred = transform_mask(y_pred, 'dense', is_one_hot = len(y_pred.shape) == 4)
    
    # Tensors (anything exposing `.numpy()`) are converted to arrays before calling sklearn
    if hasattr(y_true, 'numpy'): y_true = y_true.numpy()
    if hasattr(y_pred, 'numpy'): y_pred = y_pred.numpy()

    if len(labels) == 0: return {}

    # Without an explicit `labels` argument, sklearn orders the rows by the sorted set of classes
    # *present* in the data : as soon as one class is missing from a volume, row `i` would no longer
    # be class `i` and the counts would be attributed to the wrong label. Forcing the full index
    # range keeps row `i` aligned with `labels[i]` (absent classes simply get tp = fp = fn = 0).
    cm = multilabel_confusion_matrix(
        y_true.reshape([-1]), y_pred.reshape([-1]), labels = list(range(len(labels)))
    )
    # Counts are cast to built-in `int` so that the result only contains plain Python values
    return {
        label : {
            'tp' : int(cm[i, 1, 1]), 'fp' : int(cm[i, 0, 1]),
            'fn' : int(cm[i, 1, 0]), 'tn' : int(cm[i, 0, 0])
        } for i, label in enumerate(labels)
    }

def compute_metrics(metrics, metric_name, ids = None):
    """
        Turn per-subject confusion counts into per-class lists of scores
        
        Arguments :
            - metrics     : dict `{subject_id : {class_name : {'tp', 'fp', 'fn', 'tn'}}}`
            - metric_name : key of `_metrics_methods` selecting the score to compute
            - ids         : optional collection of subject ids; when non-empty, other subjects are ignored
        Return :
            - dict `{class_name : [scores]}` where `None` scores have been removed;
              the class names `None` and `'null'` are reported as `'background'`
    """
    per_class = {}
    for subject, class_counts in metrics.items():
        if ids and subject not in ids:
            continue

        for name, counts in class_counts.items():
            key = 'background' if name in (None, 'null') else name
            if key not in per_class:
                per_class[key] = []
            per_class[key].append(_metrics_methods[metric_name](** counts))
    
    # A metric returns `None` when it is undefined for a sample : drop those entries
    cleaned = {}
    for key, scores in per_class.items():
        cleaned[key] = [score for score in scores if score is not None]
    return cleaned

def dice_coeff(tp, fp, fn, tn):
    """
        Dice score `2 * tp / (2 * tp + fp + fn)` computed from confusion counts
        
        Returns `None` when `tp + fn + fp == 0`, i.e. the score is undefined.
        `tn` is accepted so that a full confusion dict can be unpacked, but is not used.
    """
    if tp + fn + fp == 0:
        return None

    numerator   = 2. * tp
    denominator = max(1, 2 * tp + fp + fn)
    return numerator / denominator

# Registry mapping a metric name (as given to `compute_metrics`) to the function computing it
_metrics_methods = dict(dice = dice_coeff)

# Reload the plotting module so that changes made to it are picked up by this cell
importlib.reload(plot_utils)

samples   = dataset
overwrite = False

all_results = {}
for model in models:
    results_file = os.path.join('evaluations', model, 'results.json')
    # Previously computed subjects are reused unless `overwrite` is set
    results = load_json(results_file, default = {}) if not overwrite else {}

    for _, row in tqdm(samples.iterrows(), total = len(samples)):
        # The name of the directory containing the image is used as subject identifier
        subject = row['images'].split('/')[-2]
        if subject in results: continue
        
        true, _ = load_medical_seg(row['segmentation'], mask_labels = row['label'], mapping = TOTALSEGMENTATOR_LABELS)
        pred, _ = load_medical_seg(os.path.join('evaluations', model, 'segmentations', '{}.nii.gz'.format(subject)))
        # Predictions loaded as floating point are cast to integer class indices
        if pred.dtype in (np.float32, np.float64): pred = pred.astype(np.int32)
        
        #plot_volume(true, strides = 3)
        #plot_volume(pred, strides = 3)
            
        results[subject] = compute_confusion_matrix(true, pred, labels = TOTALSEGMENTATOR_LABELS)
        
        # Saved after every subject so that an interrupted run restarts where it stopped
        dump_json(results_file, results, indent = 4)
    
    all_results[model] = results

formatted = {
    k : compute_metrics(v, 'dice') for k, v in all_results.items()
}

titled = {}
for k, v in formatted.items():
    # Classes without any valid score are left out of the average : `np.mean([])` is NaN
    # and would turn the whole average into NaN
    class_means = [np.mean(vi) for vi in v.values() if len(vi) > 0]
    # Dice scores lie in [0, 1] : scale by 100 so that the value matches the '%' unit of the title
    average = 100. * np.mean(class_means) if class_means else float('nan')
    titled['\n{} (avg : {:.2f} %)'.format(k, average)] = {'x' : v}
formatted = titled

plot_utils.plot_multiple(
    ** formatted,
    plot_type = 'boxplot', ncols = 1, x_size = 10, y_size = 15, ytick_rotation = 10, vert = False,
    xlabel = 'score', ylabel = 'organ', title = 'Dice score', use_subplots = True
)