In [1]:
import os
import sys
import argparse
import numpy as np
import torch
import pandas as pd
import nibabel as nib    
from pathlib import Path
from surface_distance import metrics
from tqdm import tqdm
import torch.nn as nn
import ants
import json

In [23]:
def pred_invert_resample(path_to_pred, path_to_orig, path_to_resampled):
    # path_to_pred = *npz, path_to_orig = *1_reg, path_to_resampled = *4a_resample
    # *.npz archives sometimes can be recognised wrong, if extracted and saved back at the same time
#     data = np.load(path_to_pred, allow_pickle=True)['arr_0']
    # schw
    if np.shape(data)[0] == 1:
        data = data.transpose(0,3,2,1).astype('float32')
        old_orig = ants.image_read(path_to_resampled)
        new_orig = ants.image_read(path_to_orig)
        old_like = old_orig.new_image_like(data[0])
        new_img = ants.resample_image(old_like, new_orig.spacing, False, 0)
        output_file = new_img.numpy().astype('float16')
    
    # gbm and lgg
    elif np.shape(data)[0] > 1:
        data = data.transpose(0,3,2,1).astype('float32')

        old_orig_ct1 = ants.image_read(path_to_resampled)
        new_orig_ct1 = ants.image_read(path_to_orig)

        old_like_ch_0 = old_orig_ct1.new_image_like(data[0])
        old_like_ch_1 = old_orig_ct1.new_image_like(data[1])
        old_like_ch_2 = old_orig_ct1.new_image_like(data[2])

        new_img_0 = ants.resample_image(old_like_ch_0, new_orig_ct1, False, 0)
        new_img_1 = ants.resample_image(old_like_ch_1, new_orig_ct1, False, 0)
        new_img_2 = ants.resample_image(old_like_ch_2, new_orig_ct1, False, 0)

        new_img_shape =  new_img_2.numpy().shape

        new_array = np.zeros(tuple([3] + list(new_img_shape)), dtype='float16')
        new_array[0] = new_img_0.numpy()
        new_array[1] = new_img_1.numpy()
        new_array[2] = new_img_2.numpy()
        output_file = new_array.transpose(0,3,2,1).astype('float16')
    return (output_file)

def pred_invert_resample_1cl(data, path_to_orig, path_to_resampled):
    # path_to_pred = *npz, path_to_orig = *1_reg, path_to_resampled = *4a_resample
    # *.npz archives sometimes can be recognised wrong, if extracted and saved back at the same time
#     data = np.load(path_to_pred, allow_pickle=True)['arr_0']
    # schw
        data = data.transpose(0,3,2,1).astype('float32')
        old_orig = ants.image_read(path_to_resampled)
        new_orig = ants.image_read(path_to_orig)
        old_like = old_orig.new_image_like(data[0])
        new_img = ants.resample_image(old_like, new_orig.spacing, False, 0)
        output_file = new_img.numpy().astype('float16')
        return (output_file)
    
def pred_invert_resample_classes(data, path_to_orig, path_to_resampled):

    # gbm and lgg
        data = data.transpose(0,3,2,1).astype('float32')
        print(data.shape)
        old_orig_ct1 = ants.image_read(path_to_resampled)
        new_orig_ct1 = ants.image_read(path_to_orig)
        print(old_orig_ct1.shape)
        print(new_orig_ct1.shape)
        old_like_ch_0 = old_orig_ct1.new_image_like(data[0])
        old_like_ch_1 = old_orig_ct1.new_image_like(data[1])
        old_like_ch_2 = old_orig_ct1.new_image_like(data[2])
        print(old_like_ch_2.shape)
        print('old_like')
        print(old_like_ch_2.shape)
        new_img_0 = ants.resample_image(old_like_ch_0, new_orig_ct1.spacing, False, 0)
        new_img_1 = ants.resample_image(old_like_ch_1, new_orig_ct1.spacing, False, 0)
        new_img_2 = ants.resample_image(old_like_ch_2, new_orig_ct1.spacing, False, 0)
        print('resample')
        print(new_img_2.shape)
        new_img_shape =  new_img_2.numpy().shape

        new_array = np.zeros(tuple([3] + list(new_img_shape)), dtype='float16')
        new_array[0] = new_img_0.numpy()
        new_array[1] = new_img_1.numpy()
        new_array[2] = new_img_2.numpy()
        output_file = new_array.astype('float16')
        
#         temp_arr[:img_orig.shape[0],:img_orig.shape[1],:img_orig.shape[2]]
        
        print(output_file.shape)
        return (output_file)

In [24]:
def sensitivity_and_specificity(mask_gt, mask_pred):
    """ Computes sensitivity and specificity
     sensitivity  = TP/(TP+FN)
     specificity  = TN/(TN+FP) """
    volume_sum = mask_gt.sum() + mask_pred.sum()
    tp = (mask_gt & mask_pred).sum()
    tn = (~mask_gt & ~mask_pred).sum()
    fp = (~mask_gt & mask_pred).sum()
    fn = (mask_gt & ~mask_pred).sum()
#     TP/(TP+FP) - precision; TP/(TP+FN) - recall
    return tp/(tp+fn), tp/(tp+fp), tn/(tn+fp)

In [25]:
def calculate_metrics_brats_1cl(true_mask, pred_mask, ids, spaces):
    """ Takes two file locations as input and validates surface distances.
    Be careful with dimensions of saved `pred` it should be 3D.
    
    """
    
    _columns = ['Ids','Dice_1'
                'Hausdorff95_1',
                'Sensitivity_1',
               'Specificity_1',
               'Surface_dice_1',
               'Precision_1']
    
    df = pd.DataFrame(columns = _columns)
    df.at[0,'Ids'] = ids
    #class 1
    distances = metrics.compute_surface_distances((true_mask==1), (pred_mask==1), spaces)
    df.at[0,'Dice_1'] = metrics.compute_dice_coefficient((true_mask==1), (pred_mask==1))
    df.at[0,'Surface_dice_1'] = metrics.compute_surface_dice_at_tolerance(distances,1)
    df.at[0,'Hausdorff95_1'] = metrics.compute_robust_hausdorff(distances, 95)
    sens, precision, spec = sensitivity_and_specificity((true_mask==1), (pred_mask==1))
    df.at[0,'Sensitivity_1'] = sens
    df.at[0,'Precision_1'] = precision
    df.at[0,'Specificity_1'] = spec
    return df

def calculate_metrics_brats(true_mask, pred_mask, ids, spaces):
    """ Takes two file locations as input and validates surface distances.
    Be careful with dimensions of saved `pred` it should be 3D.
    
    """
    
    _columns = ['Ids','Dice_1', 'Dice_2', 'Dice_3',
                'Hausdorff95_1', 'Hausdorff95_2', 'Hausdorff95_3',
                'Sensitivity_1', 'Sensitivity_2', 'Sensitivity_3',
               'Specificity_1', 'Specificity_2', 'Specificity_3',
               'Surface_dice_1', 'Surface_dice_2', 'Surface_dice_3',
               'Precision_1', 'Precision_2', 'Precision_3']
    
    
    df = pd.DataFrame(columns = _columns)
    df.at[0,'Ids'] = ids
    #class 1
    distances = metrics.compute_surface_distances((true_mask[0,:,:,:]==1), (pred_mask[0,:,:,:]==1), spaces)
    df.at[0,'Dice_1'] = metrics.compute_dice_coefficient((true_mask[0,:,:,:]==1), (pred_mask[0,:,:,:]==1))
    df.at[0,'Surface_dice_1'] = metrics.compute_surface_dice_at_tolerance(distances,1)
    df.at[0,'Hausdorff95_1'] = metrics.compute_robust_hausdorff(distances, 95)
    sens, precision, spec = sensitivity_and_specificity((true_mask[0,:,:,:]==1), (pred_mask[0,:,:,:]==1))
    df.at[0,'Sensitivity_1'] = sens
    df.at[0,'Precision_1'] = precision
    df.at[0,'Specificity_1'] = spec
    #class 2
    distances = metrics.compute_surface_distances((true_mask[1,:,:,:]==1), (pred_mask[1,:,:,:]==1), spaces)
    df.at[0,'Dice_2'] = metrics.compute_dice_coefficient((true_mask[1,:,:,:]==1), (pred_mask[1,:,:,:]==1))
    df.at[0,'Surface_dice_2'] = metrics.compute_surface_dice_at_tolerance(distances,1)
    df.at[0,'Hausdorff95_2'] = metrics.compute_robust_hausdorff(distances, 95)
    sens,precision, spec= sensitivity_and_specificity((true_mask[1,:,:,:]==1), (pred_mask[1,:,:,:]==1))
    df.at[0,'Sensitivity_2'] = sens
    df.at[0,'Precision_2'] = precision
    df.at[0,'Specificity_2'] = spec
    #class 3
    distances = metrics.compute_surface_distances((true_mask[2,:,:,:]==1), (pred_mask[2,:,:,:]==1), spaces)
    df.at[0,'Dice_3'] = metrics.compute_dice_coefficient((true_mask[2,:,:,:]==1), (pred_mask[2,:,:,:]==1))
    df.at[0,'Surface_dice_3'] = metrics.compute_surface_dice_at_tolerance(distances,1)
    df.at[0,'Hausdorff95_3'] = metrics.compute_robust_hausdorff(distances, 95)
    sens, precision, spec= sensitivity_and_specificity((true_mask[2,:,:,:]==1), (pred_mask[2,:,:,:]==1))
    df.at[0,'Sensitivity_3'] = sens
    df.at[0,'Precision_3'] = precision
    df.at[0,'Specificity_3'] = spec
    return df

In [26]:
def calculate_metrics(subjects, path_to_orig, path_to_pred, path_to_resamp, path_to_target, dataset, out = '/home/polina/glioma/all_dice_metrics.csv'  ):
    
    """ 
    - path_to_pred - path to folder with predict subjects
    - path_to_target - path to folder with target subjects
    - name_pred - name for prediction, ex -brainTumorMask_SRI.nii.gz
    - name_target - name for targets, ex -GTV_to_SRI.nii.gz
    - spaces - if false - [1,1,1]
    - name_csv - name files for each subjects
    - path_csv_all - path to the main file with metrics for each subjects
    """
    _columns = ['Ids','Dice_1', 'Dice_2', 'Dice_3',
                'Hausdorff95_1', 'Hausdorff95_2', 'Hausdorff95_3',
                'Sensitivity_1', 'Sensitivity_2', 'Sensitivity_3',
               'Specificity_1', 'Specificity_2', 'Specificity_3',
               'Surface_dice_1', 'Surface_dice_2', 'Surface_dice_3',
               'Precision_1', 'Precision_2', 'Precision_3']
#     _columns = ['Ids','Dice_1'
#                 'Hausdorff95_1',
#                 'Sensitivity_1',
#                'Specificity_1',
#                'Surface_dice_1',
#                'Precision_1']
    
    af_all = pd.DataFrame(columns = _columns)
    pred_folder = Path(path_to_pred)
    orig_folder = Path(path_to_orig)
    resamp_folder = Path(path_to_resamp)
    target_folder = Path(path_to_target)
    for ids in tqdm(subjects):
        print(ids)
        pred_sub = os.path.join(pred_folder, ids + '.npy.npz')
        orig_sub = os.path.join(orig_folder, ids,'CT1_SEG.nii.gz')
        resamp_sub = os.path.join(resamp_folder, ids,'CT1_SEG.nii.gz')
#         targets = nib.load(os.path.join(target_folder, ids + '_seg.nii.gz'))
#         targets = ants.image_read(orig_sub)
#         targets = ants.reorient_image2(label_orig, orientation = 'LAI')
#         targets = nib.load(orig_sub)
#         spaces = targets.header.get_zooms()
#         spaces = targets.spacing
#         targets = targets.numpy().astype('int')
        targets = ants.image_read(f'{target_folder}/{ids}/CT1_SEG.nii.gz')
        spaces = targets.spacing
#         targets = ants.reorient_image2(targets, orientation = 'LAI').numpy()
        targets = targets.numpy()
#         print(targets.shape)
#         print(np.unique(targets))
        data = np.load(pred_sub, allow_pickle=True)['arr_0']
#         print(data.shape)
        if np.shape(data)[0] == 1: 
            prediction = pred_invert_resample_1cl(data, orig_sub, resamp_sub)
            prediction = np.round(prediction, 0)
#         pred = np.transpose(pred, (0, 3, 2, 1))
#             print(prediction.shape)
#             print(np.unique(prediction))
            df = calculate_metrics_brats_1cl(targets.astype('int'), prediction.astype('int'), ids, spaces)
#             print(df)
        elif np.shape(data)[0] > 1: 
            prediction = pred_invert_resample_classes(data, orig_sub, resamp_sub)
            prediction = np.round(prediction, 0)
            y_wt, y_tc, y_et = targets > 0, ((targets == 1) + (targets == 3)) > 0, targets == 3
            targets = np.stack([y_wt, y_tc, y_et], axis=0).astype(int)
            print(targets.shape)
            print(prediction.shape)
            df=calculate_metrics_brats(targets.astype('int'), prediction.astype('int'), ids, spaces)
#             except:
#                 continue
#         os.makedirs(os.path.join(out, dataset,ids), exist_ok = True)
#         out_path = os.path.join(out, dataset,ids, path_to_pred.split('/')[-2] + '_'+ path_to_pred.split('best_')[-1].replace('=', '_') + '.json')
#        #         df.to_json(out_path, orient='records')
#         sub_dict = {dataset : {path_to_pred.split('/')[-2]: {path_to_pred.split('_')[-2]: {}}}}
#         sub_dict[dataset][path_to_pred.split('/')[-2]][path_to_pred.split('_')[-2]] = df.to_dict('records')[0]
#         print(sub_dict)
#         with open(out_path, 'w') as fp:
#             json.dump(sub_dict, fp)
#         print(df)
        af_all = af_all.append(df)
#     af_all.to_csv(out)  
    print(af_all.mean())
    print(len(af_all))

In [27]:
# Гипотеза первая, проблeма в ориентации 4a_resamp относительно

In [28]:
orig_folder = '/anvar/public_datasets/preproc_study/gbm/orig'
resample_folder = '/anvar/public_datasets/preproc_study/gbm/backlog/4a_resamp'
pred_folder = '/anvar/public_datasets/preproc_study/gbm/infer/gbm_4a_resamp_wo_z/predictions_best_epoch=146-dice_mean=76_62_task=43_fold=0_tta'
target_folder = '/anvar/public_datasets/preproc_study/gbm/1_reg'
reg_1 = '/anvar/public_datasets/preproc_study/gbm/1_reg'
out_json = '/results/metrics'
dataset = 'gbm'

subjects = [each[:-8] for each in os.listdir(pred_folder)]
# ['TCGA-06-0179','TCGA-76-6661']
calculate_metrics(subjects, reg_1, pred_folder, resample_folder, target_folder, dataset, out = out_json)

  0%|          | 0/102 [00:00<?, ?it/s]

TCGA-08-0392
(3, 260, 260, 186)
(260, 260, 186)
(256, 256, 124)
(260, 260, 186)
old_like
(260, 260, 186)
resample
(256, 256, 124)
(3, 256, 256, 124)
(3, 256, 256, 124)
(3, 256, 256, 124)


  1%|          | 1/102 [00:07<13:26,  7.98s/it]

TCGA-76-6664
(3, 220, 220, 154)
(220, 220, 154)
(256, 256, 22)
(220, 220, 154)
old_like
(220, 220, 154)
resample
(256, 256, 22)
(3, 256, 256, 22)
(3, 256, 256, 22)
(3, 256, 256, 22)


  2%|▏         | 2/102 [00:10<10:34,  6.35s/it]

TCGA-02-0085
(3, 200, 200, 143)
(200, 200, 143)
(256, 256, 22)
(200, 200, 143)
old_like
(200, 200, 143)
resample
(256, 256, 22)
(3, 256, 256, 22)
(3, 256, 256, 22)
(3, 256, 256, 22)


  3%|▎         | 3/102 [00:12<08:24,  5.09s/it]

TCGA-14-1794
(3, 230, 230, 150)
(230, 230, 150)
(512, 512, 25)
(230, 230, 150)
old_like
(230, 230, 150)
resample
(512, 512, 25)
(3, 512, 512, 25)
(3, 512, 512, 25)
(3, 512, 512, 25)


  4%|▍         | 4/102 [00:18<08:48,  5.40s/it]

TCGA-19-5958
(3, 256, 256, 192)
(256, 256, 192)
(256, 256, 192)
(256, 256, 192)
old_like
(256, 256, 192)
resample
(256, 256, 192)
(3, 256, 256, 192)
(3, 256, 256, 192)
(3, 256, 256, 192)


  5%|▍         | 5/102 [00:28<10:53,  6.74s/it]

TCGA-76-6656
(3, 230, 230, 150)
(230, 230, 150)
(320, 320, 30)
(230, 230, 150)
old_like
(230, 230, 150)
resample
(320, 320, 30)
(3, 320, 320, 30)
(3, 320, 320, 30)
(3, 320, 320, 30)


  6%|▌         | 6/102 [00:32<09:10,  5.74s/it]

TCGA-02-0070
(3, 220, 220, 156)
(220, 220, 156)
(256, 256, 24)
(220, 220, 156)
old_like
(220, 220, 156)
resample
(256, 256, 24)
(3, 256, 256, 24)
(3, 256, 256, 24)
(3, 256, 256, 24)


  7%|▋         | 7/102 [00:34<07:35,  4.79s/it]

TCGA-76-4934
(3, 230, 230, 160)
(230, 230, 160)
(320, 320, 32)
(230, 230, 160)
old_like
(230, 230, 160)
resample
(320, 320, 32)
(3, 320, 320, 32)
(3, 320, 320, 32)
(3, 320, 320, 32)


  8%|▊         | 8/102 [00:38<07:00,  4.48s/it]

TCGA-76-6662
(3, 240, 240, 168)
(240, 240, 168)
(320, 320, 24)
(240, 240, 168)
old_like
(240, 240, 168)
resample
(320, 320, 24)
(3, 320, 320, 24)
(3, 320, 320, 24)
(3, 320, 320, 24)


  9%|▉         | 9/102 [00:41<06:29,  4.18s/it]

TCGA-12-1094
(3, 240, 240, 143)
(240, 240, 143)
(256, 256, 22)
(240, 240, 143)
old_like
(240, 240, 143)
resample
(256, 256, 22)
(3, 256, 256, 22)
(3, 256, 256, 22)
(3, 256, 256, 22)


 10%|▉         | 10/102 [00:44<05:47,  3.77s/it]

TCGA-76-6663
(3, 220, 220, 165)
(220, 220, 165)
(288, 288, 22)
(220, 220, 165)
old_like
(220, 220, 165)
resample
(288, 288, 22)
(3, 288, 288, 22)
(3, 288, 288, 22)
(3, 288, 288, 22)


 11%|█         | 11/102 [00:47<05:13,  3.44s/it]

TCGA-06-5408
(3, 240, 240, 170)
(240, 240, 170)
(256, 256, 68)
(240, 240, 170)
old_like
(240, 240, 170)
resample
(256, 256, 68)
(3, 256, 256, 68)
(3, 256, 256, 68)
(3, 256, 256, 68)


 12%|█▏        | 12/102 [00:52<05:56,  3.96s/it]

TCGA-19-5951
(3, 173, 230, 140)
(173, 230, 140)
(240, 320, 31)
(173, 230, 140)
old_like
(173, 230, 140)
resample
(241, 320, 31)


 12%|█▏        | 12/102 [00:54<06:50,  4.56s/it]

(3, 241, 320, 31)
(3, 240, 320, 31)
(3, 241, 320, 31)





ValueError: operands could not be broadcast together with shapes (240,320,31) (241,320,31) 

In [57]:
!cd /data_anvar/public_datasets/preproc_study/gbm/1_reg/TCGA-19-5954 && ls

CT1.nii.gz  CT1_SEG.nii.gz  FLAIR.nii.gz  T1.nii.gz  T2.nii.gz


In [241]:
out_json = '/results/metrics/schw/'
dataset = 'schw'
task = 'schw_4a_resamp'
fold = 'fold_0'
dice = []
hausdorff = []
sensitivity = []
specificity = []
surface_dice = []
precision = []
_columns = ['Ids','Dice_1'
                'Hausdorff95_1',
                'Sensitivity_1',
               'Specificity_1',
               'Surface_dice_1',
               'Precision_1'] 
metrics_all = pd.DataFrame(columns = _columns)
for sub in os.listdir(out_json):
    for sub_t in os.listdir(os.path.join(out_json, sub)):
        if task in sub_t:
            if fold in sub_t:
                print(os.path.join(out_json, sub, sub_t))
                metrics = json.load(open(os.path.join(out_json, sub, sub_t)))
                metrics = metrics[dataset][task][fold.replace('_', '=')]
                for each in metrics:
                    metrics_all[each]
                    
                af_all = af_all.append(df)
                dice.append(metrics['Dice_1'])
                hausdorff.append(metrics['Hausdorff95_1'])
                sensitivity.append(metrics['Sensitivity_1'])
                specificity.append(metrics['Specificity_1'])
                surface_dice.append(metrics['Surface_dice_1'])
                precision.append(metrics['Precision_1'])


/results/metrics/schw/VS-SEG-191/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-108/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-073/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-050/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-062/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-031/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-096/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-111/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-166/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-021/schw_4a_resamp_epoch_88-dice_mean_89_45_task_39_fold_0_tta.json
/results/metrics/schw/VS-SEG-0

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 20: invalid start byte