## Metrics

汇总常见二分类任务的评价指标，例如：AUC、ROC 曲线、ACC、敏感性、特异性、精确度、召回率、PPV、NPV、F1。

具体的介绍，可以参考一下：https://blog.csdn.net/sunflower_sara/article/details/81214897

In [None]:
import os
import pandas as pd
from datetime import datetime
from onekey_algo import get_param_in_cwd

# Make sure the folders that later cells write figures and tables into exist.
for out_dir in ('img', 'results'):
    os.makedirs(out_dir, exist_ok=True)

# Keep only the sample ID and cohort ("group") columns of group.csv.
group_info = pd.read_csv('group.csv')[['ID', 'group']]
# Rewrite a '.png' suffix in the ID to '.gz'
# (presumably so IDs match the '.nii.gz' IDs built from the prediction logs - TODO confirm).
group_info['ID'] = group_info['ID'].map(lambda sample_id: sample_id.replace('.png', '.gz'))
# Show how many samples each cohort holds, then the table itself.
display(group_info['group'].value_counts())
group_info

In [None]:
import pandas as pd
import numpy  as np
import re
from onekey_algo.custom.components import metrics
from onekey_algo.custom.components.comp1 import draw_roc, normalize_df
from onekey_algo.custom.components.ugly import drop_error
from matplotlib import pyplot as plt

def get_group(x):
    """Return the cohort tag encoded in the file name of ``x``.

    Base names starting with ``train`` or ``val`` map to exactly that tag.
    Any other base name maps to its first two ``_``-separated tokens joined
    with ``_`` (or the whole name when it holds fewer than two tokens).
    """
    name = os.path.basename(x)
    for tag in ('train', 'val'):
        if name.startswith(tag):
            return tag
    tokens = name.split('_')
    return '_'.join(tokens[:2])

def get_log(log_path, map2gz:bool = True):
    """Load a headerless, tab-separated prediction log and add an ``ID`` column.

    Args:
        log_path: Path (or buffer) handed to ``pd.read_csv``; its columns are
            read as ``fname``, ``pred_score``, ``pred_label`` and ``gt``.
        map2gz: When true, ``ID`` is the part of the file's base name before
            the first ``_``, ``|`` or ``.`` with ``.nii.gz`` appended.
            Otherwise ``ID`` is the base name itself.

    Returns:
        The loaded ``DataFrame`` including the new ``ID`` column.
    """
    columns = ['fname', 'pred_score', 'pred_label', 'gt']
    frame = pd.read_csv(log_path, sep='\t', names=columns)
    base_names = frame['fname'].map(os.path.basename)
    if not map2gz:
        frame['ID'] = base_names
        return frame
    frame['ID'] = base_names.map(lambda name: re.split('[_|.]', name)[0] + '.nii.gz')
    return frame

def map_mn(x):
    """Return ``x`` with known backbone name fragments rewritten in display casing."""
    pretty = x
    # Applied in this fixed order, each pair replaces every occurrence.
    for raw, shown in (('densen', 'DenseN'), ('resnet', 'ResNet'),
                       ('vgg', 'VGG'), ('inception_v3', 'InceptionV3')):
        pretty = pretty.replace(raw, shown)
    return pretty

# Notebook-level accumulators. Only `all_log_` is filled by this cell; the
# other cells of this notebook re-create these names before using them.
all_log_ = []
metrics_dfs = []
sel_idx = {'test': set(), 'val': set()}
# Epoch number of the prediction logs to load, keyed by backbone and then by
# data folder ('<modality>' for automatic ROIs, '<modality>_anno' for human ones).
# NOTE(review): the CrossFormer endoscope epochs here (141 / 110) differ from the
# 251 / 230 used by the other cells of this notebook - confirm which is intended.
epoch_mapping = {
    'resnet101': {'MR-CE_anno': 37, 'MR-CE': 32, 'MR-T2_anno': 48, 'MR-T2': 42,
                  'endoscope': 25, 'endoscope_anno': 38},
    'densenet121': {'MR-CE_anno': 45, 'MR-CE': 23, 'MR-T2_anno': 40, 'MR-T2': 44,
                    'endoscope': 28, 'endoscope_anno': 38},
    'CrossFormer': {'MR-CE_anno': 253, 'MR-CE': 244, 'MR-T2_anno': 285, 'MR-T2': 206,
                    'endoscope': 141, 'endoscope_anno': 110},
}

sel_models = ['densenet121', 'CrossFormer']
mn = 'CrossFormer'
root = get_param_in_cwd('radio_dir')
modals = ['MR-CE', 'MR-T2', 'endoscope']
# Folder suffix -> ROI source name shown in the figure title.
roi_names = {'': 'Automatic Segmentation', '_anno': 'Human Annotation'}

# One figure per (model, ROI source, cohort): the ROC curves of the three
# modalities are drawn together so they can be compared.
for model in sel_models:
    for p in ['', '_anno']:
        for g in get_param_in_cwd('subsets'):
            all_pred = []
            all_gt = []
            all_groups = []
            for modal in modals:
                data_name = modal + p
                model_root = os.path.join(root, 'Classification', data_name, 'CV-6')
                epoch = epoch_mapping[model][data_name]
                # The train and valid logs together cover every sample; the
                # cohort is attached afterwards by joining on the sample ID.
                val_log = pd.concat([get_log(os.path.join(model_root, model, f"train/Epoch-{epoch}.txt")),
                                     get_log(os.path.join(model_root, model, f"valid/Epoch-{epoch}.txt"))], axis=0)
                val_log = pd.merge(val_log, group_info, on='ID', how='inner')
                # Tag rows with the full data folder name so the two ROI
                # sources stay distinguishable inside `all_log_`.
                val_log['model'] = f"{model}_{data_name}"
                # Explicit copy: new columns are added below, and writing to a
                # filtered slice raises pandas' SettingWithCopyWarning.
                sub_group = val_log[val_log['group'] == g].copy()
                print(modal, g, sub_group.shape)
                # `pred_score` is the score of the predicted label; turn it
                # into the score of label 1 regardless of the prediction.
                sub_group['label-1'] = np.where(sub_group['pred_label'] == 1,
                                                sub_group['pred_score'],
                                                1 - sub_group['pred_score'])
                sub_group['label-0'] = 1 - sub_group['label-1']
                all_groups.append(g)
                all_log_.append(sub_group)
                # Only the ROC inputs are needed here; no metric table is
                # built in this cell.
                all_pred.append(np.array(sub_group['label-1']))
                all_gt.append((sub_group['gt'] == 1).astype(int).tolist())
            draw_roc(all_gt, all_pred,
                     labels=modals,
                     title=f"Model: {map_mn(model)}, ROI: {roi_names[p]}, Cohort {g}")
            plt.savefig(f"img/DTL_{model}_{'a' if '' == p else 'h'}_modal_cmp_{g}.svg", bbox_inches='tight')
            plt.show()

In [None]:
import pandas as pd
import numpy  as np
import re
from onekey_algo.custom.components import metrics
from onekey_algo.custom.components.comp1 import draw_roc, normalize_df
from onekey_algo.custom.components.ugly import drop_error
from matplotlib import pyplot as plt

def get_group(x):
    """Map a file path to the cohort tag carried by its base name.

    ``train*`` -> ``'train'``, ``val*`` -> ``'val'``; everything else yields
    the first two ``_``-separated pieces of the base name joined by ``_``.
    """
    base = os.path.basename(x)
    known_prefixes = ('train', 'val')
    matched = [prefix for prefix in known_prefixes if base.startswith(prefix)]
    if matched:
        return matched[0]
    head = base.split('_')[:2]
    return '_'.join(head)

def get_log(log_path, map2gz:bool = True):
    """Read a tab-separated prediction log (no header row) and derive sample IDs.

    Args:
        log_path: Source passed straight to ``pd.read_csv``. The columns are
            named ``fname``, ``pred_score``, ``pred_label`` and ``gt``.
        map2gz: If true, the ID is the leading piece of the base name (up to
            the first ``_``, ``|`` or ``.``) followed by ``.nii.gz``; if false,
            the ID is the unchanged base name.

    Returns:
        ``DataFrame`` with the four log columns plus ``ID``.
    """
    def _to_id(path):
        # Base name first, then optionally collapse it to '<stem>.nii.gz'.
        base = os.path.basename(path)
        if map2gz:
            stem = re.split('[_|.]', base)[0]
            return f"{stem}.nii.gz"
        return base

    table = pd.read_csv(log_path, names=['fname', 'pred_score', 'pred_label', 'gt'], sep='\t')
    table['ID'] = table['fname'].map(_to_id)
    return table

def map_mn(x):
    """Rewrite lower-case backbone fragments in ``x`` to their display spelling."""
    replacements = [
        ('densen', 'DenseN'),
        ('resnet', 'ResNet'),
        ('vgg', 'VGG'),
        ('inception_v3', 'InceptionV3'),
    ]
    result = x
    for old, new in replacements:
        result = result.replace(old, new)
    return result

# Notebook-level accumulators. Only `all_log_` is filled by this cell; the
# summary cell further down re-creates these names before using them.
all_log_ = []
metrics_dfs = []
sel_idx = {'test': set(), 'val': set()}
# Epoch number of the prediction logs to load, keyed by backbone and then by
# data folder ('<modality>' for automatic ROIs, '<modality>_anno' for human ones).
epoch_mapping = {
    'resnet101': {'MR-CE_anno': 37, 'MR-CE': 32, 'MR-T2_anno': 48, 'MR-T2': 42,
                  'endoscope': 25, 'endoscope_anno': 38},
    'densenet121': {'MR-CE_anno': 45, 'MR-CE': 23, 'MR-T2_anno': 40, 'MR-T2': 44,
                    'endoscope': 28, 'endoscope_anno': 38},
    'CrossFormer': {'MR-CE_anno': 253, 'MR-CE': 244, 'MR-T2_anno': 285, 'MR-T2': 206,
                    'endoscope': 251, 'endoscope_anno': 230},
}

sel_models = ['densenet121', 'CrossFormer']
mn = 'CrossFormer'
root = get_param_in_cwd('radio_dir')
# Folder suffix -> legend label; insertion order fixes the curve order.
roi_labels = {'': 'Automatic Segmentation', '_anno': 'Human Annotation'}

# One figure per (model, modality, cohort): the ROC curve obtained with
# automatic ROIs is drawn next to the one obtained with human annotation.
for model in sel_models:
    for modal in ['MR-CE', 'MR-T2', 'endoscope']:
        for g in get_param_in_cwd('subsets'):
            all_pred = []
            all_gt = []
            all_groups = []
            for p in roi_labels:
                data_name = modal + p
                model_root = os.path.join(root, 'Classification', data_name, 'CV-6')
                epoch = epoch_mapping[model][data_name]
                # The train and valid logs together cover every sample; the
                # cohort is attached afterwards by joining on the sample ID.
                val_log = pd.concat([get_log(os.path.join(model_root, model, f"train/Epoch-{epoch}.txt")),
                                     get_log(os.path.join(model_root, model, f"valid/Epoch-{epoch}.txt"))], axis=0)
                val_log = pd.merge(val_log, group_info, on='ID', how='inner')
                # Tag rows with the full data folder name so the two ROI
                # sources stay distinguishable inside `all_log_`.
                val_log['model'] = f"{model}_{data_name}"
                # Explicit copy: new columns are added below, and writing to a
                # filtered slice raises pandas' SettingWithCopyWarning.
                sub_group = val_log[val_log['group'] == g].copy()
                print(modal, g, sub_group.shape)
                # `pred_score` is the score of the predicted label; turn it
                # into the score of label 1 regardless of the prediction.
                sub_group['label-1'] = np.where(sub_group['pred_label'] == 1,
                                                sub_group['pred_score'],
                                                1 - sub_group['pred_score'])
                sub_group['label-0'] = 1 - sub_group['label-1']
                all_groups.append(g)
                all_log_.append(sub_group)
                # Only the ROC inputs are needed here; no metric table is
                # built in this cell.
                all_pred.append(np.array(sub_group['label-1']))
                all_gt.append((sub_group['gt'] == 1).astype(int).tolist())
            draw_roc(all_gt, all_pred,
                     labels=list(roi_labels.values()),
                     title=f"Model: {map_mn(model)}, Modal {modal}, Cohort {g}")
            plt.savefig(f'img/DTL_{model}_{modal}_cmp_{g}.svg', bbox_inches='tight')
            plt.show()

In [None]:
import pandas as pd
import numpy  as np
import re
from onekey_algo.custom.components import metrics
from onekey_algo.custom.components.comp1 import draw_roc, normalize_df
from onekey_algo.custom.components.ugly import drop_error
from matplotlib import pyplot as plt

def get_group(x):
    """Derive the cohort tag from the base name of path ``x``.

    Returns ``'train'`` or ``'val'`` when the base name starts with that word,
    otherwise the first two ``_``-delimited parts of the base name re-joined
    with ``_``.
    """
    fname = os.path.basename(x)
    if fname.startswith('train'):
        return 'train'
    if fname.startswith('val'):
        return 'val'
    leading_parts = fname.split('_')[:2]
    return '_'.join(leading_parts)

def get_log(log_path, map2gz:bool = True):
    """Parse a prediction log into a ``DataFrame`` keyed by sample ``ID``.

    The log is tab-separated without a header; its columns are labelled
    ``fname``, ``pred_score``, ``pred_label`` and ``gt``.

    Args:
        log_path: Location of the log, forwarded to ``pd.read_csv``.
        map2gz: Selects how ``ID`` is built from the base name of ``fname``:
            true keeps the text before the first ``_``, ``|`` or ``.`` and
            appends ``.nii.gz``; false keeps the base name as is.

    Returns:
        The parsed table with an added ``ID`` column.
    """
    records = pd.read_csv(log_path, sep='\t',
                          names=['fname', 'pred_score', 'pred_label', 'gt'])
    file_names = records['fname'].map(os.path.basename)
    if map2gz:
        file_names = file_names.map(lambda base: re.split('[_|.]', base)[0] + '.nii.gz')
    records['ID'] = file_names
    return records

def map_mn(x):
    """Give a model name its display spelling (e.g. ``densen`` -> ``DenseN``)."""
    display_forms = {
        'densen': 'DenseN',
        'resnet': 'ResNet',
        'vgg': 'VGG',
        'inception_v3': 'InceptionV3',
    }
    # Dict order is insertion order, so replacements run in the listed sequence.
    label = x
    for fragment, replacement in display_forms.items():
        label = label.replace(fragment, replacement)
    return label

# Notebook-level accumulators consumed by the cells below this one:
# `all_log_` collects every per-cohort prediction table, `metrics_dfs` one
# metric table per data folder, `sel_idx` the sample IDs excluded per cohort.
all_log_ = []
metrics_dfs = []
sel_idx = {'test': set(), 'val': set()}
# Epoch number of the prediction logs to load, keyed by backbone and then by
# data folder name.
epoch_mapping = {
    'resnet101': {'MR-CE_anno': 37, 'MR-CE': 32, 'MR-T2_anno': 48, 'MR-T2': 42,
                  'endoscope': 25, 'endoscope_anno': 38},
    'densenet121': {'MR-CE_anno': 45, 'MR-CE': 23, 'MR-T2_anno': 40, 'MR-T2': 44,
                    'endoscope': 28, 'endoscope_anno': 38},
    'CrossFormer': {'MR-CE_anno': 253, 'MR-CE': 244, 'MR-T2_anno': 285, 'MR-T2': 206,
                    'endoscope': 251, 'endoscope_anno': 230},
}

sel_models = ['densenet121', 'CrossFormer']
mn = 'CrossFormer'
root = get_param_in_cwd('radio_dir')
# Cross-validation split whose logs are analysed.
cv = 6
# Switch for the `drop_error` sample-exclusion step below; it is off, so
# `sel_idx` stays empty and no sample is excluded.
drop_errors = False
# Cohort names are the same for every model and data folder: read them once.
ug_groups = list(get_param_in_cwd('subsets'))
cls_root = os.path.join(root, 'Classification')
# Sorted so the run order (and the row order of everything exported later) is
# reproducible; hidden entries and plain files are not data folders.
for modal in sorted(os.listdir(cls_root)):
    if modal.startswith('.') or not os.path.isdir(os.path.join(cls_root, modal)):
        continue
    model_root = os.path.join(cls_root, modal, f'CV-{cv}')
    metric_results = []
    all_preds = []
    all_gts = []
    all_model_names = []
    for model in sel_models:
        all_pred = []
        all_gt = []
        all_groups = []
        epoch = epoch_mapping[model][modal]
        # The train and valid logs together cover every sample; the cohort is
        # attached afterwards by joining on the sample ID.
        val_log = pd.concat([get_log(os.path.join(model_root, model, f"train/Epoch-{epoch}.txt")),
                             get_log(os.path.join(model_root, model, f"valid/Epoch-{epoch}.txt"))], axis=0)
        val_log = pd.merge(val_log, group_info, on='ID', how='inner')
        val_log['model'] = f"{model}_{modal}"
        for g in ug_groups:
            sub_group = val_log[val_log['group'] == g]
            if g in ['val', 'test']:
                if drop_errors and model == 'CrossFormer' and ('endoscope' not in modal or 'MR' in modal):
                    s_idx, = drop_error([sub_group['pred_label']], [sub_group['gt']], [sub_group['pred_score']],
                                        ratio=2 if g == 'val' else 1, mode='random',
                                        verbose=True, )
                    sel_idx[g] |= set(sub_group['ID'])-set(sub_group[s_idx]['ID'])
                    print(g, sub_group.shape, sub_group[s_idx].shape, sel_idx[g])
                sub_group = sub_group[~sub_group['ID'].isin(sel_idx[g])]
            # Explicit copy: new columns are added below, and writing to a
            # filtered slice raises pandas' SettingWithCopyWarning.
            sub_group = sub_group.copy()
            # Log the epoch that was actually loaded.
            print(modal, epoch, cv, g, sub_group.shape)
            # `pred_score` is the score of the predicted label; turn it into
            # the score of label 1 regardless of the prediction.
            sub_group['label-1'] = np.where(sub_group['pred_label'] == 1,
                                            sub_group['pred_score'],
                                            1 - sub_group['pred_score'])
            sub_group['label-0'] = 1 - sub_group['label-1']
            sub_group[['ID', 'label-0', 'label-1']].to_csv(os.path.join('results', f'{model}_{modal}_{g}.csv'), index=False)
            all_groups.append(g)
            all_log_.append(sub_group)
            pred_score = np.array(sub_group['label-1'])
            gt = (sub_group['gt'] == 1).astype(int).tolist()
            acc, auc, ci, tpr, tnr, ppv, npv, _, _, _, thres = metrics.analysis_pred_binary(gt, pred_score)
            ci = f"{ci[0]:.4f}-{ci[1]:.4f}"
            metric_results.append([model, acc, auc, ci, tpr, tnr, ppv, npv, thres, modal, g])
            all_pred.append(pred_score)
            all_gt.append(gt)
        # ROC curves of this model, one curve per cohort.
        draw_roc(all_gt, all_pred, labels=all_groups, title=f"Modal: {modal}, Model: {map_mn(model)}")
        plt.savefig(f'img/{modal}_{model}_roc.svg', bbox_inches='tight')
        plt.show()
        # Collect this model's curves for the cross-model figures below.
        all_preds.extend(all_pred)
        all_gts.extend(all_gt)
        all_model_names.append(model)
    # `all_gts` / `all_preds` are laid out model-major (all cohorts of model 0,
    # then of model 1, ...), so a stride of len(ug_groups) starting at `gi`
    # picks cohort `g` of every model.
    n_groups = len(ug_groups)
    for gi, g in enumerate(ug_groups):
        draw_roc(all_gts[gi::n_groups], all_preds[gi::n_groups],
                 labels=[map_mn(m) for m in all_model_names],
                 title=f"Modal {modal}, Cohort {g}")
        plt.savefig(f'img/DTL_{modal}_{g}.svg', bbox_inches='tight')
        plt.show()
    # The 'Youden' column holds the threshold returned by analysis_pred_binary.
    metrics_df = pd.DataFrame(metric_results,
                              columns=['ModelName', 'Acc', 'AUC', '95% CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                                       'Youden', 'Modal', 'Cohort'])
    display(metrics_df)
    metrics_dfs.append(metrics_df)
pd.concat(metrics_dfs, axis=0)

In [None]:
# Stack every per-cohort prediction table collected by the previous cell.
all_logs = pd.concat(all_log_, axis=0)
# Keep the rows whose model tag mentions densenet121 and export their scores.
is_densenet = all_logs['model'].str.contains('densenet121')
sel_log = all_logs[is_densenet]
export_cols = ['ID', 'label-1', 'pred_label', 'gt']
sel_log[export_cols].to_csv('results/ALL_DL_PREDICTIONS.csv', index=False)
sel_log

In [None]:
from onekey_algo.custom.components.ugly import drop_error

# Fuse the predictions that share a sample ID (mean / max / min of the
# label-1 score) and evaluate each fusion rule per model and cohort.
metric_results = []
sel_data = []
all_log = pd.concat(all_log_, axis=0)
# Rows whose model tag mentions 'endoscope' take no part in the fusion.
all_log = all_log[~all_log['model'].str.contains('endoscope')]
methods = ['mean', 'max', 'min']
# Only numeric columns are aggregated: the frame also carries string columns
# (fname, group, model), and `agg('mean')` raises on those with pandas >= 2.0.
fused_cols = ['gt', 'label-1']
# Switch for the `drop_error` sample-exclusion step below; it is off.
drop_test_errors = False
for model in sel_models:
    # Read the cohorts directly instead of relying on a loop variable left
    # behind by an earlier cell.
    for g in get_param_in_cwd('subsets'):
        gts = []
        preds = []
        # Same rows for every fusion rule, so select them once.
        members = all_log[(all_log['group'] == g) & (all_log['model'].str.contains(model))]
        for method in methods:
            sub_group = members.groupby('ID')[fused_cols].agg(method).reset_index()
            # Derive label-0 from the fused label-1 so both scores sum to 1
            # (aggregating label-0 on its own breaks that for max / min).
            sub_group['label-0'] = 1 - sub_group['label-1']
            sub_group['group'] = g
            sub_group['pred_label'] = sub_group['label-1'].map(lambda x: 1 if x > 0.5 else 0)
            sub_group['pred_score'] = sub_group['label-1'].map(lambda x: x if x > 0.5 else 1-x)
            if drop_test_errors and g in ['Test']:
                if method == 'mean':
                    # The mask is computed on the 'mean' fusion and re-used for
                    # the other rules; it has its own name so the module-level
                    # `sel_idx` dict is not overwritten.
                    keep_idx, = drop_error([sub_group['pred_label']], [sub_group['gt']], [sub_group['pred_score']], ratio=3,
                                           verbose=True)
                    print(method, g, sub_group.shape, sub_group[keep_idx].shape, set(sub_group['ID'])-set(sub_group[keep_idx]['ID']))
                sub_group = sub_group[keep_idx]
            sub_group[['ID', 'label-0', 'label-1']].to_csv(os.path.join('results', f'2DL_{model}_{method}_{g}.csv'), index=False)
            sel_data.append(sub_group[['ID', 'gt', 'label-1', 'group']])
            gt = np.array(sub_group['gt'])
            pred_score = np.array(sub_group['label-1'])
            gts.append(gt)
            preds.append(pred_score)
            acc, auc, ci, tpr, tnr, ppv, npv, _, _, _, thres = metrics.analysis_pred_binary(gt, pred_score)
            ci = f"{ci[0]:.4f}-{ci[1]:.4f}"
            metric_results.append([model, acc, auc, ci, tpr, tnr, ppv, npv,
                                   thres, method, g])

        # One figure per (model, cohort) with one curve per fusion rule.
        draw_roc(gts, preds, labels=methods,  title=f"Model: {map_mn(model)}, Cohort: {g}")
        plt.savefig(f'img/2DTL_Fusion_{model}_{g}_roc.svg', bbox_inches='tight')
        plt.show()
# In this table 'Modal' holds the fusion rule and 'Youden' the threshold
# returned by analysis_pred_binary.
metrics_df = pd.DataFrame(metric_results,
                          columns=['ModelName', 'Acc', 'AUC', '95% CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                                   'Youden', 'Modal', 'Cohort'])
display(metrics_df)