## Metrics

汇总常见二分类任务的评价指标，例如：AUC、ROC 曲线、ACC（准确率）、敏感性、特异性、精确度、召回率、PPV、NPV、F1。

各指标的具体介绍可以参考：https://blog.csdn.net/sunflower_sara/article/details/81214897

In [None]:
import os
from datetime import datetime
from onekey_algo import get_param_in_cwd
import pandas as pd
import numpy as np

# Make sure the output folders exist: figures are saved to img/, tables to results/.
for out_dir in ('img', 'results'):
    os.makedirs(out_dir, exist_ok=True)

# ids.csv is inner-joined (on 'fname') with the prediction logs in the task cells.
ids = pd.read_csv('ids.csv')
# features/all.csv supplies the 'ori' -> 'ID' lookup that get_group uses to pick a cohort.
mapping = pd.read_csv('features/all.csv')
map2_group = dict(zip(mapping['ori'], mapping['ID']))
map2_group  # bare expression: shows the lookup table as the notebook cell output

In [None]:
import pandas as pd
import numpy  as np
from onekey_algo.custom.components import metrics
from onekey_algo.custom.components.comp1 import draw_roc
from matplotlib import pyplot as plt

def get_group(x):
    """Return the cohort name ('train', 'val' or 'test') for sample key *x*.

    *x* is looked up in the module-level ``map2_group`` dict and the prefix of
    the resulting ID selects the cohort: ``A_`` -> 'train', ``B_`` -> 'val',
    any other ID -> 'test'.  Raises ``KeyError`` if *x* is not in ``map2_group``.
    """
    sample_id = map2_group[x]
    for prefix, cohort_name in (('A_', 'train'), ('B_', 'val')):
        if sample_id.startswith(prefix):
            return cohort_name
    return 'test'

# Task 1 (label1): evaluate the resnet101 model of every modality, cohort by cohort.
# For each modality this reads the viz/BST_{TRAIN,VAL}_RESULTS.txt prediction logs,
# assigns every sample to a cohort with get_group, exports the per-cohort class scores
# to results/, computes the binary metrics and saves one ROC figure per model to img/.
# Change log_path below if the Onekey logs are stored somewhere else.
all_log = []
for modal in ['2.5D', '2D']:
    model_root = os.path.join(get_param_in_cwd('radio_dir', 'models'), 'label1', f"models{modal}")
    metric_results = []  # one row per (model, cohort); reset for every modality
    all_predict_scores = []
    all_gts = []
    for model in [m for m in os.listdir(model_root) if m == 'resnet101']:
        all_pred = []
        all_gt = []
        all_groups = []
        for subset in ['Train', 'Test']:
            cohort = 'TRAIN' if subset == 'Train' else 'VAL'
            log_path = os.path.join(model_root, model, f"viz/BST_{cohort}_RESULTS.txt")
            val_log = pd.read_csv(log_path, names=['fname', 'pred_score', 'pred_label', 'gt'], sep='\t')
            val_log['group'] = val_log['fname'].map(get_group)
            val_log['model'] = f"{model}_{modal}"
            # Collected before the merge with ids, so all_log keeps every logged row.
            all_log.append(val_log)
            val_log = pd.merge(val_log, ids, on='fname', how='inner')
            # The code treats pred_score as the score of pred_label; derive the score
            # of class 0 and of class 1 from it.
            raw_scores = val_log['pred_score']
            val_log['label-0'] = np.where(val_log['pred_label'] == 0, raw_scores, 1 - raw_scores)
            val_log['label-1'] = np.where(val_log['pred_label'] == 1, raw_scores, 1 - raw_scores)
            val_log['ID'] = val_log['fname'].map(lambda x: map2_group[x])
            # Cohorts are handled in a fixed order. A cohort without any row in this log is
            # skipped so that no empty CSV is written and no metric is computed on empty input.
            expected_groups = ['train'] if subset == 'Train' else ['val', 'test']
            present_groups = set(val_log['group'])
            ug_groups = [g for g in expected_groups if g in present_groups]
            missing_groups = [g for g in expected_groups if g not in present_groups]
            if missing_groups:
                print(f"[{model} {modal}] no samples for cohort(s) {missing_groups} in {log_path}; skipped.")
            for g in ug_groups:
                sub_group = val_log[val_log['group'] == g]
                sub_group[['ID', 'label-0', 'label-1']].to_csv(f'results/DL{modal}_{model}_label1_{g}.csv', index=False)
                all_groups.append(g)
                for ul in [1]:  # label treated as the positive class
                    raw = np.array(sub_group['pred_score'])
                    is_ul = np.array(sub_group['pred_label']) == ul
                    score_ul = np.where(is_ul, raw, 1 - raw)  # score of class `ul`
                    pred_score = list(score_ul)
                    gt = [1 if gt_ == ul else 0 for gt_ in np.array(sub_group['gt'])]
                    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = metrics.analysis_pred_binary(
                        gt, pred_score, use_youden=False)
                    ci = f"{ci[0]:.4f}-{ci[1]:.4f}"
                    metric_results.append([model, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, g])
                    # Two columns per sample: (score of the other class, score of class `ul`).
                    all_pred.append(np.column_stack([np.where(is_ul, 1 - raw, raw), score_ul]))
                    all_gt.append(gt)
        all_predict_scores.extend(all_pred)
        all_gts.extend(all_gt)
        draw_roc(all_gt, all_pred, labels=all_groups, title=f"Model: {model} {modal}")
        plt.savefig(f'img/DTL_{model}_{modal}_label1_roc.svg', bbox_inches='tight')
        plt.show()
    metrics_df = pd.DataFrame(metric_results,
                              columns=['ModelName', 'Acc', 'AUC', '95% CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                                       'Precision', 'Recall', 'F1', 'Threshold', 'Cohort'])
    display(metrics_df)
all_log = pd.concat(all_log, axis=0)

# Task2

In [None]:
import pandas as pd
import numpy  as np
from onekey_algo.custom.components import metrics
from onekey_algo.custom.components.comp1 import draw_roc
from matplotlib import pyplot as plt

def get_group(x):
    """Return the cohort name ('train', 'val' or 'test') for sample key *x*.

    *x* is looked up in the module-level ``map2_group`` dict and the prefix of
    the resulting ID selects the cohort: ``A_`` -> 'train', ``B_`` -> 'val',
    any other ID -> 'test'.  Raises ``KeyError`` if *x* is not in ``map2_group``.
    """
    sample_id = map2_group[x]
    for prefix, cohort_name in (('A_', 'train'), ('B_', 'val')):
        if sample_id.startswith(prefix):
            return cohort_name
    return 'test'

# Task 2 (label2): evaluate the resnet101 model of every modality, cohort by cohort.
# For each modality this reads the {train,valid}/Epoch-N.txt prediction logs, assigns
# every sample to a cohort with get_group, exports the per-cohort class scores to
# results/, computes the binary metrics and saves one ROC figure per model to img/.
# Change log_path below if the Onekey logs are stored somewhere else.
all_log = []
for modal in ['2.5D', '2D']:
    model_root = os.path.join(get_param_in_cwd('radio_dir', 'models'), 'label2', f"models{modal}")
    metric_results = []  # one row per (model, cohort); reset for every modality
    all_predict_scores = []
    all_gts = []
    for model in [m for m in os.listdir(model_root) if m == 'resnet101']:
        all_pred = []
        all_gt = []
        all_groups = []
        for subset in ['Train', 'Test']:
            cohort = 'TRAIN' if subset == 'Train' else 'VALID'
            # The 2D logs are read from epoch 20, every other modality from epoch 40.
            epoch = 20 if modal == '2D' else 40
            log_path = os.path.join(model_root, model, f"{cohort.lower()}/Epoch-{epoch}.txt")
            val_log = pd.read_csv(log_path, names=['fname', 'pred_score', 'pred_label', 'gt'], sep='\t')
            val_log['group'] = val_log['fname'].map(get_group)
            val_log['model'] = f"{model}_{modal}"
            # Collected before the merge with ids, so all_log keeps every logged row.
            all_log.append(val_log)
            val_log = pd.merge(val_log, ids, on='fname', how='inner')
            # Cohorts are determined after the merge, so a cohort whose rows were all
            # dropped by it is not processed with empty data.
            ug_groups = np.unique(val_log['group'])
            # The code treats pred_score as the score of pred_label; derive the score
            # of class 0 and of class 1 from it.
            raw_scores = val_log['pred_score']
            val_log['label-0'] = np.where(val_log['pred_label'] == 0, raw_scores, 1 - raw_scores)
            val_log['label-1'] = np.where(val_log['pred_label'] == 1, raw_scores, 1 - raw_scores)
            val_log['ID'] = val_log['fname'].map(lambda x: map2_group[x])
            for g in ug_groups:
                # val_log is already restricted to ids by the merge above.
                sub_group = val_log[val_log['group'] == g]
                sub_group[['ID', 'label-0', 'label-1']].to_csv(f'results/DL{modal}_{model}_label2_{g}.csv', index=False)
                all_groups.append(g)
                for ul in [1]:  # label treated as the positive class
                    raw = np.array(sub_group['pred_score'])
                    is_ul = np.array(sub_group['pred_label']) == ul
                    score_ul = np.where(is_ul, raw, 1 - raw)  # score of class `ul`
                    pred_score = list(score_ul)
                    gt = [1 if gt_ == ul else 0 for gt_ in np.array(sub_group['gt'])]
                    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = metrics.analysis_pred_binary(
                        gt, pred_score, use_youden=False)
                    ci = f"{ci[0]:.4f}-{ci[1]:.4f}"
                    metric_results.append([model, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, g])
                    # Two columns per sample: (score of the other class, score of class `ul`).
                    all_pred.append(np.column_stack([np.where(is_ul, 1 - raw, raw), score_ul]))
                    all_gt.append(gt)
        all_predict_scores.extend(all_pred)
        all_gts.extend(all_gt)
        draw_roc(all_gt, all_pred, labels=all_groups, title=f"Model: {model} {modal}")
        plt.savefig(f'img/DTL_{model}_{modal}_label2_roc.svg', bbox_inches='tight')
        plt.show()
    metrics_df = pd.DataFrame(metric_results,
                              columns=['ModelName', 'Acc', 'AUC', '95% CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                                       'Precision', 'Recall', 'F1', 'Threshold', 'Cohort'])
    display(metrics_df)
all_log = pd.concat(all_log, axis=0)

# Task3

In [None]:
import pandas as pd
import numpy  as np
from onekey_algo.custom.components import metrics
from onekey_algo.custom.components.comp1 import draw_roc
from matplotlib import pyplot as plt
from onekey_algo.custom.components.ugly import drop_error

def get_group(x):
    """Return the cohort name ('train', 'val' or 'test') for sample key *x*.

    *x* is looked up in the module-level ``map2_group`` dict and the prefix of
    the resulting ID selects the cohort: ``A_`` -> 'train', ``B_`` -> 'val',
    any other ID -> 'test'.  Raises ``KeyError`` if *x* is not in ``map2_group``.
    """
    sample_id = map2_group[x]
    for prefix, cohort_name in (('A_', 'train'), ('B_', 'val')):
        if sample_id.startswith(prefix):
            return cohort_name
    return 'test'

# Task 3 (label3): evaluate the resnet101 model of every modality, cohort by cohort.
# For each modality this reads the viz/BST_{TRAIN,VAL}_RESULTS.txt prediction logs,
# assigns every sample to a cohort with get_group, exports the per-cohort class scores
# to results/, computes the binary metrics and saves one ROC figure per model to img/.
# Change log_path below if the Onekey logs are stored somewhere else.
all_log = []
ids_set = []  # 'fname' Series of every evaluated (modality, model, cohort) subset
for modal in ['2.5D', '2D']:
    model_root = os.path.join(get_param_in_cwd('radio_dir', 'models'), 'label3', f"models{modal}")
    metric_results = []  # one row per (model, cohort); reset for every modality
    all_predict_scores = []
    all_gts = []
    for model in [m for m in os.listdir(model_root) if m == 'resnet101']:
        all_pred = []
        all_gt = []
        all_groups = []
        for subset in ['Train', 'Test']:
            cohort = 'TRAIN' if subset == 'Train' else 'VAL'
            log_path = os.path.join(model_root, model, f"viz/BST_{cohort}_RESULTS.txt")
            val_log = pd.read_csv(log_path, names=['fname', 'pred_score', 'pred_label', 'gt'], sep='\t')
            val_log['group'] = val_log['fname'].map(get_group)
            val_log['model'] = f"{model}_{modal}"
            # Collected before the merge with ids, so all_log keeps every logged row.
            all_log.append(val_log)
            val_log = pd.merge(val_log, ids, on='fname', how='inner')
            # Cohorts are determined after the merge, so a cohort whose rows were all
            # dropped by it is not processed with empty data.
            ug_groups = np.unique(val_log['group'])
            # The code treats pred_score as the score of pred_label; derive the score
            # of class 0 and of class 1 from it.
            raw_scores = val_log['pred_score']
            val_log['label-0'] = np.where(val_log['pred_label'] == 0, raw_scores, 1 - raw_scores)
            val_log['label-1'] = np.where(val_log['pred_label'] == 1, raw_scores, 1 - raw_scores)
            val_log['ID'] = val_log['fname'].map(lambda x: map2_group[x])
            for g in ug_groups:
                sub_group = val_log[val_log['group'] == g]
                sub_group[['ID', 'label-0', 'label-1']].to_csv(f'results/DL{modal}_{model}_label3_{g}.csv', index=False)
                # val_log is already restricted to ids by the merge above; only the
                # index is renumbered from 0 here.
                sub_group = sub_group.reset_index(drop=True)
                # NOTE(review): this branch is switched off by the trailing `and False`.
                # If re-enabled it filters the val/test rows through drop_error before the
                # metrics are computed, so the reported metrics would no longer cover the
                # full cohort -- confirm that this is intended before turning it back on.
                if g in ['val', 'test'] and False:
                    sel_idx, = drop_error([sub_group['pred_label']], [sub_group['gt']], [sub_group['pred_score']],
                                          ratio=0.09 if g == 'val' else 0.15,
                                          random_state=0 if g == 'val' else 20)
                    print(model, subset, sub_group.shape, sub_group[sel_idx].shape)
                    sub_group = sub_group[sel_idx]
                ids_set.append(sub_group['fname'])
                all_groups.append(g)
                for ul in [1]:  # label treated as the positive class
                    raw = np.array(sub_group['pred_score'])
                    is_ul = np.array(sub_group['pred_label']) == ul
                    score_ul = np.where(is_ul, raw, 1 - raw)  # score of class `ul`
                    pred_score = list(score_ul)
                    gt = [1 if gt_ == ul else 0 for gt_ in np.array(sub_group['gt'])]
                    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = metrics.analysis_pred_binary(
                        gt, pred_score, use_youden=False)
                    ci = f"{ci[0]:.4f}-{ci[1]:.4f}"
                    metric_results.append([model, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, g])
                    # Two columns per sample: (score of the other class, score of class `ul`).
                    all_pred.append(np.column_stack([np.where(is_ul, 1 - raw, raw), score_ul]))
                    all_gt.append(gt)
        all_predict_scores.extend(all_pred)
        all_gts.extend(all_gt)
        draw_roc(all_gt, all_pred, labels=all_groups, title=f"Model: {model} {modal}")
        plt.savefig(f'img/DTL_{model}_{modal}_label3_roc.svg', bbox_inches='tight')
        plt.show()
    metrics_df = pd.DataFrame(metric_results,
                              columns=['ModelName', 'Acc', 'AUC', '95% CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                                       'Precision', 'Recall', 'F1', 'Threshold', 'Cohort'])
    display(metrics_df)
all_log = pd.concat(all_log, axis=0)