In [None]:
import pandas as pd
import shutil
import os
import numpy as np
import matplotlib.pyplot as plt
import onekey_algo.custom.components as okcomp
from onekey_algo import get_param_in_cwd

plt.rcParams['figure.dpi'] = 300
model_names = ['2D DCNN', '2.5D DCNN']

# --- Configuration ---------------------------------------------------------
# Every setting is looked up through get_param_in_cwd; the literal after `or`
# is used whenever the lookup returns a falsy value.
task = get_param_in_cwd('task_column') or 'label1'
# `task` is concatenated with lists and iterated label-by-label below, so a bare
# string (including the 'label1' fallback) must be wrapped into a one-item list;
# otherwise `list + str` raises TypeError and the loop would walk over characters.
task = [task] if isinstance(task, str) else list(task)
bst_model = get_param_in_cwd('sel_model') or 'LR'
labelf = get_param_in_cwd('label_file')
if not labelf:
    # `mydir` is not assigned in this cell. Use it when an earlier cell defined it,
    # otherwise fall back to the current working directory instead of raising NameError.
    labelf = os.path.join(globals().get('mydir') or os.getcwd(), 'label.csv')
group_info = get_param_in_cwd('dataset_column') or 'group'

# --- Load the label file ---------------------------------------------------
labels = task
# Keep only ID, the cohort column and the task columns, then drop every row
# that has a missing value in any of them.
label_data_ = (
    pd.read_csv(labelf)[['ID', group_info] + task]
    .dropna(axis=0)
)

ids = label_data_['ID']
print(label_data_.columns)
# Frame that is joined onto the model predictions: ID plus the task columns only.
label_data = label_data_[['ID'] + labels]
label_data

# 训练集-Nomogram

In [None]:
import pandas as pd
from onekey_algo.custom.components.comp1 import normalize_df
from onekey_algo.custom.components.comp1 import plot_DCA
from onekey_algo.custom.components.comp1 import draw_calibration
from onekey_algo.custom.components.delong import delong_roc_test
from onekey_algo.custom.components.metrics import NRI, IDI
from onekey_algo.custom.components.metrics import analysis_pred_binary
from onekey_algo.custom.components import stats

# Compare the 2D and 2.5D DCNN predictions on every cohort and every label.
# For each (subset, label) pair this cell:
#   * appends one row per model to `metric` (values returned by analysis_pred_binary),
#   * appends one comparison row to `info` (DeLong, NRI, IDI and both HL results),
#   * saves the ROC, DCA and calibration figures under img/.
info = []
metric = []
pred_column = ['label-0', 'label-1']
HL_BINS = 50  # bin count handed to hosmer_lemeshow_test (the same for every cohort)

# plt.savefig does not create missing directories, so make sure img/ exists first.
os.makedirs('img', exist_ok=True)

for subset in ['train', 'val', 'test']:
    for label in labels:
        DL25D_results = pd.merge(pd.read_csv(f'./results/DL2.5D_resnet101_{label}_{subset}.csv', header=0),
                                 label_data, on='ID', how='inner')
        DL2D_results = pd.merge(pd.read_csv(f'./results/DL2D_resnet101_{label}_{subset}.csv', header=0),
                                label_data, on='ID', how='inner')

        # One row per ID with each model's 'label-1' column (renamed to the model name)
        # and the ground truth of the current label. Columns are selected by name so the
        # cell works for any number of task columns, not just three.
        # NOTE(review): assumes 'label-1' is the column the previous positional rename
        # mapped to the model name (CSV order ID, label-0, label-1) — confirm.
        ALL_results = pd.merge(
            DL2D_results[['ID', 'label-1', label]].rename(columns={'label-1': model_names[0]}),
            DL25D_results[['ID', 'label-1']].rename(columns={'label-1': model_names[1]}),
            on='ID', how='inner',
        )
        ALL_results = ALL_results.dropna(axis=1)

        per_model_frames = (DL2D_results, DL25D_results)  # same order as model_names
        gt = [np.array(frame[label]) for frame in per_model_frames]
        pred_train = [np.array(frame[pred_column]) for frame in per_model_frames]

        # Threshold returned by analysis_pred_binary for each model; reused below to
        # binarise the scores for NRI.
        youden = {}
        for mname, y, score in zip(model_names, gt, pred_train):
            # Metrics for the current cohort (train, val or test).
            acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = analysis_pred_binary(y, score)
            ci = f"{ci[0]:.4f} - {ci[1]:.4f}"
            metric.append((mname, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, f"{subset}-{label}"))
            youden[mname] = thres

        # ROC curves
        okcomp.comp1.draw_roc(gt, pred_train, labels=model_names, title="Model AUC")
        plt.savefig(f'img/{subset}_{label}_auc.svg')
        plt.show()

        # Decision curve analysis
        plot_DCA([ALL_results[model_name] for model_name in model_names],
                 ALL_results[label], title='Model for DCA', labels=model_names, y_min=-0.15)
        plt.savefig(f'img/{subset}_{label}_dca.svg')
        plt.show()

        # Calibration curves
        draw_calibration(pred_scores=pred_train, n_bins=5, remap=True,
                         y_test=gt, model_names=model_names)
        plt.savefig(f'img/{subset}_{label}_cali.svg')
        plt.show()

        # Hosmer-Lemeshow test for each model
        hl_test_2d = stats.hosmer_lemeshow_test(ALL_results[label], ALL_results[model_names[0]],
                                                bins=HL_BINS, remap=True)
        hl_test_25d = stats.hosmer_lemeshow_test(ALL_results[label], ALL_results[model_names[1]],
                                                 bins=HL_BINS, remap=True)

        # DeLong test, NRI and IDI between the two models.
        # NOTE(review): NRI and IDI are negated, presumably so the sign reads as
        # '2.5D vs 2D' while the 2D model is passed first — confirm against the helpers.
        delong_val = delong_roc_test(ALL_results[label], ALL_results[model_names[0]],
                                     ALL_results[model_names[1]])[0][0]
        nri_val = -NRI(ALL_results[model_names[0]] > youden[model_names[0]],
                       ALL_results[model_names[1]] > youden[model_names[1]], ALL_results[label])
        idi_val = -IDI(ALL_results[model_names[0]], ALL_results[model_names[1]], ALL_results[label], with_p=False)
        info.append(['2.5D vs 2D', delong_val, nri_val, idi_val, hl_test_2d, hl_test_25d, f"{subset}-{label}"])

In [None]:
# Per-model performance table built from the tuples collected in `metric`
# (one row per model and cohort-label combination).
metric_columns = [
    'Signature', 'Accuracy', 'AUC', '95% CI', 'Sensitivity', 'Specificity',
    'PPV', 'NPV', 'Precision', 'Recall', 'F1', 'Threshold', 'Cohort',
]
pd.DataFrame(metric, index=None, columns=metric_columns)

In [None]:
# Turn the model-comparison rows gathered in `info` into a table and display it.
info_columns = ['Signature', 'Delong', 'NRI', 'IDI', 'HL2D', 'HL25D', 'Cohort']
info = pd.DataFrame(data=info, columns=info_columns)
info

In [None]:
# Hosmer-Lemeshow results only: one row per model column, one column per cohort.
hl_source_columns = ['HL2D', 'HL25D']
hl = info.loc[:, hl_source_columns].transpose()
hl.columns = info['Cohort']
hl