In [None]:
import pandas as pd
import shutil
import os
import numpy as np
import matplotlib.pyplot as plt
import onekey_algo.custom.components as okcomp
from onekey_algo import get_param_in_cwd

# Render figures at 300 dpi.
plt.rcParams['figure.dpi'] = 300
# Display names of the three signatures compared in every cohort section.
model_names = ['Clinic_Sig', 'Pathology_Sig', 'Nomogram']
# Configuration
task = 'label'          # outcome column of the label file
bst_model = 'LightGBM'  # model tag embedded in the per-signature result file names
labelf = 'group.csv'    # label file
group_info = 'group'    # extra column of the label file kept next to the outcome

# Figures are written to img/ by the plotting cells; create the directory up front
# so plt.savefig does not fail when it is missing.
os.makedirs('img', exist_ok=True)

# Read the label file.
# ID is parsed as a string at read time (rather than cast after type inference) so
# that it matches the prediction files, which are read with dtype={'ID': str}.
# Casting afterwards turns an ID such as "007" into "7", and the inner merges on ID
# would then silently drop that sample.
labels = [task]
label_data_ = pd.read_csv(labelf, dtype={'ID': str})
label_data_ = label_data_[['ID', group_info, task]]
# Drop samples with a missing ID, group or label.
label_data_ = label_data_.dropna(axis=0)

ids = label_data_['ID']
print(label_data_.columns)
label_data = label_data_[['ID'] + labels]

label_data

# 训练集-汇总

In [None]:
import pandas as pd

# Training cohort: read each signature's predictions and attach the ground-truth label.
subset = 'train'
Clinic_results = pd.read_csv(
    f'./results/Clinical_{bst_model}_{subset}.csv', header=0, dtype={'ID':str}
).merge(label_data, on='ID', how='inner')
Path_results = pd.read_csv(
    f'./results/Path_{bst_model}_{subset}.csv', header=0, dtype={'ID':str}
).merge(label_data, on='ID', how='inner')

# One row per sample present in both signatures. The seven merged columns are named
# by position; the '-…' names are placeholders for columns not referenced by name later.
# NOTE(review): this assumes each result file has exactly ID + two prediction columns — confirm.
ALL_results = Clinic_results.merge(Path_results, on='ID', how='inner')
ALL_results.columns = ['ID', '-0', 'Clinic_Sig', task, '-00', 'Pathology_Sig', '-l']

# Discard any column that contains a missing value.
ALL_results = ALL_results.dropna(axis=1)
ALL_results

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from onekey_algo.custom.components import metrics

# Combine the clinical and pathology signatures with a logistic regression fitted on
# the training cohort. `model` is reused, without refitting, by the later cohorts.
# Alternative kept for reference: SVC(probability=True, random_state=0)
data_x = ALL_results[['Clinic_Sig', 'Pathology_Sig']]
data_y = ALL_results[task]
model = LogisticRegression(random_state=0)
model.fit(data_x, data_y)

# Persist both class probabilities per ID in the same layout as the other result files.
results = pd.DataFrame(model.predict_proba(data_x), index=ALL_results['ID'],
                       columns=[f'{task}-0', f'{task}-1'])
results = results.reset_index()
results.to_csv(f'./results/Nomo_{subset}.csv', index=False, header=True)

# Quick single-row summary of the fused model on this cohort.
nomo_summary = metrics.analysis_pred_binary(ALL_results[task], results[f'{task}-1'])
pd.DataFrame([nomo_summary],
             columns=['acc', 'auc', '95%CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                      'Precision', 'Recall', 'F1', 'Threshold'])

In [None]:
pred_column = [f'{task}-0', f'{task}-1']
Nomo_results = pd.read_csv(f'./results/Nomo_{subset}.csv', header=0, dtype={'ID':str}).merge(
    label_data, on='ID', how='inner')

# Per-signature ground truth and predictions, in the same order as model_names.
signature_frames = (Clinic_results, Path_results, Nomo_results)
gt = [np.array(frame[labels]) for frame in signature_frames]
pred_train = [np.array(frame[pred_column]) for frame in signature_frames]

# ROC curves of the three signatures, saved under img/.
okcomp.comp1.draw_roc(gt, pred_train, labels=model_names, title="Model AUC")
plt.savefig(f'img/{subset}_auc.svg')

In [None]:
from onekey_algo.custom.components.metrics import analysis_pred_binary

# Start the cross-cohort metric table; the validation and test cells append to `metric`.
metric = []
for mname, y, score in zip(model_names, gt, pred_train):
    # Metrics of this signature on the training cohort.
    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = analysis_pred_binary(y, score)
    # Render the confidence interval as "low - high".
    ci = f"{ci[0]:.4f} - {ci[1]:.4f}"
    row = (mname, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, "Train")
    metric.append(row)
pd.DataFrame(
    metric,
    index=None,
    columns=['Signature', 'Accuracy', 'AUC', '95% CI', 'Sensitivity', 'Specificity',
             'PPV', 'NPV', 'Precision', 'Recall', 'F1','Threshold', 'Cohort'],
)

In [None]:
from onekey_algo.custom.components.delong import delong_roc_test

# Add the nomogram score to the per-sample table. Columns are named by position;
# only 'Nomogram' is read by name below.
nomo_columns = ['ID', '-00000', 'Nomogram', '-llll']
Nomo_results.columns = nomo_columns
# Drop columns left by a previous run of this cell before merging. Without this, a
# re-run merges twice, pandas suffixes the duplicates (Nomogram_x / Nomogram_y) and
# ALL_results['Nomogram'] raises KeyError. On a first run nothing is dropped.
ALL_results = ALL_results.drop(columns=nomo_columns[1:], errors='ignore')
ALL_results = pd.merge(ALL_results, Nomo_results, on='ID', how='inner')

# Start the cross-cohort table of pairwise DeLong results; the validation and test
# cells append their own row to `delong`.
delong = []

this_delong = []
delong_columns = []
# Every unordered pair of signatures, in model_names order.
for i, mni in enumerate(model_names):
    for mnj in model_names[i+1:]:
        # NOTE(review): [0][0] is presumably the scalar p-value of the test — confirm
        # against delong_roc_test.
        this_delong.append(delong_roc_test(ALL_results[task], ALL_results[mni], ALL_results[mnj])[0][0])
        delong_columns.append(f"{mni} Vs {mnj}")
this_delong.append('Train')
delong_columns.append('cohort')
delong.append(this_delong)
# Same display form as the validation and test cells.
pd.DataFrame(delong, columns=delong_columns)

In [None]:
from onekey_algo.custom.components.comp1 import plot_DCA

# DCA plot of the three signature scores against the true label, saved under img/.
dca_scores = [ALL_results[sig] for sig in ('Clinic_Sig', 'Pathology_Sig', 'Nomogram')]
plot_DCA(dca_scores, ALL_results[task], title='Model for DCA', labels=model_names)
plt.savefig(f'img/{subset}_dca.svg')

In [None]:
from onekey_algo.custom.components.comp1 import draw_calibration

# Calibration plot of the three signatures with 5 bins (the optional smooth=True is
# left disabled), saved under img/.
draw_calibration(y_test=gt, pred_scores=pred_train, model_names=model_names, n_bins=5)
plt.savefig(f'img/{subset}_cali.svg')

In [None]:
from onekey_algo.custom.components import stats

# `hosmer` holds one row per cohort and one column per signature; the validation and
# test cells append their own row.
hosmer = []
train_hosmer_row = [
    # The second prediction column (index 1) is the score passed to the test.
    stats.hosmer_lemeshow_test(truth, proba[:,1], bins=10)
    for _name, truth, proba in zip(model_names, gt, pred_train)
]
hosmer.append(train_hosmer_row)
pd.DataFrame(hosmer, columns=model_names)

# 绘制Nomogram

In [None]:
from onekey_algo.custom.components import nomogram
import shutil

# Round a copy for the nomogram only. The original cell overwrote ALL_results with
# the rounded values, so any training-cohort cell re-run afterwards (DeLong, DCA, ...)
# silently worked on scores truncated to two decimals.
nomogram_data = ALL_results.round(decimals=2)
# Nomogram of the outcome from the two base signatures.
nomogram.risk_nomogram(nomogram_data, result=task, columns=['Clinic_Sig', 'Pathology_Sig'], width=1200, height=400,
                       x_range='0.01,0.25,0.5,0.75,0.99')

# 验证集-汇总

In [None]:
import pandas as pd

# Validation cohort: read each signature's predictions and attach the ground-truth label.
subset = 'val'
Clinic_results = pd.read_csv(
    f'./results/Clinical_{bst_model}_{subset}.csv', header=0, dtype={'ID':str}
).merge(label_data, on='ID', how='inner')
Path_results = pd.read_csv(
    f'./results/Path_{bst_model}_{subset}.csv', header=0, dtype={'ID':str}
).merge(label_data, on='ID', how='inner')

# One row per sample present in both signatures. The seven merged columns are named
# by position; the '-…' names are placeholders for columns not referenced by name later.
# NOTE(review): this assumes each result file has exactly ID + two prediction columns — confirm.
ALL_results = Clinic_results.merge(Path_results, on='ID', how='inner')
ALL_results.columns = ['ID', '-0', 'Clinic_Sig', task, '-00', 'Pathology_Sig', '-l']

# Discard any column that contains a missing value.
ALL_results = ALL_results.dropna(axis=1)
ALL_results

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from onekey_algo.custom.components import metrics

# Score the validation cohort with `model` as fitted by the training-cohort cell.
# The model is deliberately NOT refitted here, so that cell must have been run first.
data_x = ALL_results[['Clinic_Sig', 'Pathology_Sig']]
data_y = ALL_results[task]

# Persist both class probabilities per ID in the same layout as the other result files.
results = pd.DataFrame(model.predict_proba(data_x), index=ALL_results['ID'],
                       columns=[f'{task}-0', f'{task}-1'])
results = results.reset_index()
results.to_csv(f'./results/Nomo_{subset}.csv', index=False, header=True)

# Quick single-row summary of the fused model on this cohort.
nomo_summary = metrics.analysis_pred_binary(ALL_results[task], results[f'{task}-1'])
pd.DataFrame([nomo_summary],
             columns=['acc', 'auc', '95%CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                      'Precision', 'Recall', 'F1', 'Threshold'])

In [None]:
pred_column = [f'{task}-0', f'{task}-1']
Nomo_results = pd.read_csv(f'./results/Nomo_{subset}.csv', header=0, dtype={'ID':str}).merge(
    label_data, on='ID', how='inner')

# Per-signature ground truth and predictions, in the same order as model_names.
# Despite its name, pred_train holds the predictions of the current cohort (`subset`).
signature_frames = (Clinic_results, Path_results, Nomo_results)
gt = [np.array(frame[labels]) for frame in signature_frames]
pred_train = [np.array(frame[pred_column]) for frame in signature_frames]

# ROC curves of the three signatures, saved under img/.
okcomp.comp1.draw_roc(gt, pred_train, labels=model_names, title="Model AUC")
plt.savefig(f'img/{subset}_auc.svg')

In [None]:
from onekey_algo.custom.components.metrics import analysis_pred_binary

# `metric` is the running cross-cohort table started by the training-cohort cell.
# Remove rows added by a previous run of this cell first; otherwise re-running it
# appends the 'Val' rows again and the table shows duplicates.
metric = [row for row in metric if row[-1] != 'Val']
for mname, y, score in zip(model_names, gt, pred_train):
    # Metrics of this signature on the validation cohort.
    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = analysis_pred_binary(y, score)
    # Render the confidence interval as "low - high".
    ci = f"{ci[0]:.4f} - {ci[1]:.4f}"
    metric.append((mname, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, "Val"))
pd.DataFrame(metric, index=None, columns=['Signature', 'Accuracy', 'AUC', '95% CI',
                                          'Sensitivity', 'Specificity', 'PPV', 'NPV', 'Precision', 'Recall', 'F1',
                                          'Threshold', 'Cohort'])

In [None]:
from onekey_algo.custom.components.delong import delong_roc_test

# Add the nomogram score to the per-sample table. Columns are named by position;
# only 'Nomogram' is read by name below.
nomo_columns = ['ID', '-00000', 'Nomogram', '-llll']
Nomo_results.columns = nomo_columns
# Drop columns left by a previous run of this cell before merging. Without this, a
# re-run merges twice, pandas suffixes the duplicates (Nomogram_x / Nomogram_y) and
# ALL_results['Nomogram'] raises KeyError. On a first run nothing is dropped.
ALL_results = ALL_results.drop(columns=nomo_columns[1:], errors='ignore')
ALL_results = pd.merge(ALL_results, Nomo_results, on='ID', how='inner')

# `delong` is the running cross-cohort table started by the training-cohort cell;
# discard a row appended by a previous run of this cell so it is not duplicated.
delong = [row for row in delong if row[-1] != 'Val']

this_delong = []
delong_columns = []
# Every unordered pair of signatures, in model_names order.
for i, mni in enumerate(model_names):
    for mnj in model_names[i+1:]:
        # NOTE(review): [0][0] is presumably the scalar p-value of the test — confirm
        # against delong_roc_test.
        this_delong.append(delong_roc_test(ALL_results[task], ALL_results[mni], ALL_results[mnj])[0][0])
        delong_columns.append(f"{mni} Vs {mnj}")
this_delong.append('Val')
delong_columns.append('cohort')
delong.append(this_delong)
pd.DataFrame(delong, columns=delong_columns)

In [None]:
from onekey_algo.custom.components.comp1 import plot_DCA

# DCA plot of the three signature scores against the true label, saved under img/.
dca_scores = [ALL_results[sig] for sig in ('Clinic_Sig', 'Pathology_Sig', 'Nomogram')]
plot_DCA(dca_scores, ALL_results[task], title='Model for DCA', labels=model_names)
plt.savefig(f'img/{subset}_dca.svg')

In [None]:
from onekey_algo.custom.components.comp1 import draw_calibration

# Calibration plot of the three signatures with 5 bins, saved under img/.
# Smoothing (smooth=True, window_length=7) is left disabled.
draw_calibration(y_test=gt, pred_scores=pred_train, model_names=model_names, n_bins=5)
plt.savefig(f'img/{subset}_cali.svg')

In [None]:
from onekey_algo.custom.components import stats

# Append this cohort's row to `hosmer`, the list started by the training-cohort cell.
# NOTE(review): rows carry no cohort tag, so re-running this cell adds a duplicate row.
val_hosmer_row = [
    # The second prediction column (index 1) is the score passed to the test.
    stats.hosmer_lemeshow_test(truth, proba[:,1], bins=10)
    for _name, truth, proba in zip(model_names, gt, pred_train)
]
hosmer.append(val_hosmer_row)
pd.DataFrame(hosmer, columns=model_names)

# 测试集-汇总

In [None]:
import pandas as pd

# Test cohort: read each signature's predictions and attach the ground-truth label.
subset = 'test'
Clinic_results = pd.read_csv(
    f'./results/Clinical_{bst_model}_{subset}.csv', header=0, dtype={'ID':str}
).merge(label_data, on='ID', how='inner')
Path_results = pd.read_csv(
    f'./results/Path_{bst_model}_{subset}.csv', header=0, dtype={'ID':str}
).merge(label_data, on='ID', how='inner')

# One row per sample present in both signatures. The seven merged columns are named
# by position; the '-…' names are placeholders for columns not referenced by name later.
# NOTE(review): this assumes each result file has exactly ID + two prediction columns — confirm.
ALL_results = Clinic_results.merge(Path_results, on='ID', how='inner')
ALL_results.columns = ['ID', '-0', 'Clinic_Sig', task, '-00', 'Pathology_Sig', '-l']

# Discard any column that contains a missing value.
ALL_results = ALL_results.dropna(axis=1)
ALL_results

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from onekey_algo.custom.components import metrics

# Score the test cohort with `model` as fitted by the training-cohort cell.
# The model is deliberately NOT refitted here, so that cell must have been run first.
data_x = ALL_results[['Clinic_Sig', 'Pathology_Sig']]
data_y = ALL_results[task]

# Persist both class probabilities per ID in the same layout as the other result files.
results = pd.DataFrame(model.predict_proba(data_x), index=ALL_results['ID'],
                       columns=[f'{task}-0', f'{task}-1'])
results = results.reset_index()
results.to_csv(f'./results/Nomo_{subset}.csv', index=False, header=True)

# Quick single-row summary of the fused model on this cohort.
nomo_summary = metrics.analysis_pred_binary(ALL_results[task], results[f'{task}-1'])
pd.DataFrame([nomo_summary],
             columns=['acc', 'auc', '95%CI', 'Sensitivity', 'Specificity', 'PPV', 'NPV',
                      'Precision', 'Recall', 'F1', 'Threshold'])

In [None]:
pred_column = [f'{task}-0', f'{task}-1']
Nomo_results = pd.read_csv(f'./results/Nomo_{subset}.csv', header=0, dtype={'ID':str}).merge(
    label_data, on='ID', how='inner')

# Per-signature ground truth and predictions, in the same order as model_names.
# Despite its name, pred_train holds the predictions of the current cohort (`subset`).
signature_frames = (Clinic_results, Path_results, Nomo_results)
gt = [np.array(frame[labels]) for frame in signature_frames]
pred_train = [np.array(frame[pred_column]) for frame in signature_frames]

# ROC curves of the three signatures, saved under img/.
okcomp.comp1.draw_roc(gt, pred_train, labels=model_names, title="Model AUC")
plt.savefig(f'img/{subset}_auc.svg')

In [None]:
from onekey_algo.custom.components.metrics import analysis_pred_binary

# `metric` is the running cross-cohort table started by the training-cohort cell.
# Remove rows added by a previous run of this cell first; otherwise re-running it
# appends the 'Test' rows again and the table shows duplicates.
metric = [row for row in metric if row[-1] != 'Test']
for mname, y, score in zip(model_names, gt, pred_train):
    # Metrics of this signature on the test cohort.
    acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres = analysis_pred_binary(y, score)
    # Render the confidence interval as "low - high".
    ci = f"{ci[0]:.4f} - {ci[1]:.4f}"
    metric.append((mname, acc, auc, ci, tpr, tnr, ppv, npv, precision, recall, f1, thres, "Test"))
pd.DataFrame(metric, index=None, columns=['Signature', 'Accuracy', 'AUC', '95% CI',
                                          'Sensitivity', 'Specificity', 'PPV', 'NPV', 'Precision', 'Recall', 'F1',
                                          'Threshold', 'Cohort'])

In [None]:
from onekey_algo.custom.components.delong import delong_roc_test

# Add the nomogram score to the per-sample table. Columns are named by position;
# only 'Nomogram' is read by name below.
nomo_columns = ['ID', '-00000', 'Nomogram', '-llll']
Nomo_results.columns = nomo_columns
# Drop columns left by a previous run of this cell before merging. Without this, a
# re-run merges twice, pandas suffixes the duplicates (Nomogram_x / Nomogram_y) and
# ALL_results['Nomogram'] raises KeyError. On a first run nothing is dropped.
ALL_results = ALL_results.drop(columns=nomo_columns[1:], errors='ignore')
ALL_results = pd.merge(ALL_results, Nomo_results, on='ID', how='inner')

# `delong` is the running cross-cohort table started by the training-cohort cell;
# discard a row appended by a previous run of this cell so it is not duplicated.
delong = [row for row in delong if row[-1] != 'Test']

this_delong = []
delong_columns = []
# Every unordered pair of signatures, in model_names order.
for i, mni in enumerate(model_names):
    for mnj in model_names[i+1:]:
        # NOTE(review): [0][0] is presumably the scalar p-value of the test — confirm
        # against delong_roc_test.
        this_delong.append(delong_roc_test(ALL_results[task], ALL_results[mni], ALL_results[mnj])[0][0])
        delong_columns.append(f"{mni} Vs {mnj}")
this_delong.append('Test')
delong_columns.append('cohort')
delong.append(this_delong)
pd.DataFrame(delong, columns=delong_columns)

In [None]:
from onekey_algo.custom.components.comp1 import plot_DCA

# DCA plot of the three signature scores against the true label, saved under img/.
dca_scores = [ALL_results[sig] for sig in ('Clinic_Sig', 'Pathology_Sig', 'Nomogram')]
plot_DCA(dca_scores, ALL_results[task], title='Model for DCA', labels=model_names)
plt.savefig(f'img/{subset}_dca.svg')

In [None]:
from onekey_algo.custom.components.comp1 import draw_calibration

# Calibration plot of the three signatures with 5 bins, saved under img/.
# Smoothing (smooth=True, window_length=9) is left disabled.
draw_calibration(y_test=gt, pred_scores=pred_train, model_names=model_names, n_bins=5)
plt.savefig(f'img/{subset}_cali.svg')

In [None]:
from onekey_algo.custom.components import stats

# Append this cohort's row to `hosmer`, the list started by the training-cohort cell.
# NOTE(review): rows carry no cohort tag, so re-running this cell adds a duplicate row.
test_hosmer_row = [
    # The second prediction column (index 1) is the score passed to the test.
    stats.hosmer_lemeshow_test(truth, proba[:,1], bins=10)
    for _name, truth, proba in zip(model_names, gt, pred_train)
]
hosmer.append(test_hosmer_row)
pd.DataFrame(hosmer, columns=model_names)