In [None]:
import pandas as pd
import shutil
import os
import numpy as np
import matplotlib.pyplot as plt
import onekey_algo.custom.components as okcomp
from onekey_algo import get_param_in_cwd

# Render every figure at 300 DPI.
plt.rcParams.update({'figure.dpi': 300})
model_names = ['Clinic_Sig', 'Pathology', 'Nomogram']

# Load configuration; each entry falls back to a default when the configured
# value is missing or falsy.
task = get_param_in_cwd('task_column') or 'label'
bst_model = get_param_in_cwd('sel_model') or 'LR'
group_info = get_param_in_cwd('dataset_column') or 'group'

# Read the label (survival) file and discard every row that has a missing value.
labels = [task]
label_data_ = pd.read_csv(get_param_in_cwd('survival_file')).dropna(axis=0)
label_data = label_data_  # alias of the same frame, not a copy

ids = label_data['ID']
print(label_data.columns)

# Cohort assignment table: one row per ID with its dataset-split column.
group = pd.read_csv(get_param_in_cwd('label_file')).loc[:, ['ID', group_info]]

label_data

In [None]:
import pandas as pd
from onekey_algo.custom.components.comp1 import normalize_df
from onekey_algo.custom.utils import print_join_info

# Assemble the modelling table for the training cohort by inner-joining, on 'ID':
# clinical features -> pathology prediction -> cohort membership -> survival labels.
subset = 'train'
Clinic_results = pd.read_csv(get_param_in_cwd('clinic_file'), header=0)
Path_results = pd.read_csv(f'./results/metastasis_sup_XGBoost_all.csv', header=0)
# The prediction file is expected to have exactly three columns; the middle one is
# given the placeholder name '-' and the last one is exposed as 'Metastasis'.
Path_results.columns = ['ID', '-', 'Metastasis']
# Min-max normalisation of the pathology scores is intentionally left disabled:
# Path_results = normalize_df(Path_results, not_norm=['ID'], method='minmax')
ALL_results = (
    Clinic_results
    .merge(Path_results, on='ID', how='inner')
    .merge(group[group[group_info] == subset], on='ID', how='inner')
    .merge(label_data, on='ID', how='inner')
    .dropna(axis=1)  # drops whole COLUMNS that contain any missing value
)
ALL_results

In [None]:
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index

# Column names of the survival outcome in the merged table.
event_col = 'event'
duration_col = 'duration'

# Covariates plus outcome columns used to fit the Cox model.
cox_data = ALL_results[['Age', 'ki67', 'Metastasis', event_col, duration_col]]

# Penalised Cox proportional-hazards fit; an 'ID' column, if present, is never
# passed to the model.
cph = CoxPHFitter(penalizer=0.3)
cph.fit(cox_data.drop(columns='ID', errors='ignore'),
        duration_col=duration_col, event_col=event_col)
cph.print_summary()

# First row of the C-index comparison table: the fitted model followed by each single
# marker. Markers are negated before scoring, i.e. a larger marker value is treated
# as predicting a shorter duration.
c_index_list = [[
    cph.concordance_index_,
    *(concordance_index(cox_data[duration_col], -cox_data[marker], cox_data[event_col])
      for marker in ('ki67', 'Metastasis')),
    'Train',
]]
pd.DataFrame(c_index_list, columns=['Nomogram-Cox', 'ki67', 'Metastasis', 'Cohort'])

In [None]:
import os
import numpy as np

def get_prediction(model: 'CoxPHFitter', data, ID=None, **kwargs):
    """Collect per-sample predictions from a fitted Cox model into one table.

    Args:
        model: Fitted model exposing ``predict_partial_hazard`` and
            ``predict_expectation``. The annotation is quoted so that defining
            this function does not require ``CoxPHFitter`` to be imported first.
        data: Covariate table handed unchanged to both predict methods.
        ID: Optional pandas object holding sample identifiers. When given it
            becomes the leading ``'ID'`` column. It is concatenated column-wise,
            so it should share ``data``'s index.
        **kwargs: Accepted for call-site compatibility; not used.

    Returns:
        pandas.DataFrame with columns ``['HR', 'expectation']``, preceded by
        ``'ID'`` when ``ID`` is not None.
    """
    hr = model.predict_partial_hazard(data)
    expectation = model.predict_expectation(data)

    # Build the column list once instead of concatenating twice.
    pieces, names = [hr, expectation], ['HR', 'expectation']
    if ID is not None:
        pieces, names = [ID, *pieces], ['ID', *names]

    predictions = pd.concat(pieces, axis=1)
    predictions.columns = names
    return predictions
# Make sure the 'results' output directory exists; exist_ok=True makes this a
# no-op when the directory is already there.
os.makedirs('results', exist_ok=True)

In [None]:
from lifelines import CoxPHFitter
from lifelines.statistics import logrank_test
from lifelines import KaplanMeierFitter
from lifelines.plotting import add_at_risk_counts

# Score the current cohort with the fitted Cox model, split it into high/low risk by
# partial hazard, and draw Kaplan-Meier curves with a log-rank comparison.
cox_data = ALL_results[['ID', 'Age', 'ki67', 'Metastasis', event_col, duration_col]]
test_results = get_prediction(cph, cox_data, ID=cox_data['ID'])
test_results.to_csv(f'results/{task}_sup_cox_predictions_{subset}.csv', index=False)

# 'ID' is an identifier, not a covariate, so it is excluded from model inputs.
cox_features = cox_data.drop(columns='ID')
c_index = cph.score(cox_features, scoring_method="concordance_index")

# Append the partial hazard (it lands in a column named 0) and flag samples whose
# partial hazard exceeds 1 as high risk.
y_pred = cph.predict_partial_hazard(cox_features)
cox_data = pd.concat([cox_data, y_pred], axis=1)
cox_data['HR'] = cox_data[0] > 1

dem = cox_data['HR'].astype(bool)
has_high, has_low = bool(dem.any()), bool((~dem).any())

# A log-rank test needs two non-empty groups; otherwise report NaN instead of failing.
if has_high and has_low:
    results = logrank_test(cox_data[duration_col][dem], cox_data[duration_col][~dem],
                           event_observed_A=cox_data[event_col][dem],
                           event_observed_B=cox_data[event_col][~dem])
    p_value = results.p_value
else:
    results, p_value = None, float('nan')

# Report the computed p-value; only collapse to "<0.0001" when it really is that small.
p_text = '<0.0001' if p_value < 0.0001 else f'{p_value:.6f}'
plt.title(f"C-index:{c_index:.3f}, p_value={p_text}")

# Track only the fitters that were actually fitted in this run, so the at-risk table
# never touches an undefined (or stale) kmf_high / kmf_low.
fitted_kmfs = []
if has_high:
    kmf_high = KaplanMeierFitter()
    kmf_high.fit(cox_data[duration_col][dem], event_observed=cox_data[event_col][dem], label="High Risk")
    kmf_high.plot_survival_function(color='r')
    fitted_kmfs.append(kmf_high)
if has_low:
    kmf_low = KaplanMeierFitter()
    kmf_low.fit(cox_data[duration_col][~dem], event_observed=cox_data[event_col][~dem], label="Low Risk")
    kmf_low.plot_survival_function(color='g')
    fitted_kmfs.append(kmf_low)
if fitted_kmfs:
    add_at_risk_counts(*fitted_kmfs, rows_to_show=['At risk'])

# savefig does not create missing directories.
os.makedirs('img', exist_ok=True)
plt.savefig(f'img/RFS_sup_KM_{subset}.svg', bbox_inches='tight')
plt.show()

In [None]:
from onekey_algo.custom.components import nomogram

# Save the table that is handed to the nomogram helper.
cox_data.to_csv('results/nomo.csv', index=False)

# Draw the nomogram from the three covariates. The survival horizons are 12, 36 and 60
# (labelled 1/3/5 years, so durations are presumably in months - TODO confirm).
nomogram.nomogram(
    cox_data,
    duration=duration_col,
    result=event_col,
    columns=['Age', 'ki67', 'Metastasis'],
    survs=[12 * years for years in (1, 3, 5)],
    surv_names=['1 year survival', '3 years survival', '5 years survival'],
    with_r=False,
    width=6500,
    height=4000,
    save_name='RFS_sup_nomogram.png',
    x_range='0.01,0.1, 0.25,0.45,0.5,0.75,0.8,0.9,0.99',
)

In [None]:
import pandas as pd
from onekey_algo.custom.components.comp1 import normalize_df
# Assemble the modelling table for the test cohort by inner-joining, on 'ID':
# clinical features -> pathology prediction -> cohort membership -> survival labels.
subset = 'test'
Clinic_results = pd.read_csv(get_param_in_cwd('clinic_file'), header=0)
Path_results = pd.read_csv(f'./results/metastasis_sup_XGBoost_all.csv', header=0)
# The prediction file is expected to have exactly three columns; the middle one is
# given the placeholder name '-' and the last one is exposed as 'Metastasis'.
Path_results.columns = ['ID', '-', 'Metastasis']
# Min-max normalisation of the pathology scores is intentionally left disabled:
# Path_results = normalize_df(Path_results, not_norm=['ID'], method='minmax')
ALL_results = (
    Clinic_results
    .merge(Path_results, on='ID', how='inner')
    .merge(group[group[group_info] == subset], on='ID', how='inner')
    .merge(label_data, on='ID', how='inner')
    .dropna(axis=1)  # drops whole COLUMNS that contain any missing value
)
ALL_results

In [None]:
from lifelines import CoxPHFitter
from lifelines.statistics import logrank_test
from lifelines import KaplanMeierFitter

# Evaluate the Cox model on the test cohort: export predictions, compute the C-index,
# split into high/low risk by partial hazard and draw Kaplan-Meier curves.
cox_data = ALL_results[['ID', 'Age', 'ki67', 'Metastasis', event_col, duration_col]]

# Deliberately reuse `cph` as fitted on the training cohort. Fitting a fresh model on
# the test rows here would turn every number below into an in-sample result and the
# 'Test' C-index would no longer validate the model (or the nomogram built from it).
test_results = get_prediction(cph, cox_data, ID=cox_data['ID'])
test_results.to_csv(f'results/{task}_sup_cox_predictions_{subset}.csv', index=False)

# 'ID' is an identifier, not a covariate, so it is excluded from model inputs.
cox_features = cox_data.drop(columns='ID')
c_index = cph.score(cox_features, scoring_method="concordance_index")

# Append the partial hazard (it lands in a column named 0) and flag samples whose
# partial hazard exceeds 1 as high risk.
y_pred = cph.predict_partial_hazard(cox_features)
cox_data = pd.concat([cox_data, y_pred], axis=1)
cox_data['HR'] = cox_data[0] > 1

dem = cox_data['HR'].astype(bool)
has_high, has_low = bool(dem.any()), bool((~dem).any())

# A log-rank test needs two non-empty groups; otherwise report NaN instead of failing.
if has_high and has_low:
    results = logrank_test(cox_data[duration_col][dem], cox_data[duration_col][~dem],
                           event_observed_A=cox_data[event_col][dem],
                           event_observed_B=cox_data[event_col][~dem])
    p_value = results.p_value
else:
    results, p_value = None, float('nan')
plt.title(f"C-index:{c_index:.3f}, p_value={p_value:.6f}")

# Track only the fitters that were actually fitted in this run, so the at-risk table
# never touches an undefined (or stale, train-cohort) kmf_high / kmf_low.
fitted_kmfs = []
if has_high:
    kmf_high = KaplanMeierFitter()
    kmf_high.fit(cox_data[duration_col][dem], event_observed=cox_data[event_col][dem], label="High Risk")
    kmf_high.plot_survival_function(color='r')
    fitted_kmfs.append(kmf_high)
if has_low:
    kmf_low = KaplanMeierFitter()
    kmf_low.fit(cox_data[duration_col][~dem], event_observed=cox_data[event_col][~dem], label="Low Risk")
    kmf_low.plot_survival_function(color='g')
    fitted_kmfs.append(kmf_low)
if fitted_kmfs:
    add_at_risk_counts(*fitted_kmfs, rows_to_show=['At risk'])

# savefig does not create missing directories.
os.makedirs('img', exist_ok=True)
plt.savefig(f'img/RFS_sup_KM_{subset}.svg', bbox_inches='tight')
plt.show()

In [None]:
from lifelines.utils import concordance_index

# C-index row for the test cohort: the Cox model followed by each single marker.
# Markers are negated before scoring, i.e. a larger marker value is treated as
# predicting a shorter duration.
test_row = [c_index,
            concordance_index(cox_data[duration_col], -cox_data['ki67'], cox_data[event_col]),
            concordance_index(cox_data[duration_col], -cox_data['Metastasis'], cox_data[event_col]),
            'Test']

# Replace any earlier 'Test' row in place rather than blindly appending, so re-running
# this cell does not pile up duplicate rows in the summary table.
c_index_list[:] = [row for row in c_index_list if row[-1] != 'Test'] + [test_row]
pd.DataFrame(c_index_list, columns=['Nomogram-Cox', 'ki67', 'Metastasis', 'Cohort'])