# Cox生存分析

* `mydir`：自己的数据
* `ostime_column`：数据对应的生存时间，不一定非得是OST，也可以是DST、FST等。
* `os`：生存状态，不一定非得是OS，也可以是DS、FS等。

In [1]:
from lifelines import CoxPHFitter
import pandas as pd
from onekey_algo.custom.components.comp1 import normalize_df
from sklearn.model_selection import train_test_split
from onekey_algo import get_param_in_cwd
from onekey_algo.custom.components.comp1 import fillna

# Survival endpoint analysed in this notebook. The event column carries the
# endpoint's own name and the duration column is that name suffixed with "TIME".
survival_type = 'OS'
event_col, duration_col = survival_type, f"{survival_type}TIME"

def get_prediction(mn):
    """Collect the Cox predictions of model ``mn`` across all configured subsets.

    For every entry of the ``subsets`` parameter the file
    ``results/{mn}_cox_predictions_{subset}.csv`` is read; the per-subset
    tables are stacked row-wise in that order. The stacked table's columns are
    then renamed to ``ID``, ``{mn}_Exp`` and ``mn`` (so each file has to
    contribute exactly three columns), and only ``ID`` and ``mn`` are returned.

    Args:
        mn: Model name, used both in the file name and as the name of the
            returned prediction column.

    Returns:
        A DataFrame with the columns ``ID`` and ``mn``.
    """
    per_subset = []
    for subset in get_param_in_cwd('subsets'):
        per_subset.append(pd.read_csv(f'results/{mn}_cox_predictions_{subset}.csv'))
    stacked = pd.concat(per_subset, axis=0)
    stacked.columns = ['ID', f'{mn}_Exp', mn]
    wanted = ['ID', f'{mn}']
    return stacked[wanted]

# Build one wide table with a prediction column per compared model, then attach
# the survival labels and the cohort assignment of every sample.
data = None
for mn in get_param_in_cwd('compare_models'):
    pred = get_prediction(mn)
    # IDs are compared as strings in every join below.
    pred['ID'] = pred['ID'].astype(str)
    if data is None:
        data = pred
    else:
        # Join explicitly on the sample ID, like the label/group merges below.
        data = pd.merge(data, pred, on='ID')

if data is None:
    # Without this guard the failure would surface as an opaque error in pd.merge.
    raise ValueError("'compare_models' is empty: there are no predictions to merge.")

# data = normalize_df(data, not_norm=['ID', 'group'])
label_data = pd.read_csv(get_param_in_cwd('survival_file'), dtype={'ID':str})
# Read ID as str here too: predictions and labels both use string IDs, and a
# numeric-looking ID column would otherwise be parsed as int and break the merge.
group = pd.read_csv('group.csv', dtype={'ID': str})[['ID', 'group']]
# label_data = fillna(label_data, fill_mod='50%')
data = pd.merge(data, label_data, on='ID', how='inner')
data = pd.merge(data, group, on='ID', how='inner')
data.to_csv('results/joinit_info.csv', index=False)
data

Unnamed: 0,ID,Clinical-OS,MIL-OS,Combined-OS,Clinical-DFS,MIL-DFS,Combined-DFS,OSTIME,OS,DFSTIME,DFS,group
0,primary-1.nii.gz,62.596,18.307,18.688,59.652,24.457,15.011,20,1,20,1,train
1,primary-3.nii.gz,54.674,56.775,53.924,41.490,54.264,46.073,53,0,53,0,train
2,primary-4.nii.gz,62.596,64.168,65.318,59.652,58.958,63.271,53,0,53,0,train
3,primary-10.nii.gz,62.596,62.386,64.787,59.652,51.879,57.731,48,0,48,0,train
4,primary-12.nii.gz,58.096,58.745,59.356,59.652,58.741,63.138,43,1,29,1,train
...,...,...,...,...,...,...,...,...,...,...,...,...
453,secondary2-66.nii.gz,62.596,62.639,64.866,59.652,62.513,65.167,47,0,47,0,test
454,secondary2-67.nii.gz,62.596,60.913,64.294,59.652,58.236,62.822,56,0,56,0,test
455,secondary2-69.nii.gz,59.332,61.906,62.125,50.999,63.391,62.429,40,0,40,0,test
456,secondary2-71.nii.gz,54.674,62.673,58.911,41.490,57.955,51.452,39,0,39,0,test


In [2]:
from lifelines import CoxPHFitter
from lifelines.statistics import logrank_test
from lifelines import KaplanMeierFitter
from lifelines.plotting import add_at_risk_counts
from lifelines.utils import concordance_index
import numpy as np
from onekey_algo.custom.components.metrics import calc_value_95ci

# Concordance index with its 95% CI for every compared model on every cohort.
# The result has one row per cohort and one column per model.
model_names = get_param_in_cwd('compare_models')
cohorts = get_param_in_cwd('subsets')

metrics = []
for mn in model_names:
    # The text after the last '-' of the model name selects the endpoint: it
    # names the event column and, suffixed with "TIME", the duration column.
    task = mn.rsplit('-', 1)[-1]
    time_col = f"{task}TIME"
    metric = []
    for subset in cohorts:
        subdata = data.loc[data['group'] == subset]
        cindex = concordance_index(subdata[time_col], subdata[mn], subdata[task])
        # NOTE(review): the two bounds are printed in the order returned; the
        # rendered table shows the smaller bound first despite the ma/mi names.
        ma, mi = calc_value_95ci(cindex, sample_num=len(subdata))
        metric.append(f"{cindex:.3f}({ma:.3f}-{mi:.3f})")
    metrics.append(metric)

# metrics is model-major at this point; transpose so that rows are cohorts.
metrics = pd.DataFrame(np.array(metrics).T, columns=model_names)
metrics['Cohort'] = cohorts
metrics

Unnamed: 0,Clinical-OS,MIL-OS,Combined-OS,Clinical-DFS,MIL-DFS,Combined-DFS,Cohort
0,0.666(0.610-0.723),0.757(0.705-0.808),0.819(0.773-0.865),0.627(0.570-0.685),0.735(0.682-0.788),0.769(0.719-0.819),train
1,0.772(0.696-0.849),0.754(0.676-0.833),0.822(0.752-0.892),0.624(0.535-0.712),0.732(0.651-0.813),0.747(0.668-0.827),val
2,0.756(0.658-0.853),0.741(0.642-0.840),0.759(0.663-0.856),0.637(0.528-0.745),0.728(0.627-0.829),0.721(0.620-0.823),test
