In [1]:
import pandas as pd
from lifelines.datasets import load_rossi
from lifelines import CoxPHFitter

In [2]:
# Clinical covariates treated as categorical / ordinal codes.
discrete_features = [
    'Sex',
    'Smoking history',
    'ECOG score',
    'Histologic type',
    'Classification of Immunotherapy Drugs',
    'Therapy line',
    'BMI category',
    'Hemoglobin levels',
    'Albumin levels',
    'CRP levels',
    'LIPI',
    'Stage at diagnosis',
    'COPD',
    'Bone metastasis',
    # NOTE(review): two spaces (sic) -- presumably matches the spreadsheet header; verify.
    'Brain  metastasis',
    'Liver metastasis',
    'Pleural effusion',
    'Pericardial effusion',
]

# Continuous covariates; the Cox cells min-max scale exactly these columns.
# The variable name keeps its historical spelling because other cells reference it.
continue_feautres = [
    'Age', 'Height', 'Weight', 'BMI',
    'Hemoglobin', 'Albumin',
    'NLR', 'LMR', 'PLR', 'ALI', 'SII',
]

## Cox analysis for PFS

In [23]:
# --- Merge data --------------------------------------------------------------
# Enhancement Rad-score joined to the clinical table on the patient key.
df_RF = pd.read_table('./results/radscore/Label_0_enhancement_radscore_RF.tsv')
df_RF = df_RF.loc[:, ['patient_id', 'label_label_0', 'Rad_score']]
df_cli = pd.read_excel('./data/immunotherapy_112_V3.xlsx')
df_merge1 = pd.merge(left=df_cli, right=df_RF, left_on='Rad_id', right_on='patient_id')
df_merge1 = df_merge1.rename(columns={'Rad_score': 'Rad_score_enhancement'})

# Pulmonary Rad-score joined the same way; this frame becomes the modelling table.
df_RF = pd.read_table('./results/radscore/Label_0_pulmonary_radscore_RF.tsv')
df_RF = df_RF.loc[:, ['patient_id', 'label_label_0', 'Rad_score']]
df_merge = pd.merge(left=df_cli, right=df_RF, left_on='Rad_id', right_on='patient_id')
df_merge = df_merge.drop(columns=['patient_id_x', 'patient_id_y', 'label_label_0'])

# Strip any "(...)" / full-width "（...）" suffix and surrounding blanks from the headers.
cols = [name.split('(')[0].split('（')[0].strip() for name in df_merge.columns.to_list()]
df_merge.columns = cols
df_merge = df_merge.rename(columns={'Rad_score': 'Rad_score_pulmonary'})

# Attach the enhancement score by patient key, not by row position: the two merges
# only share row positions when both Rad-score files cover exactly the same patients.
enhancement_by_id = (
    df_merge1.drop_duplicates(subset='Rad_id')
    .set_index('Rad_id')['Rad_score_enhancement']
)
df_merge['Rad_score_enhancement'] = df_merge['Rad_id'].map(enhancement_by_id)
n_missing = int(df_merge['Rad_score_enhancement'].isna().sum())
if n_missing:
    print('warning: {} patient(s) have no enhancement Rad_score; '
          'the column will be removed by the dropna step below'.format(n_missing))

# --- Scale feature values ----------------------------------------------------
df_merge = df_merge.drop(columns='Rad_id')
df_merge = df_merge.dropna(axis=1, how='any')  # drops every COLUMN that contains a missing value
for col in continue_feautres:
    col_min = df_merge[col].min()
    col_max = df_merge[col].max()
    df_merge[col] = (df_merge[col] - col_min) / (col_max - col_min)  # min-max scaling to [0, 1]

# --- Univariable Cox analysis ------------------------------------------------
# One single-covariate model per candidate column. The first six columns are skipped
# (presumably identifier / outcome columns -- confirm against the spreadsheet layout).
single_cox_rows = []
select_var = []
skipped_vars = []
for var in df_merge.columns.to_list()[6:]:
    cph = CoxPHFitter()
    df_tmp = df_merge.loc[:, ['PFS', 'PFS_status', var]]
    try:
        cph.fit(df_tmp, duration_col='PFS', event_col='PFS_status')
    except Exception as err:
        # A covariate that cannot be fitted is left out, but reported instead of hidden.
        skipped_vars.append(var)
        print('skipped {}: {}'.format(var, err))
        continue
    pvalue = cph.summary['p'].values[0]  # scalar p-value of the single covariate
    if pvalue < 0.05:
        select_var.append(var)
    single_cox_rows.append({'var': var, 'single_cox_pvalue': pvalue})

# Build the table once; DataFrame.append no longer exists in pandas >= 2.0.
df_single_cox = pd.DataFrame(single_cox_rows, columns=['var', 'single_cox_pvalue'])
print('sigle COX result： {}'.format(len(select_var)))
print('select var by single cox：', select_var)
df_single_cox.to_csv('single_cox_PFS.tsv', index=False, sep='\t')

# --- Multivariable Cox analysis ----------------------------------------------
# Fixed covariate list: the univariable-significant variables recorded in this cell's
# output, with Rad_score_enhancement deliberately left out. It overrides the list
# computed above, so update it by hand if the univariable screen changes.
select_var = ['Histologic type', 'PLR', 'Stage at diagnosis', 'Bone metastasis', 'Pericardial effusion', 'Rad_score_pulmonary']
cph = CoxPHFitter()
df_tmp = df_merge.loc[:, ['PFS', 'PFS_status'] + select_var]
cph.fit(df_tmp, duration_col='PFS', event_col='PFS_status')
df_result = cph.summary.sort_values('p')
df_result.columns = [name.replace(' ', '_') for name in df_result.columns]
idx = [name.replace('-', '.') for name in df_result.index.to_list()]
df_result.index = idx

# Keep only the hazard ratio, its 95% confidence bounds and the p-value.
df_result = df_result.loc[:, ['exp(coef)', 'exp(coef)_lower_95%', 'exp(coef)_upper_95%', 'p']]
df_result.columns = ['HR', 'HR_lower_95%', 'HR_high_95%', 'pvalue']

df_result.to_csv('./results/cox/multi_var_cox_PFS.tsv', index=True, sep='\t')
df_merge.to_csv('./results/merge/merge_data_PFS.tsv', index=True, sep='\t')
df_result

单变量COX分析筛选到的变量总数为： 7
单变量COX分析筛选的特征为： ['Histologic type', 'PLR', 'Stage at diagnosis', 'Bone metastasis', 'Pericardial effusion', 'Rad_score_pulmonary', 'Rad_score_enhancement']


Unnamed: 0,HR,HR_lower_95%,HR_high_95%,pvalue
Rad_score_pulmonary,10.943319,3.504911,34.168121,3.8e-05
PLR,5.683612,1.680142,19.226616,0.005199
Bone metastasis,1.399506,1.0567,1.853523,0.019043
Pericardial effusion,1.514607,0.945747,2.425631,0.084021
Histologic type,1.163061,0.734133,1.842597,0.519934
Stage at diagnosis,1.019823,0.793069,1.311411,0.878406


## Cox analysis for OS

In [24]:
# --- Merge data --------------------------------------------------------------
# Enhancement Rad-score joined to the clinical table on the patient key.
df_RF = pd.read_table('./results/radscore/Label_1_enhancement_radscore_RF.tsv')
df_RF = df_RF.loc[:, ['patient_id', 'label_label_1', 'Rad_score']]
df_cli = pd.read_excel('./data/immunotherapy_112_V3.xlsx')
df_merge1 = pd.merge(left=df_cli, right=df_RF, left_on='Rad_id', right_on='patient_id')
df_merge1 = df_merge1.rename(columns={'Rad_score': 'Rad_score_enhancement'})

# Pulmonary Rad-score joined the same way; this frame becomes the modelling table.
df_RF = pd.read_table('./results/radscore/Label_1_pulmonary_radscore_RF.tsv')
df_RF = df_RF.loc[:, ['patient_id', 'label_label_1', 'Rad_score']]
df_merge = pd.merge(left=df_cli, right=df_RF, left_on='Rad_id', right_on='patient_id')
df_merge = df_merge.drop(columns=['patient_id_x', 'patient_id_y', 'label_label_1'])

# Strip any "(...)" / full-width "（...）" suffix and surrounding blanks from the headers.
cols = [name.split('(')[0].split('（')[0].strip() for name in df_merge.columns.to_list()]
df_merge.columns = cols
df_merge = df_merge.rename(columns={'Rad_score': 'Rad_score_pulmonary'})

# Attach the enhancement score by patient key, not by row position: the two merges
# only share row positions when both Rad-score files cover exactly the same patients.
enhancement_by_id = (
    df_merge1.drop_duplicates(subset='Rad_id')
    .set_index('Rad_id')['Rad_score_enhancement']
)
df_merge['Rad_score_enhancement'] = df_merge['Rad_id'].map(enhancement_by_id)
n_missing = int(df_merge['Rad_score_enhancement'].isna().sum())
if n_missing:
    print('warning: {} patient(s) have no enhancement Rad_score; '
          'the column will be removed by the dropna step below'.format(n_missing))

# --- Scale feature values ----------------------------------------------------
df_merge = df_merge.drop(columns='Rad_id')
df_merge = df_merge.dropna(axis=1, how='any')  # drops every COLUMN that contains a missing value
for col in continue_feautres:
    col_min = df_merge[col].min()
    col_max = df_merge[col].max()
    df_merge[col] = (df_merge[col] - col_min) / (col_max - col_min)  # min-max scaling to [0, 1]

# --- Univariable Cox analysis ------------------------------------------------
# One single-covariate model per candidate column. The first six columns are skipped
# (presumably identifier / outcome columns -- confirm against the spreadsheet layout).
single_cox_rows = []
select_var = []
skipped_vars = []
for var in df_merge.columns.to_list()[6:]:
    cph = CoxPHFitter()
    df_tmp = df_merge.loc[:, ['OS', 'OS_status', var]]
    try:
        cph.fit(df_tmp, duration_col='OS', event_col='OS_status')
    except Exception as err:
        # A covariate that cannot be fitted is left out, but reported instead of hidden.
        skipped_vars.append(var)
        print('skipped {}: {}'.format(var, err))
        continue
    pvalue = cph.summary['p'].values[0]  # scalar p-value of the single covariate
    if pvalue < 0.05:
        select_var.append(var)
    single_cox_rows.append({'var': var, 'single_cox_pvalue': pvalue})

# Build the table once; DataFrame.append no longer exists in pandas >= 2.0.
df_single_cox = pd.DataFrame(single_cox_rows, columns=['var', 'single_cox_pvalue'])
print('sigle COX result： {}'.format(len(select_var)))
print('select var by single cox：', select_var)
df_single_cox.to_csv('single_cox_OS.tsv', index=False, sep='\t')

# --- Multivariable Cox analysis ----------------------------------------------
# Fixed covariate list: the univariable-significant variables recorded in this cell's
# output, with Rad_score_enhancement deliberately left out. It overrides the list
# computed above, so update it by hand if the univariable screen changes.
select_var = ['Histologic type', 'BMI', 'LIPI', 'NLR', 'PLR', 'SII', 'Liver metastasis', 'Rad_score_pulmonary']
cph = CoxPHFitter()
df_tmp = df_merge.loc[:, ['OS', 'OS_status'] + select_var]
cph.fit(df_tmp, duration_col='OS', event_col='OS_status')
df_result = cph.summary.sort_values('p')
df_result.columns = [name.replace(' ', '_') for name in df_result.columns]
idx = [name.replace('-', '.') for name in df_result.index.to_list()]
df_result.index = idx

# Keep only the hazard ratio, its 95% confidence bounds and the p-value.
df_result = df_result.loc[:, ['exp(coef)', 'exp(coef)_lower_95%', 'exp(coef)_upper_95%', 'p']]
df_result.columns = ['HR', 'HR_lower_95%', 'HR_high_95%', 'pvalue']

df_result.to_csv('./results/cox/multi_var_cox_OS.tsv', index=True, sep='\t')
df_merge.to_csv('./results/merge/merge_data_OS.tsv', index=True, sep='\t')
df_result


单变量COX分析筛选到的变量总数为： 9
单变量COX分析筛选的特征为： ['Histologic type', 'BMI', 'LIPI', 'NLR', 'PLR', 'SII', 'Liver metastasis', 'Rad_score_pulmonary', 'Rad_score_enhancement']


Unnamed: 0,HR,HR_lower_95%,HR_high_95%,pvalue
Rad_score_pulmonary,58.80574,18.481959,187.107604,5.232094e-12
Liver metastasis,1.78132,1.260623,2.517089,0.001064561
Histologic type,1.556616,0.9879,2.452733,0.05645768
PLR,5.039422,0.255499,99.396624,0.287759
BMI,0.624148,0.183923,2.118062,0.4495851
LIPI,0.925228,0.620249,1.380168,0.7032991
NLR,1.650557,0.103036,26.440525,0.7232731
SII,0.867583,0.063106,11.927519,0.9154049
