In [2]:
import pandas as pd
import numpy as np
from lifelines import CoxPHFitter

In [3]:
# Excel workbook with the analysis table; the path is relative to the
# notebook's working directory.
file_path = '附件_降维_XY.xlsx'

# Read the workbook with pandas' default options into the frame the cells
# below work on.
df = pd.read_excel(io=file_path)

In [4]:
# Survival framing for the Cox model:
#   time  - copy of the '检测孕周_天数' column (used as the duration)
#   event - 1 when the Y-chromosome concentration is at least 0.04 (4%), else 0
df['time'] = df['检测孕周_天数']
df['event'] = (df['Y染色体浓度'] >= 0.04).astype(int)

# Candidate covariate set 1. It is NOT the set used to build `cox_data` below.
# NOTE(review): '唯一比对的读段数  ' ends with two spaces - confirm that this
# matches the real column header before using this list for column selection.
covars = [
    '孕妇BMI', 'Y染色体的Z值', '检测抽血次数', 'GC含量', '在参考基因组上比对的比例',
    'IVF妊娠_编码', '唯一比对的读段数  ', '重复读段的比例', '生产次数', 'T13', 'T18', 'T21',
    '13号染色体的Z值', '18号染色体的Z值', '21号染色体的Z值',
]

# Candidate covariate set 2: the covariates that go into the modelling frame.
# '检测孕周_天数' is intentionally absent: it is the exact column copied into
# `time`, so listing it would make the duration its own predictor (perfectly
# correlated with the duration column), which distorts every other coefficient
# and can prevent the Cox fit from converging.
covars2 = [
    '原始读段数', 'X染色体浓度', 'X染色体的Z值',
    '13号染色体的GC含量', '18号染色体的GC含量', '21号染色体的GC含量',
    '年龄', '身高', '体重', '被过滤掉读段数的比例', '胎儿是否健康_编码', '末次月经_天数差',
]

# Alternative (disabled): take every numeric column as a covariate, excluding
# the survival columns and the duration's source column.
# cols = df.select_dtypes(include=[np.int64, np.float64]).columns.tolist()
# covars = [col for col in cols if col not in ['time', 'event', '检测孕周_天数']]

# Keep only the columns the model needs and drop every row with a missing
# value in any of them.
cox_data = df[['time', 'event'] + covars2].dropna()

print(f"用于Cox模型分析的有效样本量: {len(cox_data)}")
print(f"事件发生数 (Y浓度≥4%): {cox_data['event'].sum()}")

用于Cox模型分析的有效样本量: 1082
事件发生数 (Y浓度≥4%): 937


In [None]:
# Fit the Cox proportional-hazards model on the prepared frame: 'time' is the
# duration column, 'event' the event indicator, and every remaining column of
# `cox_data` is treated as a covariate. `fit` returns the fitter itself, so the
# call can be chained onto the constructor.
cph = CoxPHFitter().fit(cox_data, duration_col='time', event_col='event')

# Per-covariate summary table; the cells below read its 'coef', 'exp(coef)',
# 'se(coef)' and 'p' columns.
results = cph.summary

In [6]:
def add_significance(p_value):
    """Return the star marker conventionally attached to a p-value.

    Args:
        p_value: The p-value to classify.

    Returns:
        '***' when p < 0.001, '**' when p < 0.01, '*' when p < 0.05, and an
        empty string otherwise (a NaN compares false against every cutoff and
        therefore also yields an empty string).
    """
    # Cutoffs are checked from strictest to loosest; the first match wins.
    for cutoff, stars in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < cutoff:
            return stars
    return ''

In [7]:
# Attach the significance stars for each coefficient's p-value. `assign`
# builds a new frame with the extra 'significance' column, so the frame that
# `results` pointed to before this cell is not modified in place. The columns
# 'coef', 'exp(coef)', 'se(coef)' and 'p' keep their existing names, so no
# renaming step is needed.
results = results.assign(significance=results['p'].apply(add_significance))

In [None]:
# 6. Print a nicely formatted results table
heavy_rule = "=" * 60
print("\n" + heavy_rule)
print("Cox比例风险模型分析结果")
print(heavy_rule)

# Header cells are left-aligned; the value cells below are right-aligned to
# one character less, so every value column lines up with its header.
header_cells = [
    f"{'变量':<10}",
    f"{'coef':<8}",
    f"{'exp(coef)':<12}",
    f"{'se(coef)':<10}",
    f"{'p':<8}",
    f"{'显著性':<6}",
]
print(" ".join(header_cells))
print("-" * 60)

# One line per covariate: name, coefficient, hazard ratio, standard error,
# p-value and significance stars. itertuples(name=None) yields plain tuples
# whose first element is the index label.
table_columns = ['coef', 'exp(coef)', 'se(coef)', 'p', 'significance']
for name, coef, hazard_ratio, std_err, p_val, stars in results[table_columns].itertuples(name=None):
    print(f"{name:<10} {coef:>7.3f} {hazard_ratio:>11.3f} "
          f"{std_err:>9.3f} {p_val:>7.3f} {stars:>6}")

In [None]:
# Plain-language reading of each hazard ratio (HR = exp(coef)).
print("\n风险比(HR)解读:")

# Iterate over the covariates that are actually present in the fitted model's
# summary. Looping over a separately maintained name list is fragile: when that
# list differs from the set the model was fitted on, every membership check
# fails and nothing is reported.
for covariate, hr, p_val in results[['exp(coef)', 'p']].itertuples(name=None):
    if p_val < 0.05:
        # Significant effect: HR above 1 means the event happens sooner,
        # otherwise it is delayed.
        if hr > 1:
            print(f"  {covariate}: HR = {hr:.3f}, 是风险因素 (促进早期达标)")
        else:
            print(f"  {covariate}: HR = {hr:.3f}, 是保护因素 (推迟达标时间)")
    else:
        # Not significant at the 5% level (also reached when p is NaN).
        print(f"  {covariate}: HR = {hr:.3f}, 影响不显著 (p = {p_val:.3f})")
