In [10]:
library(glmnet)
library(ggplot2)
library(dplyr)
library(reshape2)
library(Boruta)
library(survival)
library(broom)
library(rms)

In [11]:
# Load the analysis dataset from CSV. Uses `<-` for assignment, matching
# the rest of the notebook.
df <- read.csv("final_imputed_with_sofa_firsticu.csv")

In [12]:
# Preview the first rows of the loaded data frame.
head(df)

Unnamed: 0_level_0,gender,age,red_blood_cells,hemoglobin,rdw,hematocrit,neutrophils,lymphocytes,platelets,alt,⋯,crrt_used,vasoactive_used,weight_admit,survival_time,outcome,survival_30,tibc_quantile_group,ferritin_quantile_group,iron_quantile_group,transferrin_quantile_group
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<int>,<int>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>
1,0,75.59,3.0,9.2,17.6,27.2,77.0,14.0,115,7,⋯,0,1,93.0,28,1,0,1,3,3,1
2,0,67.78,3.91,11.3,16.1,35.7,89.3,15.5,525,25,⋯,0,0,70.5,75,1,1,2,2,3,2
3,1,85.47,4.35,13.8,15.5,40.4,39.0,59.0,120,134,⋯,0,1,72.5,1,1,0,2,4,2,2
4,1,67.85,3.67,10.6,16.2,33.3,90.6,2.8,371,33,⋯,0,1,61.9,563,1,1,2,4,4,2
5,1,60.83,2.6,8.7,24.1,25.6,93.2,96.0,46,109,⋯,0,0,100.8,22,1,0,3,4,4,3
6,1,74.45,2.88,9.4,27.5,27.5,90.5,8.1,208,30,⋯,0,0,64.7,11,1,0,1,4,1,1


In [13]:
# Drop the four *_quantile_group columns and survival_30 from df; every
# other column is carried over unchanged.
data <- df %>%
  select(
    -transferrin_quantile_group,
    -iron_quantile_group,
    -ferritin_quantile_group,
    -tibc_quantile_group,
    -survival_30
  )

In [14]:
# 加载必要的包
library(survival)
library(dplyr)
library(tidyr)

# 1. Derive quartile factors for tibc, ferritin and iron.
#    Each marker is cut at its own 0/25/50/75/100th percentiles (NAs are
#    ignored when computing the breaks); include.lowest = TRUE keeps the
#    minimum value inside Q1. New columns are named <marker>_quartile.
data_processed <- data %>%
  mutate(
    across(
      c(tibc, ferritin, iron),
      function(marker) {
        cut(
          marker,
          breaks = quantile(marker, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
          include.lowest = TRUE,
          labels = c("Q1", "Q2", "Q3", "Q4")
        )
      },
      .names = "{.col}_quartile"
    )
  ) %>%
  # Re-declare every *_quartile column as a factor with levels Q1-Q4 so
  # that Q1 is the first (reference) level.
  mutate(
    across(
      ends_with("_quartile"),
      function(grp) factor(grp, levels = c("Q1", "Q2", "Q3", "Q4"))
    )
  )

# 2. Multivariable Cox model with the three quartile factors entered
#    together.
cox_model <- coxph(
  Surv(survival_time, outcome) ~ tibc_quartile + ferritin_quartile + iron_quartile,
  data = data_processed
)

# Show the model summary.
summary(cox_model)

# 3. P for trend: refit with each quartile factor converted to its numeric
#    level code, so every marker contributes a single numeric term.
cox_model_trend <- coxph(
  Surv(survival_time, outcome) ~
    as.numeric(tibc_quartile) +
    as.numeric(ferritin_quartile) +
    as.numeric(iron_quartile),
  data = data_processed
)

# Take the p-value column of the coefficient table and label one entry
# per marker.
trend_p_values <- setNames(
  summary(cox_model_trend)[["coefficients"]][, "Pr(>|z|)"],
  c("tibc_p_trend", "ferritin_p_trend", "iron_p_trend")
)

# Print the trend p-values.
print("P for trend values:")
print(trend_p_values)

# 4. Optional: per-marker trend test.
# NOTE: this fits a separate Cox model for each marker containing ONLY that
# marker's numeric quartile code, i.e. the result is NOT adjusted for the
# other markers. Its p-values are therefore expected to differ from those of
# a model that enters several trend terms together.

# Trend p-value for one marker from a univariable Cox model.
#
# Args:
#   variable_name: marker prefix; the column "<variable_name>_quartile" is
#     used as the trend variable.
#   data: data frame containing survival_time, outcome and the quartile
#     column.
#
# Returns:
#   The "Pr(>|z|)" entry of the trend term in the fitted model's
#   coefficient table (a single number).
#
# Stops with an informative error if a required column is absent.
test_trend <- function(variable_name, data) {
  quartile_col <- paste0(variable_name, "_quartile")

  missing_cols <- setdiff(
    c("survival_time", "outcome", quartile_col),
    names(data)
  )
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Build an explicit model frame so the trend variable cannot be shadowed
  # by a column of `data` that happens to be called `trend_var`.
  model_data <- data.frame(
    survival_time = data[["survival_time"]],
    outcome = data[["outcome"]],
    trend_var = as.numeric(data[[quartile_col]])
  )

  model <- coxph(Surv(survival_time, outcome) ~ trend_var, data = model_data)

  summary(model)$coefficients["trend_var", "Pr(>|z|)"]
}

# Compute the trend p-value for each marker. vapply() pins the result to
# exactly one numeric value per marker (sapply()'s return type depends on
# its input) and still names the entries after the markers.
# NOTE: test_trend() fits one single-variable model per marker, so these
# p-values are not adjusted for the other markers.
variables <- c("tibc", "ferritin", "iron")
trend_results <- vapply(
  variables,
  test_trend,
  numeric(1),
  data = data_processed
)

print("Trend p-values using linear contrast:")
print(trend_results)

Call:
coxph(formula = Surv(survival_time, outcome) ~ tibc_quartile + 
    ferritin_quartile + iron_quartile, data = data_processed)

  n= 1128, number of events= 809 

                        coef exp(coef) se(coef)      z Pr(>|z|)    
tibc_quartileQ2     -0.15677   0.85490  0.09640 -1.626  0.10392    
tibc_quartileQ3     -0.40741   0.66537  0.09945 -4.097 4.19e-05 ***
tibc_quartileQ4     -0.45783   0.63266  0.11639 -3.934 8.36e-05 ***
ferritin_quartileQ2  0.09963   1.10476  0.11499  0.866  0.38627    
ferritin_quartileQ3  0.16470   1.17904  0.11935  1.380  0.16758    
ferritin_quartileQ4  0.36602   1.44199  0.12239  2.991  0.00278 ** 
iron_quartileQ2     -0.14461   0.86536  0.10053 -1.438  0.15032    
iron_quartileQ3     -0.25649   0.77376  0.10332 -2.483  0.01304 *  
iron_quartileQ4     -0.03989   0.96089  0.10470 -0.381  0.70318    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                    exp(coef) exp(-coef) lower .95 upper .95
tibc_quartileQ2        

[1] "P for trend values:"
    tibc_p_trend ferritin_p_trend     iron_p_trend 
    6.581050e-06     1.285587e-03     4.934729e-01 
[1] "Trend p-values using linear contrast:"
        tibc     ferritin         iron 
1.177202e-10 6.659755e-08 9.981180e-01 


In [15]:
# 加载必要的包
library(survival)
library(dplyr)

# 1. Derive quartile factors for tibc, ferritin and iron.
#    All other columns of `data` are kept; only these three markers get a
#    new <marker>_quartile column. Each marker is cut at its own
#    0/25/50/75/100th percentiles (NAs ignored when computing the breaks),
#    with the minimum value included in Q1.
data_processed <- data %>%
  mutate(
    across(
      c(tibc, ferritin, iron),
      function(marker) {
        cut(
          marker,
          breaks = quantile(marker, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
          include.lowest = TRUE,
          labels = c("Q1", "Q2", "Q3", "Q4")
        )
      },
      .names = "{.col}_quartile"
    )
  ) %>%
  # Re-declare every *_quartile column as a factor with levels Q1-Q4 so
  # that Q1 is the first (reference) level.
  mutate(
    across(
      ends_with("_quartile"),
      function(grp) factor(grp, levels = c("Q1", "Q2", "Q3", "Q4"))
    )
  )

# Inspect the structure and the column names of the processed data.
str(data_processed)
names(data_processed)

# 2. Multivariable Cox model: adjustment covariates (edit as needed).
recommended_vars <- c(
  # demographics
  "age",
  "gender",
  # key comorbidities
  "sepsis",
  "aki",
  "heart_failure",
  # core laboratory measurements
  "albumin",
  "sodium",
  "lactate",
  "lymphocytes"
)

# Formula text: survival outcome on the left, quartile factors followed by
# the adjustment covariates on the right, joined with " + ".
formula_str <- c(
  "tibc_quartile", "ferritin_quartile", "iron_quartile",
  recommended_vars
) %>%
  paste(collapse = " + ") %>%
  paste("Surv(survival_time, outcome) ~", .)





'data.frame':	1128 obs. of  60 variables:
 $ gender                     : int  0 0 1 1 1 1 1 0 1 0 ...
 $ age                        : num  75.6 67.8 85.5 67.8 60.8 ...
 $ red_blood_cells            : num  3 3.91 4.35 3.67 2.6 2.88 4.24 3.08 3.3 3.93 ...
 $ hemoglobin                 : num  9.2 11.3 13.8 10.6 8.7 9.4 13.5 9.5 10.7 10.9 ...
 $ rdw                        : num  17.6 16.1 15.5 16.2 24.1 27.5 21.6 21.9 20.5 21.7 ...
 $ hematocrit                 : num  27.2 35.7 40.4 33.3 25.6 27.5 39.8 28.7 32.4 35.7 ...
 $ neutrophils                : num  77 89.3 39 90.6 93.2 90.5 97 99 82 86.4 ...
 $ lymphocytes                : num  14 15.5 59 2.8 96 8.1 9.6 90 38 12.1 ...
 $ platelets                  : num  115 525 120 371 46 208 509 521 558 707 ...
 $ alt                        : num  7 25 134 33 109 30 47 100 95 55 ...
 $ ast                        : num  189 222 212 102 59 ...
 $ total_bilirubin            : num  1.2 0.4 1.1 0.4 2.9 4.6 2.4 7 0.4 15.9 ...
 $ albumin              

In [16]:
# Option B: the three quartile factors plus the adjustment covariates in
# recommended_vars (edit that vector to match your own variable names).
# The formula is assembled as text and converted with as.formula().
formula_str <- c(
  "tibc_quartile", "ferritin_quartile", "iron_quartile",
  recommended_vars
) %>%
  paste(collapse = " + ") %>%
  paste("Surv(survival_time, outcome) ~", .)

cox_model_adjusted <- coxph(
  formula = as.formula(formula_str),
  data = data_processed
)

# Option C: use every column as a predictor (use with care: may overfit)
# cox_model_full <- coxph(
#   Surv(survival_time, outcome) ~ .,
#   data = data_processed %>% select(-tibc, -ferritin, -iron)  # drop the raw continuous markers
# )

# Show the summary of the covariate-adjusted model.
summary(cox_model_adjusted)

Call:
coxph(formula = as.formula(formula_str), data = data_processed)

  n= 1128, number of events= 809 

                         coef exp(coef)  se(coef)      z Pr(>|z|)    
tibc_quartileQ2     -0.169079  0.844442  0.100425 -1.684 0.092250 .  
tibc_quartileQ3     -0.286715  0.750726  0.105984 -2.705 0.006825 ** 
tibc_quartileQ4     -0.242955  0.784307  0.125258 -1.940 0.052424 .  
ferritin_quartileQ2  0.077147  1.080200  0.117588  0.656 0.511775    
ferritin_quartileQ3  0.241656  1.273356  0.121680  1.986 0.047035 *  
ferritin_quartileQ4  0.492829  1.636941  0.127099  3.878 0.000106 ***
iron_quartileQ2     -0.146348  0.863857  0.102864 -1.423 0.154814    
iron_quartileQ3     -0.240520  0.786219  0.106555 -2.257 0.023994 *  
iron_quartileQ4     -0.002987  0.997017  0.110392 -0.027 0.978412    
age                  0.011975  1.012047  0.003334  3.592 0.000329 ***
gender              -0.050341  0.950905  0.072991 -0.690 0.490396    
sepsis              -0.079624  0.923463  0.079685 -0.9

In [17]:

# 3. Adjusted P for trend: each quartile factor enters as its numeric level
#    code, together with the adjustment covariates in recommended_vars.

# Trend terms exactly as they appear in the formula; the same strings are
# the row names of the fitted coefficient table, so they are reused below to
# pick out the p-values by name instead of by row position.
trend_terms <- c(
  "as.numeric(tibc_quartile)",
  "as.numeric(ferritin_quartile)",
  "as.numeric(iron_quartile)"
)

# NOTE: this reassigns formula_str, replacing whatever formula text it held
# before this cell ran.
formula_str <- paste(
  "Surv(survival_time, outcome) ~",
  paste(c(trend_terms, recommended_vars), collapse = " + ")
)

cox_trend_adjusted <- coxph(
  as.formula(formula_str),
  data = data_processed
)

# Name-based lookup fails loudly if a trend term is missing, rather than
# silently returning p-values of other covariates.
trend_p_adjusted <- summary(cox_trend_adjusted)$coefficients[trend_terms, "Pr(>|z|)"]

names(trend_p_adjusted) <- c("tibc_p_trend", "ferritin_p_trend", "iron_p_trend")

print("调整后趋势P值:")
print(trend_p_adjusted)

[1] "调整后趋势P值:"
    tibc_p_trend ferritin_p_trend     iron_p_trend 
    3.429684e-02     1.795995e-05     7.821510e-01 


In [18]:
# Show the full coefficient table of the adjusted trend model.
summary(cox_trend_adjusted)

Call:
coxph(formula = as.formula(formula_str), data = data_processed)

  n= 1128, number of events= 809 

                                   coef exp(coef)  se(coef)      z Pr(>|z|)    
as.numeric(tibc_quartile)     -0.082864  0.920476  0.039150 -2.117 0.034297 *  
as.numeric(ferritin_quartile)  0.165776  1.180309  0.038653  4.289 1.80e-05 ***
as.numeric(iron_quartile)     -0.009937  0.990112  0.035935 -0.277 0.782151    
age                            0.010909  1.010969  0.003297  3.309 0.000936 ***
gender                        -0.044051  0.956905  0.072544 -0.607 0.543699    
sepsis                        -0.083132  0.920229  0.078250 -1.062 0.288056    
aki                            0.336396  1.399894  0.078142  4.305 1.67e-05 ***
heart_failure                 -0.125510  0.882047  0.079502 -1.579 0.114403    
albumin                       -0.221430  0.801372  0.069765 -3.174 0.001504 ** 
sodium                        -0.017414  0.982737  0.007208 -2.416 0.015693 *  
lactate       