# In [None]:  (Jupyter cell marker left over from the notebook export)
# 加载必要的库
library(readxl)
library(dplyr)
library(kableExtra)

# Read the raw data from worksheet "Sheet1" of the Excel workbook.
file_path <- "C:\\Users\\fsyan\\Desktop\\副本数据.xlsx"
df <- read_excel(file_path, sheet = "Sheet1")

# Show the column names exactly as they were read from the sheet.
colnames(df)

# Rename the columns positionally. The assignment below only makes sense when
# the sheet holds exactly these six columns in this order, so verify the
# column count first and stop with a readable message if it does not match.
expected_cols <- c("time", "status", "NLR", "CA199", "Size", "New_lesions")
if (ncol(df) != length(expected_cols)) {
  stop(
    "Sheet1 has ", ncol(df), " column(s) but ", length(expected_cols),
    " were expected; found: ", paste(colnames(df), collapse = ", "),
    call. = FALSE
  )
}
colnames(df) <- expected_cols

# Convert every categorical variable to a factor in a single pass instead of
# repeating one assignment per column.
factor_cols <- c("status", "NLR", "CA199", "Size", "New_lesions")
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Baseline analysis: descriptive statistics
# 1. Continuous variable (time)
# na.rm = TRUE drops missing values (NA) before each statistic is computed;
# without it a single NA would make the statistic NA.
continuous_stats <- df %>%
  summarise(
    Mean = mean(time, na.rm = TRUE),
    SD = sd(time, na.rm = TRUE),
    Median = median(time, na.rm = TRUE),
    Min = min(time, na.rm = TRUE),
    Max = max(time, na.rm = TRUE)
  )

# 2. Categorical variables (status, NLR, CA199, Size, New_lesions)
# Build one frequency table per variable and keep the tables in a one-row
# tibble of list-columns. across() replaces the superseded summarise_all().
categorical_stats <- df %>%
  select(status, NLR, CA199, Size, New_lesions) %>%
  summarise(across(everything(), ~ list(table(.x))))

# Flatten each frequency table into two comma-separated strings: the level
# names and the matching counts.
# vapply() guarantees exactly one string per variable. USE.NAMES = FALSE stops
# data.frame() from turning the variable names into row names, which would
# otherwise appear as a duplicate of the Variable column in the printed and
# HTML output.
categorical_stats_table <- data.frame(
  Variable = names(categorical_stats),
  Levels = vapply(
    categorical_stats,
    function(x) paste(names(x[[1]]), collapse = ", "),
    character(1),
    USE.NAMES = FALSE
  ),
  Counts = vapply(
    categorical_stats,
    function(x) paste(x[[1]], collapse = ", "),
    character(1),
    USE.NAMES = FALSE
  )
)

# Print each summary exactly once (the script previously printed both
# summaries twice in a row). cat() writes the heading as plain text, whereas
# print() on a string would add a "[1]" index and surrounding quotes.

# Continuous variable summary
cat("Continuous Variable (time) Statistics:\n")
print(continuous_stats)

# Categorical variable summary
cat("\nCategorical Variables Statistics:\n")
print(categorical_stats_table)
                  
# Render both summaries as Bootstrap-styled HTML tables (kableExtra).
# The results are neither assigned to a variable nor written to a file here.

# Continuous variable
kable_styling(
  kable(
    continuous_stats,
    format = "html",
    caption = "Continuous Variable (time) Statistics"
  ),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)

# Categorical variables
kable_styling(
  kable(
    categorical_stats_table,
    format = "html",
    caption = "Categorical Variables Statistics"
  ),
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)

##第二种方法
library(tableone)
library(readxl)
library(dplyr)                  
# Re-read the sheet so this method starts from the data as stored in the
# workbook.
file_path <- "C:\\Users\\fsyan\\Desktop\\副本数据.xlsx"
df <- read_excel(file_path, sheet = "Sheet1")

# Everything below refers to columns by these names. If the sheet's own
# headers do not already provide them, fall back to renaming by position,
# but only when the column count matches, so a mismatch fails loudly.
expected_cols <- c("time", "status", "NLR", "CA199", "Size", "New_lesions")
if (!all(expected_cols %in% colnames(df))) {
  if (ncol(df) != length(expected_cols)) {
    stop(
      "Sheet1 has ", ncol(df), " column(s) but ", length(expected_cols),
      " were expected; found: ", paste(colnames(df), collapse = ", "),
      call. = FALSE
    )
  }
  colnames(df) <- expected_cols
}

myVars <- colnames(df)                                        # all variables
catVars <- c("status", "NLR", "CA199", "Size", "New_lesions") # categorical
nonvar <- c("time")       # summarised as median and quartiles when requested
strataVar <- "CA199"      # grouping variable for the stratified table

# Overall (unstratified) table. addOverall is left out because it only
# applies when strata are supplied. The object is not named `table`, so it
# does not mask base::table().
tab_overall <- CreateTableOne(
  vars = myVars,        # condition 1: variables to summarise
  factorVars = catVars, # condition 2: variables treated as categorical
  data = df
)
table1 <- print(
  tab_overall,
  showAllLevels = TRUE, # show every level of each categorical variable
  nonnormal = nonvar    # condition 3: report time as median and quartiles
)

## Add a grouping condition; CA199 in this example.
# The stratifying variable is removed from vars/factorVars: summarising CA199
# within its own strata only yields a degenerate row.
tab_by_ca199 <- CreateTableOne(
  vars = setdiff(myVars, strataVar),        # condition 1
  factorVars = setdiff(catVars, strataVar), # condition 2
  strata = strataVar,                       # condition 3
  data = df,
  addOverall = TRUE                         # also include an overall column
)
table1 <- print(
  tab_by_ca199,
  showAllLevels = TRUE, # show every level of each categorical variable
  nonnormal = nonvar    # condition 4: report time as median and quartiles
)
# Same stratified table with the default summary for time (the SD-based one);
# this last assignment is the value left in table1.
table1 <- print(
  tab_by_ca199,
  showAllLevels = TRUE
)
