# 环境说明

In [None]:
# 安装所需的包
install.packages("rms")

In [7]:
sessionInfo()$otherPkgs$rms

Package: rms
Version: 7.0-0
Date: 2025-01-15
Title: Regression Modeling Strategies
Authors@R: person(given = c("Frank", "E"), family = "Harrell Jr", role
        = c("aut", "cre"), email = "fh@fharrell.com")
Maintainer: Frank E Harrell Jr <fh@fharrell.com>
Depends: R (>= 4.4.0), Hmisc (>= 5.2-2)
Imports: methods, survival, quantreg, ggplot2, Matrix, SparseM, rpart,
        nlme (>= 3.1-123), polspline, multcomp, htmlTable (>= 1.11.0),
        htmltools, MASS, cluster, digest, colorspace, knitr, grDevices
Suggests: boot, tcltk, plotly (>= 4.5.6), mice, rmsb, nnet, VGAM,
        lattice, kableExtra
Description: Regression modeling, testing, estimation, validation,
        graphics, prediction, and typesetting by storing enhanced model
        design attributes in the fit.  'rms' is a collection of
        functions that assist with and streamline modeling.  It also
        contains functions for binary and ordinal logistic regression
        models, ordinal models for continuous Y with a

# 代码实现

In [2]:
# 加载所需的包
library(rms)
library(pROC)

"package 'rms' was built under R version 4.4.2"
Loading required package: Hmisc

"package 'Hmisc' was built under R version 4.4.2"

Attaching package: 'Hmisc'


The following objects are masked from 'package:base':

    format.pval, units


"package 'pROC' was built under R version 4.4.2"
Type 'citation("pROC")' for a citation.


Attaching package: 'pROC'


The following objects are masked from 'package:stats':

    cov, smooth, var




In [3]:
# 读取数据
data <- read.csv("data/train.csv")

In [4]:
head(data)

Unnamed: 0_level_0,Age,Blood_Sugar,Blood_Pressure,Weight,Heart_Disease
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<int>
1,58,99.97546,106.3281,62.44868,0
2,71,93.60951,134.1907,67.87585,0
3,48,119.13195,115.7733,49.2631,0
4,34,98.62651,117.652,93.11179,0
5,62,114.05609,119.4568,71.04322,0
6,27,91.46011,131.4862,80.83883,0


## lrm

In [5]:
# 开始建模
lr1 <- lrm(Heart_Disease ~ ., data = data)
print(lr1)

# 输出回归系数并换行
# coefs <- coef(lr1)
# 输出系数名称和对应的值，每行显示一个系数
# for (name in names(coefs)) {
#   cat(name, ": ", coefs[name], "\n")
# }

# 计算概率预测值
probabilities <- predict(lr1)
# 设定阈值 0.5，将概率值转换为 0/1 分类
predicted_class <- ifelse(probabilities > 0.5, 1, 0)
# 计算准确率
accuracy <- mean(predicted_class == data$Heart_Disease)
print(paste("模型准确率:", round(accuracy, 4)))
# 计算 AUC
roc_obj <- roc(data$Heart_Disease, probabilities)
auc_value <- auc(roc_obj)
print(paste("AUC:", round(auc_value, 6)))

Logistic Regression Model

lrm(formula = Heart_Disease ~ ., data = data)

                       Model Likelihood     Discrimination    Rank Discrim.    
                             Ratio Test            Indexes          Indexes    
Obs          1000    LR chi2     238.07     R2       0.859    C       0.997    
 0            968    d.f.             4    R2(4,1000)0.209    Dxy     0.994    
 1             32    Pr(> chi2) <0.0001    R2(4,92.9)0.919    gamma   0.994    
max |deriv| 2e-05                           Brier    0.007    tau-a   0.062    

               Coef      S.E.    Wald Z Pr(>|Z|)
Intercept      -109.6707 24.8000 -4.42  <0.0001 
Age               0.4953  0.1116  4.44  <0.0001 
Blood_Sugar       0.5558  0.1228  4.52  <0.0001 
Blood_Pressure    0.0135  0.0213  0.63  0.5270  
Weight            0.0647  0.0307  2.11  0.0352  

[1] "模型准确率: 0.991"


Setting levels: control = 0, case = 1

Setting direction: controls < cases



[1] "AUC: 0.997062"


## glm

In [6]:
# 开始建模
lr2 <- glm(Heart_Disease ~ ., data = data)
print(lr2)

# # 输出回归系数并换行
# coefs <- coef(lr2)
# # 输出系数名称和对应的值，每行显示一个系数
# for (name in names(coefs)) {
#   cat(name, ": ", coefs[name], "\n")
# }

# 计算概率预测值
probabilities <- predict(lr2, type = "response")
# 设定阈值 0.5，将概率值转换为 0/1 分类
predicted_class <- ifelse(probabilities > 0.5, 1, 0)
# 计算准确率
accuracy <- mean(predicted_class == data$Heart_Disease)
print(paste("模型准确率:", round(accuracy, 4)))
# 计算 AUC
roc_obj <- roc(data$Heart_Disease, probabilities)
auc_value <- auc(roc_obj)
print(paste("AUC:", round(auc_value, 6)))


Call:  glm(formula = Heart_Disease ~ ., data = data)

Coefficients:
   (Intercept)             Age     Blood_Sugar  Blood_Pressure          Weight  
    -0.4494992       0.0021918       0.0035966      -0.0001765       0.0004126  

Degrees of Freedom: 999 Total (i.e. Null);  995 Residual
Null Deviance:	    30.98 
Residual Deviance: 26.26 	AIC: -789.9
[1] "模型准确率: 0.968"


Setting levels: control = 0, case = 1

Setting direction: controls < cases



[1] "AUC: 0.994932"
