In [1]:
# adaboost vs DJPSIIR
library(dplyr)
library(pROC)
# Data preparation ----
# Load the per-subject predictions of the two models being compared. Only the
# columns selected in the join below (NID, follow.up.years, pred_CIF and each
# file's own copy of the outcome label) are used.
djps <- read.csv("C:/path/merged_predSurv_xixi4.csv", stringsAsFactors = FALSE)
ada <- read.csv("C:/path/adaboost_predprob.csv", stringsAsFactors = FALSE)

# Pair the two prediction sets on subject id and follow-up horizon so that both
# ROC curves are built from exactly the same rows (required for a paired test).
# NOTE(review): assumes (NID, follow.up.years) is unique in each file; duplicate
# keys would multiply rows in the join -- confirm against the inputs.
merged <- inner_join(
  djps %>% select(NID, follow.up.years, djps_pred_CIF = pred_CIF, true_label),
  ada %>% select(NID, follow.up.years, ada_pred_CIF = pred_CIF, IIR_3revi),
  by = c("NID", "follow.up.years")
)

# Both files carry the outcome label; they must agree row by row.
# isTRUE() makes a missing label fail this check with the message below instead
# of crashing `if` with "missing value where TRUE/FALSE needed".
if (!isTRUE(all(merged$true_label == merged$IIR_3revi))) {
  stop("Inconsistent labels; please check for data alignment errors.")
}

# Follow-up horizons (values of follow.up.years) to evaluate.
year_list <- c(5, 6, 7)

# One single-row data frame per evaluated year, keyed by the year as a string.
results_list <- list()
for (year in year_list) {
  cat("\n==== Year", year, "====\n")
  dat_tmp <- merged %>% filter(follow.up.years == year)

  # A ROC curve needs both outcome classes; skip years that have only one.
  if (length(unique(dat_tmp$true_label)) < 2) {
    cat("This year's positive and negative samples are insufficient for the DeLong test.\n")
    next
  }

  # NOTE(review): roc() is left to auto-detect `levels` and `direction`
  # separately for each curve, as in the original analysis -- confirm that both
  # curves end up with the same direction.
  roc1 <- roc(dat_tmp$true_label, dat_tmp$djps_pred_CIF, quiet = TRUE)
  roc2 <- roc(dat_tmp$true_label, dat_tmp$ada_pred_CIF, quiet = TRUE)

  # AUROC and 95% CI; ci.auc() yields (lower bound, AUC, upper bound).
  auc1 <- as.numeric(auc(roc1))
  auc2 <- as.numeric(auc(roc2))
  ci1 <- as.numeric(ci.auc(roc1))
  ci2 <- as.numeric(ci.auc(roc2))

  # DeLong test for two correlated (paired) ROC curves.
  res <- roc.test(roc1, roc2, paired = TRUE, method = "delong")
  print(res)

  # Save results. unname() drops the "Z" label carried by the test statistic so
  # it does not become the row name of this one-row data frame.
  results_list[[as.character(year)]] <- data.frame(
    Year = year,
    AUC_DJPS = auc1,
    CI95_DJPS_Lower = ci1[1],
    CI95_DJPS_Center = ci1[2],
    CI95_DJPS_Upper = ci1[3],
    AUC_Ada = auc2,
    CI95_Ada_Lower = ci2[1],
    CI95_Ada_Center = ci2[2],
    CI95_Ada_Upper = ci2[3],
    p_value = res$p.value,
    statistic = unname(res$statistic)
  )
}

# Output results ----
results_df <- do.call(rbind, results_list)
# cat() interprets the "\n" escape; print() would show it literally, quoted.
cat("\n AUC Comparison Summary:\n")
print(results_df)

write.csv(
  results_df,
  "C:/path/delong_auc_compare_adaboost.csv",
  row.names = FALSE
)


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Type 'citation("pROC")' for a citation.


Attaching package: 'pROC'


The following objects are masked from 'package:stats':

    cov, smooth, var





==== Year 5 ====

	DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = 10.604, p-value < 2.2e-16
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 0.06706897 0.09748445
sample estimates:
AUC of roc1 AUC of roc2 
  0.9490934   0.8668167 


==== Year 6 ====

	DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = 4.1518, p-value = 3.299e-05
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 0.04508013 0.12570418
sample estimates:
AUC of roc1 AUC of roc2 
  0.8263762   0.7409840 


==== Year 7 ====

	DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = 2.5331, p-value = 0.01131
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 0.01840314 0.14427372
sample estimates:
AUC of roc1 AUC of roc2 
  0.8030573   0.7217189 

[1] "\n AUC Comparison Summary:"
  Year  AUC_DJPS CI95_DJPS_Lower CI95_DJPS_Center 

In [2]:
# LASSO vs DJPSIIR
library(dplyr)
library(pROC)

# Data preparation ----
# Load the per-subject predictions of the two models being compared. Only the
# columns selected in the join below (NID, follow.up.years, pred_CIF and each
# file's own copy of the outcome label) are used.
# NOTE(review): `ada` holds the LASSO predictions in this cell; the name is a
# carry-over from the AdaBoost comparison and is kept so the cell's global
# bindings stay unchanged.
djps <- read.csv("C:/path/merged_predSurv_xixi4.csv", stringsAsFactors = FALSE)
ada <- read.csv("C:/path/lasso_regression_predprob.csv", stringsAsFactors = FALSE)

# Pair the two prediction sets on subject id and follow-up horizon so that both
# ROC curves are built from exactly the same rows (required for a paired test).
# NOTE(review): assumes (NID, follow.up.years) is unique in each file; duplicate
# keys would multiply rows in the join -- confirm against the inputs.
merged <- inner_join(
  djps %>% select(NID, follow.up.years, djps_pred_CIF = pred_CIF, true_label),
  ada %>% select(NID, follow.up.years, LASSO_pred_CIF = pred_CIF, IIR_3revi),
  by = c("NID", "follow.up.years")
)

# Both files carry the outcome label; they must agree row by row.
# isTRUE() makes a missing label fail this check with the message below instead
# of crashing `if` with "missing value where TRUE/FALSE needed".
if (!isTRUE(all(merged$true_label == merged$IIR_3revi))) {
  stop("Inconsistent labels; please check for data alignment errors.")
}

# Follow-up horizons (values of follow.up.years) to evaluate.
year_list <- c(5, 6, 7)

# One single-row data frame per evaluated year, keyed by the year as a string.
results_list <- list()
for (year in year_list) {
  cat("\n==== Year", year, "====\n")
  dat_tmp <- merged %>% filter(follow.up.years == year)

  # A ROC curve needs both outcome classes; skip years that have only one.
  if (length(unique(dat_tmp$true_label)) < 2) {
    cat("This year's positive and negative samples are insufficient for the DeLong test.\n")
    next
  }

  # NOTE(review): roc() is left to auto-detect `levels` and `direction`
  # separately for each curve, as in the original analysis -- confirm that both
  # curves end up with the same direction.
  roc1 <- roc(dat_tmp$true_label, dat_tmp$djps_pred_CIF, quiet = TRUE)
  roc2 <- roc(dat_tmp$true_label, dat_tmp$LASSO_pred_CIF, quiet = TRUE)

  # AUROC and 95% CI; ci.auc() yields (lower bound, AUC, upper bound).
  auc1 <- as.numeric(auc(roc1))
  auc2 <- as.numeric(auc(roc2))
  ci1 <- as.numeric(ci.auc(roc1))
  ci2 <- as.numeric(ci.auc(roc2))

  # DeLong test for two correlated (paired) ROC curves.
  res <- roc.test(roc1, roc2, paired = TRUE, method = "delong")
  print(res)

  # Save results. unname() drops the "Z" label carried by the test statistic so
  # it does not become the row name of this one-row data frame.
  results_list[[as.character(year)]] <- data.frame(
    Year = year,
    AUC_DJPS = auc1,
    CI95_DJPS_Lower = ci1[1],
    CI95_DJPS_Center = ci1[2],
    CI95_DJPS_Upper = ci1[3],
    AUC_LASSO = auc2,
    CI95_LASSO_Lower = ci2[1],
    CI95_LASSO_Center = ci2[2],
    CI95_LASSO_Upper = ci2[3],
    p_value = res$p.value,
    statistic = unname(res$statistic)
  )
}

# Output results ----
results_df <- do.call(rbind, results_list)
# cat() interprets the "\n" escape; print() would show it literally, quoted.
cat("\n AUC Comparison Summary:\n")
print(results_df)

write.csv(
  results_df,
  "C:/path/delong_auc_compare_lasso_regression.csv",
  row.names = FALSE
)


==== Year 5 ====

	DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = 10.659, p-value < 2.2e-16
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 0.07018841 0.10181825
sample estimates:
AUC of roc1 AUC of roc2 
  0.9490934   0.8630901 


==== Year 6 ====

	DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = 5.1028, p-value = 3.347e-07
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 0.06251409 0.14048518
sample estimates:
AUC of roc1 AUC of roc2 
  0.8263762   0.7248765 


==== Year 7 ====

	DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = 2.665, p-value = 0.007698
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 0.02381989 0.15624752
sample estimates:
AUC of roc1 AUC of roc2 
  0.8030573   0.7130236 

[1] "\n AUC Comparison Summary:"
  Year  AUC_DJPS CI95_DJPS_Lower CI95_DJPS_Center 