In [22]:
#Step 9 Mann-Whitney U Test for Blood Culture Comparison

# ✅ Load required libraries
library(readxl)
library(dplyr)
library(janitor)
library(tibble)
library(ggplot2)

# ✅ Load the Excel file; clean_names() normalises the headers so they can be
#    referenced by the snake_case names used below
file_path <- "/kaggle/input/raw-data-jaksa/Raw_data_JB_Ozlem.xlsx"
raw_data <- read_excel(file_path) %>% clean_names()

# ✅ Choose the grouping column
# NOTE(review): "positeve" presumably mirrors the header spelling in the
# workbook -- do not "fix" it here without renaming the source column.
hemoculture_col <- "hemocultures_positeve_yes_no"
fallback_col <- "infection_yes_no"

# The hemoculture column is usable only if it exists and has at least two
# distinct non-missing values; otherwise a two-group test is impossible.
hemoculture_usable <- hemoculture_col %in% names(raw_data) &&
  n_distinct(na.omit(raw_data[[hemoculture_col]])) >= 2

group_col <- if (hemoculture_usable) hemoculture_col else fallback_col

# Falling back changes the clinical meaning of every downstream result, so
# make it visible instead of switching columns silently.
if (!hemoculture_usable) {
  message(
    "Column '", hemoculture_col, "' has fewer than two groups; ",
    "grouping by '", fallback_col, "' instead."
  )
}

# Fail early with a clear message rather than deep inside the analysis loop.
if (!group_col %in% names(raw_data)) {
  stop(
    "Grouping column '", group_col, "' not found in ", file_path,
    call. = FALSE
  )
}

# ✅ List of biomarkers to compare (days 1, 3, 5 and 7 for each marker)
biomarkers <- c(
  "presepsin_d1_ng_m_l", "presepsin_d3_ng_m_l",
  "presepsin_d5_ng_m_l", "presepsin_d7_ng_m_l",
  "ykl_40_d1_ng_m_l", "ykl_40_d3_ng_m_l",
  "ykl_40_d5_ng_m_l", "ykl_40_d7_ng_m_l"
)

missing_biomarkers <- setdiff(biomarkers, names(raw_data))
if (length(missing_biomarkers) > 0) {
  stop(
    "Biomarker column(s) not found: ",
    paste(missing_biomarkers, collapse = ", "),
    call. = FALSE
  )
}

# ✅ Compare one biomarker between the two groups
#
# Runs a two-sided Mann-Whitney U (Wilcoxon rank-sum) test on the values of
# column `bm`, split by the grouping column. Rows with a missing group or a
# missing biomarker value are dropped first.
#
# Args:
#   bm:    name of the biomarker column (string).
#   data:  data frame holding the biomarker and grouping columns.
#   group: name of the grouping column; the code 1 is treated as "positive"
#          and 0 as "negative".
#
# Returns a one-row tibble with the group medians, the p-value, the group
# sizes and a note. If either group is empty the test is skipped and the
# numeric fields are NA.
compare_biomarker <- function(bm, data = raw_data, group = group_col) {
  if (!all(c(bm, group) %in% names(data))) {
    stop("Column '", bm, "' or '", group, "' not found in data", call. = FALSE)
  }

  complete <- !is.na(data[[group]]) & !is.na(data[[bm]])
  values <- data[[bm]][complete]
  # Compare as text so numeric, character and factor codings of 0/1 all match.
  labels <- as.character(data[[group]][complete])

  pos <- values[labels == "1"]
  neg <- values[labels == "0"]

  # Guard on the groups actually tested: a column may hold two distinct
  # values (e.g. 1/2) and still leave one of these vectors empty, which
  # would make wilcox.test() abort the whole run.
  if (length(pos) == 0L || length(neg) == 0L) {
    return(tibble(
      Biomarker = bm,
      Median_Positive = NA_real_,
      Median_Negative = NA_real_,
      P_Value = NA_real_,
      N_Pos = NA_integer_,
      N_Neg = NA_integer_,
      Note = "Skipped: Only one group"
    ))
  }

  test <- wilcox.test(pos, neg)

  tibble(
    Biomarker = bm,
    Median_Positive = median(pos),
    Median_Negative = median(neg),
    P_Value = test$p.value,
    N_Pos = length(pos),
    N_Neg = length(neg),
    Note = ""
  )
}

# ✅ Run the comparison for every biomarker (list is named by biomarker)
results <- lapply(biomarkers, compare_biomarker)
names(results) <- biomarkers

# ✅ Combine and show results
# NOTE: p-values are unadjusted for the number of biomarkers tested.
summary_tbl <- bind_rows(results)
print(summary_tbl)


# A tibble: 8 × 7
  Biomarker           Median_Positive Median_Negative P_Value N_Pos N_Neg Note
  <chr>                         <dbl>           <dbl>   <dbl> <int> <int> <chr>
1 presepsin_d1_ng_m_l            2.44            3.47   0.444    11    55 ""
2 presepsin_d3_ng_m_l            5.98            3.23   0.371    11    55 ""
3 presepsin_d5_ng_m_l            2.68            2.43   0.770    11    55 ""
4 presepsin_d7_ng_m_l            2.12            2.08   0.897    11    55 ""
5 ykl_40_d1_ng_m_l              82.6            81.1    0.553    11    55 ""
6 ykl_40_d3_ng_m_l             129              86.4    0.212    11    55 ""
7 ykl_40_d5_ng_m_l              60.8            55.3    0.945    11    55 ""
8 ykl_40_d7_ng_m_l              64.5            56.0    0.594    11    55 ""

We compared Presepsin and YKL-40 levels between patients with positive and negative hemocultures using the Mann-Whitney U test. The results are presented below, including group medians, p-values, and sample sizes per group.


Biomarker	Median (Positive)	Median (Negative)	p-value	N (Positive)	N (Negative)
Presepsin Day 1	2.44	3.47	0.444	11	55
Presepsin Day 3	5.98	3.23	0.371	11	55
Presepsin Day 5	2.68	2.43	0.770	11	55
Presepsin Day 7	2.12	2.08	0.897	11	55
YKL-40 Day 1	82.6	81.1	0.553	11	55
YKL-40 Day 3	129.0	86.4	0.212	11	55
YKL-40 Day 5	60.8	55.3	0.945	11	55
YKL-40 Day 7	64.5	56.0	0.594	11	55

 Interpretation
No statistically significant differences were observed in Presepsin or YKL-40 levels between culture-positive and culture-negative groups on any day.

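This suggests that, while these biomarkers may be useful for general sepsis prediction, they do not significantly differentiate between patients with positive and negative blood cultures in this cohort. Because only 11 patients were culture-positive, the tests have limited statistical power, so the absence of a significant difference should not be read as evidence that the groups are equivalent.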