# Risk Score System and Algorithm

In [None]:
source("notebooks/initialize-data-analysis.r")


── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Rows: 5669 Columns: 113
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (4): patient_ID, site, dept, LZD_route
dbl  (20): patient_age, patient_weight, charlson, baseline_CLCR, baseline_WB...
lgl  (85): patient_sex, dept_ICU, dept_ER, dept_other, invasive_ETI, invasiv...
date  (4): baseline_date, LZD_start, LZD_end, test_date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column t

In [None]:
library(tidyverse)
library(tidymodels)


── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──

✔ broom        1.0.5     ✔ rsample      1.2.0
✔ dials        1.2.0     ✔ tune         1.1.2
✔ infer        1.0.5     ✔ workflows    1.1.3
✔ modeldata    1.3.0     ✔ workflowsets 1.0.1
✔ parsnip      1.1.1     ✔ yardstick    1.3.0
✔ recipes      1.0.9     

── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Use suppressPackageStartupMessages() to eliminate package startup messages

## Risk Score System

None of the code below is automated: the risk factors and their ranges are defined manually.

In [None]:
# Age bands for the points system. Lower bounds start at the minimum age (18);
# each upper bound is the next band's lower bound, and the last band is closed
# at the maximum age (101). Bands are 10 years wide except the first (18-30)
# and the last (90-101). The first band is the base-risk category.
risk_profile_1 <- tibble(
  risk_factor = "patient_age",
  min_range = c(18, 30, 40, 50, 60, 70, 80, 90)
) |>
  mutate(
    max_range = c(min_range[-1], 101),
    reference = (min_range + max_range) / 2, # band midpoint
    flag_base_risk = row_number() == 1
  )

# Baseline platelet bands, listed from highest to lowest. Reference values are
# midpoints computed with finite outer limits (1% percentile = 18,
# 99% percentile = 434); only afterwards are the outer limits opened to
# -Inf / Inf. The highest band (>= 150) is the base-risk category.
risk_profile_2 <- tibble(
  risk_factor = "baseline_PLT",
  min_range = c(150, 75, 50, 25, 18)
) |>
  mutate(
    max_range = c(434, min_range[-n()]),
    reference = (min_range + max_range) / 2, # uses the finite limits
    min_range = if_else(row_number() == n(), -Inf, min_range),
    max_range = if_else(row_number() == 1, Inf, max_range),
    flag_base_risk = row_number() == 1
  )

# Linezolid duration bands, listed from shortest to longest. Reference values
# are midpoints computed with finite outer limits (1% percentile = 3,
# 99% percentile = 26); only afterwards are the outer limits opened to
# -Inf / Inf. The shortest band is the base-risk category.
risk_profile_3 <- tibble(
  risk_factor = "LZD_duration",
  min_range = c(3, 7, 14)
) |>
  mutate(
    max_range = c(min_range[-1], 26),
    reference = (min_range + max_range) / 2, # uses the finite limits
    min_range = if_else(row_number() == 1, -Inf, min_range),
    max_range = if_else(row_number() == n(), Inf, max_range),
    flag_base_risk = row_number() == 1
  )

# Binary risk factor: each level (0 or 1) is its own degenerate range and its
# own reference value. Level 0 is the base-risk category.
risk_profile_4 <- tibble(
  risk_factor = "invasive_CRRT",
  min_range = c(0, 1),
  max_range = min_range,
  reference = min_range,
  flag_base_risk = c(TRUE, FALSE)
)

# Stack the per-factor profiles into one lookup table, one row per category.
risk_profile <- bind_rows(
  list(risk_profile_1, risk_profile_2, risk_profile_3, risk_profile_4)
)


In [None]:
# Scaling constant for the points system: the patient_age coefficient times 10,
# i.e. the change in the linear predictor for a 10-year increase in age.
# One point corresponds to this amount.
B_constant <- 10 * pull(
  filter(parameter_estimates, risk_factor == "patient_age"),
  beta
)

# Full points table: one row per risk-factor category with its coefficient,
# its distance from the base category on the linear-predictor scale
# (beta_sum), and that distance expressed in points (multiples of B_constant,
# rounded to the nearest integer).
#
# right_join() keeps every row of risk_profile, so a risk factor that has no
# row in parameter_estimates ends up with NA beta and NA points.
points_system_full <- parameter_estimates |>
  select(risk_factor, beta) |>
  right_join(risk_profile, by = "risk_factor") |>
  group_by(risk_factor) |>
  mutate(
    # which.max() on the logical flag picks the first flagged row within the
    # factor (row 1 if none is flagged); its reference value is the baseline.
    beta_sum = beta * (reference - reference[which.max(flag_base_risk)]),
    points = round(beta_sum / B_constant, 0)
  ) |>
  # Drop the grouping so the stored table (and anything derived from it) is a
  # plain tibble; the grouping is only needed for the per-factor baseline.
  ungroup()

# Reader-facing view of the points table: category bounds and their points.
points_system_factors <- select(
  points_system_full,
  risk_factor, min_range, max_range, points
)

# Lowest attainable total: the smallest points value of each risk factor,
# summed across factors.
min_points <- points_system_full |>
  group_by(risk_factor) |>
  summarise(factor_min = min(points)) |>
  pull(factor_min) |>
  sum()

# Highest attainable total: the largest points value of each risk factor,
# summed across factors.
max_points <- points_system_full |>
  group_by(risk_factor) |>
  summarise(factor_max = max(points)) |>
  pull(factor_max) |>
  sum()

# Convert a points total into an estimated risk via the inverse logit.
#
# The linear predictor is the intercept, plus the contribution of every base
# category (beta * reference over the rows flagged as base risk), plus
# B_constant for each point.
#
# Reads the globals parameter_estimates, points_system_full and B_constant.
#
# points: numeric vector of points totals.
# Returns a numeric vector of the same length as `points`.
risk_function <- function(points) {
  intercept_row <- filter(parameter_estimates, risk_factor == "Intercept")
  intercept <- pull(intercept_row, beta)

  base_rows <- filter(ungroup(points_system_full), flag_base_risk)
  beta_base <- sum(pull(base_rows, beta) * pull(base_rows, reference))

  linear_predictor <- intercept + beta_base + B_constant * points
  1 / (1 + exp(-linear_predictor))
}

# Risk lookup table: every integer total from the lowest to the highest
# attainable score, with its estimated risk.
points_system_risks <- tibble(
  points_total = seq(min_points, max_points)
) |>
  mutate(risk_estimate = risk_function(points_total))

# Render both lookup tables: points per risk-factor category, then the
# estimated risk for each attainable points total. The second table is
# computed above and appears in the recorded output, so it is displayed here
# as well.
points_system_factors |> knitr::kable()
points_system_risks |> knitr::kable()


  risk_factor       min_range   max_range   points
  --------------- ----------- ----------- --------
  patient_age              18          30        0
  patient_age              30          40        1
  patient_age              40          50        2
  patient_age              50          60        3
  patient_age              60          70        4
  patient_age              70          80        5
  patient_age              80          90        6
  patient_age              90         101        7
  baseline_PLT            150         Inf        0
  baseline_PLT             75         150        8
  baseline_PLT             50          75       10
  baseline_PLT             25          50       11
  baseline_PLT           -Inf          25       12
  LZD_duration           -Inf           7        0
  LZD_duration              7          14        2
  LZD_duration             14         Inf        5
  invasive_CRRT             0           0        0
  invasive_CRRT             1           1        6


    points_total   risk_estimate
  -------------- ---------------
               0       0.0500605
               1       0.0609661
               2       0.0740624
               3       0.0897031
               4       0.1082606
               5       0.1301086
               6       0.1555964
               7       0.1850152
               8       0.2185565
               9       0.2562666
              10       0.2980019
              11       0.3433959
              12       0.3918449
              13       0.4425226
              14       0.4944260
              15       0.5464499
              16       0.5974786
              17       0.6464804
              18       0.6925861
              19       0.7351422
              20       0.7737319
              21       0.8081662
              22       0.8384543
              23       0.8647605
              24       0.8873585
              25       0.9065883
              26       0.9228209
              27       0.9364304
              28       0.9477759
              29       0.9571890
              30       0.9649682
