# Make All Tables for Dissertation

Kendra Wyant  
January 3, 2025

In [None]:

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(source("https://github.com/jjcurtin/lab_support/blob/main/format_path.R?raw=true"))
suppressPackageStartupMessages(library(tidyposterior))
library(kableExtra)



Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows

# Chapter 2: EMA

## Data and calculations

In [None]:
# Participant disposition; the compact col_types string pins every column's
# type (c = character, D = date) instead of letting readr guess.
disposition <- file.path(path_processed_ema, "disposition.csv") |>
  read_csv(col_types = "ccDDcccccccccc")

# Screening data, restricted to participants flagged analysis == "yes" in
# disposition. The eleven DSM-5 items are recoded No/Yes -> 0/1 and summed
# into dsm5_total (an NA on any item makes the total NA).
screen <- file.path(path_shared, "screen.csv") |>
  read_csv(col_types = cols()) |>
  filter(subid %in% pull(filter(disposition, analysis == "yes"), subid)) |>
  mutate(across(dsm5_1:dsm5_11, \(item) recode(item, "No" = 0, "Yes" = 1)),
         dsm5_total = dsm5_1 + dsm5_2 + dsm5_3 + dsm5_4 + dsm5_5 + dsm5_6 +
           dsm5_7 + dsm5_8 + dsm5_9 + dsm5_10 + dsm5_11)

# Lapse records, keeping only rows not marked for exclusion
lapses <- file.path(path_shared, "lapses.csv") |>
  read_csv(col_types = cols()) |>
  filter(exclude == FALSE)

# Calcs to make df for table 1 (demographics and clinical characteristics)
# Analysis sample size, taken from the filtered screening data rather than
# hard-coded so percentages stay correct if the analysis sample changes.
n_total <- nrow(screen)

# Age row for Table 1: mean and SD to one decimal, plus observed min and max.
# sprintf() keeps the trailing zero (e.g. "41.0") that
# as.character(round(x, 1)) drops, so M and SD always show the same precision.
dem_age <- screen |>
  summarise(mean = sprintf("%.1f", round(mean(dem_1, na.rm = TRUE), 1)),
            SD = sprintf("%.1f", round(sd(dem_1, na.rm = TRUE), 1)),
            min = as.character(min(dem_1, na.rm = TRUE)),
            max = as.character(max(dem_1, na.rm = TRUE))) |>
  mutate(var = "Age",
         n = NA_real_,     # count and percent are left empty for this row
         perc = NA_real_) |>
  select(var, n, perc, everything())

# Sex: count and percent per category, preceded by a "Sex" header row
dem_sex <- screen |>
  count(var = dem_2) |>
  mutate(perc = n / sum(n) * 100) |>
  add_row(var = "Sex", .before = 1)

# Race: count and percent per category in the listed display order,
# preceded by a "Race" header row. Values outside the listed levels become NA.
dem_race <- screen |>
  count(var = factor(dem_3,
                     levels = c("American Indian/Alaska Native", "Asian",
                                "Black/African American", "White/Caucasian",
                                "Other/Multiracial"))) |>
  mutate(perc = n / sum(n) * 100) |>
  add_row(var = "Race", .before = 1)
  
  
# Hispanic/Latino/Spanish origin: collapse responses to Yes/No, then count and
# percent per category, preceded by a header row.
# Any response other than the explicit "No" option is treated as "Yes".
# Missing responses are kept as NA so they show up as their own row instead of
# being silently counted as "Yes" by the catch-all branch.
dem_ethnicity <- screen |>
  select(var = dem_4) |>
  mutate(var = case_when(is.na(var) ~ NA_character_,
                         var == "No, I am not of Hispanic, Latino, or Spanish origin" ~ "No",
                         TRUE ~ "Yes"),
         var = factor(var, levels = c("Yes", "No"))) |>
  group_by(var) |>
  summarise(n = n()) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Hispanic, Latino, or Spanish origin", .before = 1)

# Education: count and percent per category in the listed display order,
# preceded by an "Education" header row.
dem_education <- screen |>
  count(var = factor(dem_5,
                     levels = c("Less than high school or GED degree",
                                "High school or GED", "Some college",
                                "2-Year degree", "College degree",
                                "Advanced degree"))) |>
  mutate(perc = n / sum(n) * 100) |>
  add_row(var = "Education", .before = 1)

# Employment: dem_6_1 values "Full-time"/"Part-time" override dem_6 with the
# matching employed label; every other row keeps its dem_6 value. Categories
# are then counted in the listed display order, preceded by a header row.
dem_employment <- screen |>
  mutate(var = case_when(dem_6_1 == "Full-time" ~ "Employed full-time",
                         dem_6_1 == "Part-time" ~ "Employed part-time",
                         TRUE ~ dem_6),
         var = factor(var,
                      levels = c("Employed full-time", "Employed part-time",
                                 "Full-time student", "Homemaker", "Disabled",
                                 "Retired", "Unemployed",
                                 "Temporarily laid off, sick leave, or maternity leave",
                                 "Other, not otherwise specified"))) |>
  count(var) |>
  mutate(perc = n / sum(n) * 100) |>
  add_row(var = "Employment", .before = 1)

# Personal income row: whole-dollar mean, SD, min and max with thousands
# separators. mean, SD and min carry a "$" prefix; max does not, so the
# min-max range built later reads like "$0-200,000".
dem_income <- screen |>
  summarise(var = "Personal Income",
            n = NA_real_,
            perc = NA_real_,
            mean = str_c("$", format(round(mean(dem_7, na.rm = TRUE), 0),
                                     big.mark = ",")),
            SD = str_c("$", format(round(sd(dem_7, na.rm = TRUE), 0),
                                   big.mark = ",")),
            min = str_c("$", format(round(min(dem_7, na.rm = TRUE), 0),
                                    big.mark = ",")),
            # scientific = FALSE stops large values printing as e.g. "2e+05"
            max = format(round(max(dem_7, na.rm = TRUE), 0),
                         scientific = FALSE, big.mark = ","))

# Marital status: normalise the capitalisation of "Never Married", then count
# and percent per category in the listed display order, preceded by a header row.
dem_marital <- screen |>
  mutate(var = if_else(dem_8 == "Never Married", "Never married", dem_8),
         var = factor(var,
                      levels = c("Never married", "Married", "Divorced",
                                 "Separated", "Widowed"))) |>
  count(var) |>
  mutate(perc = n / sum(n) * 100) |>
  add_row(var = "Marital Status", .before = 1)

# DSM-5 AUD symptom count row: mean and SD to one decimal, plus min and max.
# sprintf() keeps a trailing zero (e.g. "9.0") that as.character(round(x, 1))
# would drop, so M and SD always show one decimal place.
dem_aud <- screen |>
  summarise(mean = sprintf("%.1f", round(mean(dsm5_total, na.rm = TRUE), 1)),
            SD = sprintf("%.1f", round(sd(dsm5_total, na.rm = TRUE), 1)),
            min = as.character(min(dsm5_total, na.rm = TRUE)),
            max = as.character(max(dsm5_total, na.rm = TRUE))) |>
  mutate(var = "DSM-5 AUD Symptom Count",
         n = NA_real_,     # count and percent are left empty for this row
         perc = NA_real_) |>
  select(var, n, perc, everything())

# Lapse rows per participant. Starting from the screen IDs keeps participants
# with no rows in lapses; their missing count is set to 0.
lapses_per_subid <- left_join(
  select(screen, subid),
  lapses |>
    janitor::tabyl(subid) |>
    select(-percent),
  by = "subid"
) |>
  mutate(n = if_else(is.na(n), 0, n),
         lapse = if_else(n > 0, "yes", "no"))

# Lapse section of Table 1:
#   - header row
#   - Yes/No rows: participants with at least one vs. no reported lapse,
#     as a count and as a percent of n_total
#   - "Number of reported lapses": mean, SD, min and max of per-participant counts
# bind_rows() fills the columns each part lacks with NA, so no placeholder
# columns or self-join are needed. Mean and SD use sprintf() so a trailing
# zero is kept (e.g. "12.0" rather than "12").
lapse_info <- lapses_per_subid |>
  mutate(var = factor(lapse, levels = c("yes", "no"), labels = c("Yes", "No"))) |>
  group_by(var) |>
  summarise(n = n()) |>
  mutate(perc = (n / n_total) * 100) |>
  bind_rows(lapses_per_subid |>
              summarise(var = "Number of reported lapses",
                        mean = sprintf("%.1f", round(mean(n), 1)),
                        SD = sprintf("%.1f", round(sd(n), 1)),
                        min = as.character(round(min(n), 1)),
                        max = as.character(round(max(n), 1)))) |>
  add_row(var = "Reported 1 or More Lapse During Study Period", .before = 1)

# Assemble Table 1: stack every section in display order, collapse min/max
# into a single "min-max" range string, round percentages to one decimal,
# and apply the display column names.
table_dem <- bind_rows(dem_age, dem_sex, dem_race, dem_ethnicity,
                       dem_education, dem_employment, dem_income,
                       dem_marital, dem_aud, lapse_info) |>
  mutate(range = str_c(min, "-", max),
         perc = round(perc, 1)) |>
  select(var, N = n, `%` = perc, M = mean, SD, Range = range)


In [None]:
# metrics
metrics_week <- read_csv(file.path(path_models_ema, "test_metrics_1week_0_v5_nested.csv"),
                         show_col_types = FALSE)
metrics_day <- read_csv(file.path(path_models_ema, "test_metrics_1day_0_v5_nested.csv"),
                        show_col_types = FALSE)
metrics_hour <- read_csv(file.path(path_models_ema, "test_metrics_1hour_0_v5_nested.csv"),
                         show_col_types = FALSE)

# Display label for each value of .metric, listed in the order the rows
# should appear in the table.
metric_labels <- c(roc_auc = "auROC",
                   sens = "sensitivity",
                   spec = "specificity",
                   bal_accuracy = "balanced accuracy",
                   ppv = "positive predictive value",
                   npv = "negative predictive value")

# Median .estimate per metric and model, one column per model.
# Rows are selected and ordered by metric name rather than by row position,
# so an extra or missing metric in the input cannot shuffle or mislabel rows.
metrics <- metrics_week |> 
  mutate(model = "Week") |> 
  bind_rows(metrics_day |> 
              mutate(model = "Day")) |> 
  bind_rows(metrics_hour |> 
              mutate(model = "Hour")) |> 
  group_by(.metric, model) |> 
  summarize(median = median(.estimate), .groups = "drop") |> 
  pivot_wider(names_from = model, values_from = median) |> 
  select(.metric, Week, Day, Hour) |> 
  filter(.metric %in% names(metric_labels)) |> 
  arrange(match(.metric, names(metric_labels)))

# Replace metric names with their display labels
table_perf <- metrics |> 
  mutate(.metric = unname(metric_labels[.metric])) |> 
  rename(Metric = .metric)


### Table 1: Demographic and Lapse Characteristics

In [None]:

# Render the demographics table with kable's default settings
knitr::kable(table_dem)


  -------------------------------------------------------------------------------------------
  var                                            N     \% M          SD         Range
  ------------------------------------------ ----- ------ ---------- ---------- -------------
  Age                                                     41         11.9       21-72

  Sex                                                                           

  Female                                        74   49.0                       

  Male                                          77   51.0                       

  Race                                                                          

  American Indian/Alaska Native                  3    2.0                       

  Asian                                          2    1.3                       

  Black/African American                         8    5.3                       

  White/Caucasian                              131   86.8                       

  Other/Multiracial                              7    4.6                       

  Hispanic, Latino, or Spanish origin                                           

  Yes                                            4    2.6                       

  No                                           147   97.4                       

  Education                                                                     

  Less than high school or GED degree            1    0.7                       

  High school or GED                            14    9.3                       

  Some college                                  41   27.2                       

  2-Year degree                                 14    9.3                       

  College degree                                58   38.4                       

  Advanced degree                               23   15.2                       

  Employment                                                                    

  Employed full-time                            72   47.7                       

  Employed part-time                            26   17.2                       

  Full-time student                              7    4.6                       

  Homemaker                                      1    0.7                       

  Disabled                                       7    4.6                       

  Retired                                        8    5.3                       

  Unemployed                                    18   11.9                       

  Temporarily laid off, sick leave, or           3    2.0                       
  maternity leave                                                               

  Other, not otherwise specified                 9    6.0                       

  Personal Income                                         \$34,298   \$31,807   \$0-200,000

  Marital Status                                                                

  Never married                                 67   44.4                       

  Married                                       32   21.2                       

  Divorced                                      45   29.8                       

  Separated                                      5    3.3                       

  Widowed                                        2    1.3                       

  DSM-5 AUD Symptom Count                                 8.9        1.9        4-11

  Reported 1 or More Lapse During Study                                         
  Period                                                                        

  Yes                                           84   55.6                       

  No                                            67   44.4                       

  Number of reported lapses                               6.8        12         0-75
  -------------------------------------------------------------------------------------------


### Table 2: Performance Metrics

In [None]:

# Render the performance metrics table with kable's default settings
knitr::kable(table_perf)


  Metric                             Week         Day        Hour
  --------------------------- ----------- ----------- -----------
  auROC                         0.8906310   0.8989930   0.9293432
  sensitivity                   0.8232301   0.8275224   0.8636420
  specificity                   0.8192581   0.8454889   0.8813554
  balanced accuracy             0.8275871   0.8345976   0.8536644
  positive predictive value     0.6301088   0.2997223   0.0254053
  negative predictive value     0.9435759   0.9877148   0.9994361


# Chapter 3: Lag

## Data and calculations (Table 1)

In [None]:
ci_baseline <- read_csv(here::here(path_models_lag, "ci_baseline.csv"), 
                        col_types = cols())

ci_lag <- read_csv(here::here(path_models_lag, "ci_lag.csv"), col_types = cols())

# Format one set of contrasts for display.
#   contrasts:       data frame with columns contrast, median, lower, upper,
#                    probability
#   header:          label for the section header row placed above the contrasts
#   contrast_levels: contrast labels in the order they should be displayed
# Returns a tibble with columns contrast (factor), median, ci, probability
# (all character), header row first.
format_contrasts <- function(contrasts, header, contrast_levels) {
  # Fixed three decimals so trailing zeros are kept ("0.050", "1.000"), which
  # as.character(round(x, 3)) would print as "0.05" and "1".
  # Adding 0 turns a rounded negative zero into 0 so it prints "0.000",
  # not "-0.000". Missing values stay NA instead of becoming the string "NA".
  fmt3 <- function(x) {
    out <- sprintf("%.3f", round(x, 3) + 0)
    out[is.na(x)] <- NA_character_
    out
  }

  contrasts |> 
    mutate(ci = str_c("[", fmt3(lower), ", ", fmt3(upper), "]"),
           median = fmt3(median),
           probability = fmt3(probability)) |> 
    select(contrast, median, ci, probability) |> 
    add_row(contrast = header, median = "", ci = "", probability = "") |> 
    # factor levels put the header first, then the contrasts in display order
    mutate(contrast = factor(contrast, levels = c(header, contrast_levels))) |> 
    arrange(contrast)
}

# Baseline contrasts followed by adjacent-lag contrasts
table_ci <- bind_rows(
  format_contrasts(ci_baseline, "Baseline Contrasts",
                   c("0 vs. 24", "0 vs. 72", "0 vs. 168", "0 vs. 336")),
  format_contrasts(ci_lag, "Adjacent Contrasts",
                   c("24 vs. 72", "72 vs. 168", "168 vs. 336"))
) |> 
  rename(Contrast = contrast,
         Median = median,
         `Bayesian CI` = ci,
         Probability = probability)


### Table 1: Model Comparisons

In [None]:

# Render the model comparison table with kable's default settings
knitr::kable(table_ci)


  Contrast             Median   Bayesian CI         Probability
  -------------------- -------- ------------------- -------------
  Baseline Contrasts                                
  0 vs. 24             0.006    \[0, 0.012\]        0.956
  0 vs. 72             0.018    \[0.012, 0.025\]    1
  0 vs. 168            0.023    \[0.016, 0.029\]    1
  0 vs. 336            0.041    \[0.033, 0.05\]     1
  Adjacent Contrasts                                
  24 vs. 72            0.012    \[0.006, 0.019\]    0.999
  72 vs. 168           0.004    \[-0.002, 0.011\]   0.862
  168 vs. 336          0.018    \[0.011, 0.026\]    1
