# Event log creation

In [None]:
suppressPackageStartupMessages(library(tidyverse))
library(lubridate)
library(glue)
suppressPackageStartupMessages(library(bupaR))
library(processanimateR)
library(ggplot2)
library(ggfortify)
# On Debian / Ubuntu, run `sudo apt-get install libv8-dev` in a terminal window first, before using gtsummary
library(gtsummary)
library(networkD3)
suppressPackageStartupMessages(library(flextable))

In [None]:
# Cohort grouping to process; this number is interpolated into every
# "grouping{grouping_number}_..." input and output file name in this notebook.
grouping_number <- 1

In [None]:
# Load the IUC and AC cohort tables saved for the selected grouping.
cohort_iuc_df <- glue::glue("data/grouping{grouping_number}_iuc_combo_df.rds") %>%
  readRDS()
cohort_ac_df <- glue::glue("data/grouping{grouping_number}_ac_combo_df.rds") %>%
  readRDS()

In [None]:
# Build the IUC event log from the IUC cohort table.
event_iuc_df <- cohort_iuc_df %>%
  # Adults only: drop rows with no recorded age or an age outside 18-107
  filter(!is.na(age), age >= 18, age <= 107) %>%
  # activitylog() below is told the timestamp columns are "start" and
  # "complete", so the cohort's `end` column is exposed as `complete`
  rename(complete = end) %>%
  mutate(
    activity = as.factor(activity),
    resource_id = unique_id
  ) %>%
  # The column `activity_instance_id` is supplied as the case identifier
  activitylog(
    timestamps = c("start", "complete"),
    resource_id = "resource_id",
    activity_id = "activity",
    case_id = "activity_instance_id"
  ) %>%
  # Converted to eventlog format because process maps appear to require it
  to_eventlog()

In [None]:
# Build the AC event log from the AC cohort table.
event_ac_df <- cohort_ac_df %>%
  # Adults only: drop rows with no recorded age or an age under 18.
  # NOTE(review): the IUC event log also applies an upper bound (age <= 107);
  # confirm whether leaving it out here is intentional.
  filter(!is.na(age), age >= 18) %>%
  # activitylog() below is told the timestamp columns are "start" and
  # "complete", so the cohort's `end` column is exposed as `complete`
  rename(complete = end) %>%
  mutate(
    activity = as.factor(activity),
    resource_id = unique_id
  ) %>%
  # The column `activity_instance_id` is supplied as the case identifier
  activitylog(
    timestamps = c("start", "complete"),
    resource_id = "resource_id",
    activity_id = "activity",
    case_id = "activity_instance_id"
  ) %>%
  to_eventlog()

In [None]:
# Stack the AC and IUC event logs (AC rows first) into one combined table.
combo_df <- event_ac_df %>%
  bind_rows(event_iuc_df)

In [None]:
# Give every row belonging to a person the same sex and age.
#   sex: the first non-missing value recorded for that person, so that a gap
#        in the person's first row is not copied over their other rows (it
#        stays NA only when none of the person's rows has a value).
#   age: the value from the person's first row.
combo_df <- combo_df %>%
  group_by(person_id) %>%
  mutate(
    sex = first(sex[!is.na(sex)]),
    age = first(age)
  ) %>%
  ungroup()

In [None]:
# Persist the combined event log for this grouping.
saveRDS(
  object = combo_df,
  file = glue::glue("data/grouping{grouping_number}_event_log_combo_df.rds")
)
# To reload the saved copy instead:
# combo_df <- readRDS(glue::glue("data/grouping{grouping_number}_event_log_combo_df.rds"))

In [None]:
# Quick look at the columns and types of the combined event log.
glimpse(combo_df)

In [None]:
# Label each timestamp as "In-hours" or "Out-of-hours".
#
# A timestamp is "In-hours" when it falls on Monday-Friday AND its hour of day
# is between 8 and 17 inclusive (i.e. 08:00:00 up to 17:59:59). Everything
# else - weekends, or any other hour - is "Out-of-hours".
#
# Args:
#   timestamps: vector of date-times accepted by lubridate::wday() and
#               lubridate::hour().
#
# Returns:
#   Character vector of the same length as `timestamps`. A missing timestamp
#   yields NA; an empty input yields character(0).
ooh <- function(timestamps) {
  # week_start = 1 numbers the days 1 = Monday ... 7 = Sunday, so > 5 is the
  # weekend
  day_of_week <- lubridate::wday(timestamps, week_start = 1)
  hour_of_day <- lubridate::hour(timestamps)

  # Computed on the whole vector at once rather than one element at a time
  is_out_of_hours <- day_of_week > 5 | !dplyr::between(hour_of_day, 8, 17)

  dplyr::if_else(is_out_of_hours, "Out-of-hours", "In-hours")
}

In [None]:
# Build one summary row per case (`activity_instance_id`).
#
# Only the "complete" lifecycle rows are used. Rows are put in
# `activity_instance_id_by_bupar` order, and every "first"/"next"/"number of
# contacts before" measure below is defined relative to that row order within
# a case.
# NOTE(review): this presumes bupaR's instance id order matches chronological
# order within a case - confirm.
summary_df <- combo_df %>%
  filter(lifecycle_id == "complete") %>%
  arrange(activity_instance_id_by_bupar) %>%
  mutate(
    ooh = ooh(timestamp),
    imd_rank = bit64::as.integer.integer64(imd_rank),
    imd_decile = as.factor(bit64::as.integer.integer64(imd_decile))
  ) %>%
  group_by(activity_instance_id) %>%
  mutate(
    current_activity = activity,
    next_activity = lead(activity),
    # Helper columns (consumed by summarise() below, not part of the output):
    # position of the row within its case, and masks for ED rows
    .contact_no = row_number(),
    .is_ed = activity == "ED",
    .is_ed_non_avoid = .is_ed &
      !is.na(avoidable_admission) &
      avoidable_admission == 0
  ) %>%
  summarise(
    person_id = bit64::as.integer.integer64(first(person_id)),
    # Values taken from the first row of the case.
    # Note typo in the source column name: symtom_group_desc
    across(
      c(
        ooh, age, sex, imd_rank, imd_decile, final_dx_code,
        symtom_group_desc, symptom_discriminator_desc, gp_surgery_id
      ),
      first
    ),
    # Strip the census suffix and cap the label at 80 characters
    ethnicity = str_trunc(
      str_replace(
        first(ethnicity_source_value),
        " - England and Wales ethnic category 2011 census",
        ""
      ),
      80,
      ellipsis = ""
    ),

    first_current = first(current_activity),
    # Activity that follows the first row of the case; `activity` may be a
    # factor, so it is converted to character before the fallback label is
    # filled in
    first_next = coalesce(
      as.character(first(next_activity)),
      "No further healthcare contact"
    ),
    # `hosp` value of the first ED row, NA when the case has no ED row
    hosp = case_when(
      any(.is_ed) ~ first(hosp[.is_ed]),
      TRUE ~ NA_character_
    ),

    # Whole hours (truncated) from the index row (AC_INDEX / IUC_INDEX) to the
    # first ED row with avoidable_admission == 0
    time_to_ED_non_avoid = case_when(
      any(.is_ed_non_avoid) ~
        as.integer(
          difftime(
            first(timestamp[.is_ed_non_avoid]),
            first(timestamp[activity %in% c("AC_INDEX", "IUC_INDEX")]),
            units = "hours"
          )
        ),
      TRUE ~ NA_integer_
    ),

    # Whole hours (truncated) from the index row to the first ED row
    time_to_ED = case_when(
      any(.is_ed) ~
        as.integer(
          difftime(
            first(timestamp[.is_ed]),
            first(timestamp[activity %in% c("AC_INDEX", "IUC_INDEX")]),
            units = "hours"
          )
        ),
      TRUE ~ NA_integer_
    ),

    # Number of rows that precede the first ED row with
    # avoidable_admission == 0 (kept integer, matching the NA_integer_ fallback)
    num_contacts_to_ED_non_avoid_attend = case_when(
      any(.is_ed_non_avoid) ~ first(.contact_no[.is_ed_non_avoid]) - 1L,
      TRUE ~ NA_integer_
    ),

    # Number of GP rows positioned before the first ED row
    num_GP_contacts_to_ED_attend = case_when(
      any(.is_ed) ~
        sum(.contact_no[activity == "GP"] < first(.contact_no[.is_ed])),
      TRUE ~ NA_integer_
    ),
    # Number of GP rows positioned before the first ED row with
    # avoidable_admission == 0
    num_GP_contacts_to_ED_non_avoid_attend = case_when(
      any(.is_ed_non_avoid) ~
        sum(
          .contact_no[activity == "GP"] < first(.contact_no[.is_ed_non_avoid]),
          na.rm = TRUE
        ),
      TRUE ~ NA_integer_
    ),

    # Per-case counts of rows by type
    non_avoid_adn = sum(avoidable_admission == 0, na.rm = TRUE),
    sum_contacts_CAD = sum(activity == "999", na.rm = TRUE),
    sum_contacts_AC = sum(activity == "AC", na.rm = TRUE),
    sum_contacts_ED = sum(activity == "ED", na.rm = TRUE),
    sum_contacts_GP = sum(activity == "GP", na.rm = TRUE),
    sum_contacts_IP = sum(activity == "IP", na.rm = TRUE),
    sum_contacts_IUC = sum(activity == "IUC", na.rm = TRUE),

    # 1/0 indicators derived from the counts above
    one_or_more_CAD = if_else(sum_contacts_CAD > 0, 1, 0),
    one_or_more_AC = if_else(sum_contacts_AC > 0, 1, 0),
    one_or_more_ED = if_else(sum_contacts_ED > 0, 1, 0),
    one_or_more_GP = if_else(sum_contacts_GP > 0, 1, 0),
    one_or_more_IP = if_else(sum_contacts_IP > 0, 1, 0),
    one_or_more_IUC = if_else(sum_contacts_IUC > 0, 1, 0)
  ) %>%
  ungroup()

In [None]:
#summary_df %>% filter(!is.na(time_to_ED) & time_to_ED != time_to_ED_non_avoid) %>% glimpse()

In [None]:
# Analysis-ready table: label each case with its stratum and keep only the
# columns used downstream.
summary_df1 <- summary_df %>%
  mutate(
    # Cases whose id contains "ABN" form the ABN stratum, all others IUC
    strata = case_when(
      grepl("ABN", activity_instance_id) ~ "ABN",
      TRUE ~ "IUC"
    )
  ) %>%
  select(
    person_id, strata, ooh, age, sex, imd_decile, ethnicity,
    non_avoid_adn, hosp, gp_surgery_id, first_next,
    starts_with("num_"),
    starts_with("one_or_more"),
    starts_with("sum_contacts"),
    starts_with("time_to_ED")
  )

In [None]:
# Quick look at the columns and types of the summary table.
glimpse(summary_df1)

In [None]:
# Persist the summary table for this grouping.
saveRDS(
  object = summary_df1,
  file = glue::glue("output/grouping{grouping_number}_summary_df.rds")
)

# Sankey

In [None]:
# Build the link and node tables for a networkD3 Sankey diagram.
#
# Within each case (`activity_instance_id`) the "complete" rows, ordered by
# `activity_instance_id_by_bupar`, are read as a chain of activities. Every
# row becomes one link from its own activity to the next activity in the case
# ("End" after the last row). Node labels are "<activity>_<step>": the index
# row and the row after it are both step 1, later rows count up from there.
# Identical links are counted across cases.
#
# Args:
#   df:        event log with columns `lifecycle_id`, `activity_instance_id`,
#              `activity_instance_id_by_bupar` and `activity`.
#   max_steps: only links whose source step is below this value are kept
#              (default 5).
#
# Returns:
#   Unnamed list of two data frames:
#     [[1]] links: `value` (count), `source`, `target` (zero-based node ids)
#     [[2]] nodes: `name` (node label), `id` (zero-based node id)
sankey_df_fn <- function(df, max_steps = 5) {
  # One row per transition. `activity` may be a factor, so it is converted to
  # character before being mixed with literal labels.
  transitions <- df %>%
    filter(lifecycle_id == "complete") %>%
    group_by(activity_instance_id) %>%
    arrange(activity_instance_id_by_bupar) %>%
    transmute(
      activity_instance_id_by_bupar,
      source = as.character(activity),
      target = as.character(lead(activity))
    ) %>%
    ungroup() %>%
    mutate(
      # Index activities are relabelled without their underscore, so the only
      # "_" in their node label is the one in front of the step number
      source = case_when(
        source == "AC_INDEX" ~ "ACINDEX",
        source == "IUC_INDEX" ~ "IUCINDEX",
        TRUE ~ source
      ),
      target = if_else(is.na(target), "End", target)
    )

  # Step numbers: row k of a case links step max(k - 1, 1) to step k
  numbered <- transitions %>%
    arrange(activity_instance_id, activity_instance_id_by_bupar) %>%
    group_by(activity_instance_id) %>%
    mutate(
      target_num = row_number(),
      source_num = pmax(target_num - 1L, 1L)
    ) %>%
    ungroup()

  # The cutoff is applied to the numeric step rather than parsed back out of
  # the label, so activity names that contain "_" are handled correctly
  link_counts <- numbered %>%
    filter(source_num < max_steps) %>%
    transmute(
      source = paste(source, source_num, sep = "_"),
      target = paste(target, target_num, sep = "_")
    ) %>%
    group_by(source, target) %>%
    summarise(value = n()) %>%
    ungroup()

  # Node ids follow first appearance: all distinct sources, then any targets
  # not already seen. networkD3 expects zero-based ids.
  node_names <- unique(c(link_counts$source, link_counts$target))
  nodes <- data.frame(
    name = node_names,
    id = seq_along(node_names) - 1,
    stringsAsFactors = FALSE
  )

  links <- link_counts %>%
    transmute(
      value,
      source = match(source, node_names) - 1,
      target = match(target, node_names) - 1
    ) %>%
    as.data.frame()

  list(links, nodes)
}

In [None]:
# Link and node tables for the AC event log.
sankey_ac_list <- event_ac_df %>%
  sankey_df_fn()

In [None]:
# Draw the AC Sankey diagram, save the widget, and show it as the cell output.
# sankey_df_fn() returns the links table first and the nodes table second.
links <- sankey_ac_list[[1]]
nodes <- sankey_ac_list[[2]]

p <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  # Column names inside `links` / `nodes`
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  # Appearance
  units = "n",
  fontSize = 12,
  nodeWidth = 30,
  sinksRight = FALSE
)

saveRDS(
  object = p,
  file = glue::glue("output/grouping{grouping_number}_AC_sankey.rds")
)

p

## Figure above: Abandoned Calls sankey

In [None]:
# Draw the IUC Sankey diagram, save the widget, and show it as the cell output.
# sankey_df_fn() returns the links table first and the nodes table second.
sankey_iuc_list <- event_iuc_df %>%
  sankey_df_fn()
links <- sankey_iuc_list[[1]]
nodes <- sankey_iuc_list[[2]]

r <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  # Column names inside `links` / `nodes`
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  # Appearance
  units = "n",
  fontSize = 12,
  nodeWidth = 30,
  sinksRight = FALSE
)

saveRDS(
  object = r,
  file = glue::glue("output/grouping{grouping_number}_IUC_sankey.rds")
)

r

In [None]:
# Reload the saved IUC Sankey widget.
iuc_sankey <- glue::glue("output/grouping{grouping_number}_IUC_sankey.rds") %>%
  readRDS()

In [None]:
# Show the reloaded IUC Sankey widget as this cell's output.
iuc_sankey

In [None]:
# Export the IUC Sankey widget as a self-contained HTML file.
iuc_sankey %>%
  saveNetwork(
    file = glue::glue("output/grouping{grouping_number}_IUC_sankey.html"),
    selfcontained = TRUE
  )

In [None]:
# Reload the saved AC Sankey widget.
ac_sankey <- glue::glue("output/grouping{grouping_number}_AC_sankey.rds") %>%
  readRDS()

In [None]:
# Show the reloaded AC Sankey widget as this cell's output.
ac_sankey

In [None]:
# Export the AC Sankey widget as a self-contained HTML file.
ac_sankey %>%
  saveNetwork(
    file = glue::glue("output/grouping{grouping_number}_AC_sankey.html"),
    selfcontained = TRUE
  )