# Survival cohort preparation

In [None]:
# Run parameters.
# This notebook will need to be run 3 times, adjusting the grouping to 1, 2 or 3.
grouping_number <- 3

# Follow-up window; compared against hour differences further down the notebook.
follow_up_time <- 72

# Suffix used in data file names: empty for a window of 72, "_<window>" otherwise.
follow_up_time_str <- if (follow_up_time != 72) {
  glue::glue("_{follow_up_time}")
} else {
  ""
}

In [None]:
suppressPackageStartupMessages(library(tidyverse))
library(lubridate)
library(glue)
suppressPackageStartupMessages(library(bupaR))
# library(processanimateR)
library(survival)
library(ggplot2)
library(ggfortify)

In [None]:
# Read the nested abandoned-call data for the configured follow-up window and
# keep rows whose `start` is on or after 2022-01-01 00:00:00.
ab_df <- glue::glue("data/final_abandoned_call_nest_df{follow_up_time_str}.rds") %>%
    readRDS() %>%
    filter(start >= ymd_hms('2022-01-01 00:00:00'))
glimpse(ab_df)

In [None]:
# Pick the abandoned-call cohort for this run from `grouping_number`:
#   1 = rows with contacted_iuc_b4_ac == 1
#   2 = rows with contacted_iuc_b4_ac == 0
#   3 = every row of ab_df
# Any other value stops the run, so that a stale or missing `ab_dfa` is never
# carried into the cells below.
if (grouping_number == 1) {
    print('Grouping 1: Abandoned calls with previous 111 triage')
    ab_dfa <- ab_df %>%
        filter(contacted_iuc_b4_ac == 1)
} else if (grouping_number == 2) {
    print('Grouping 2: Abandoned calls with no previous 111 triage')
    ab_dfa <- ab_df %>%
        filter(contacted_iuc_b4_ac == 0)
} else if (grouping_number == 3) {
    print('Grouping 3: All abandoned calls irrespective of previous 111 triage')
    ab_dfa <- ab_df
} else {
    stop("grouping_number must be 1, 2 or 3, not: ", grouping_number, call. = FALSE)
}

In [None]:
ab_dfa %>% count() # Grouping 1: 1866 Grouping 2: 28713 Grouping 3: 30579

# Unnest files

In [None]:
# Need to provide an instance ID for each grouping of times e.g. GP start and end time
# Cannot use index_rn for this as there may be multiple GP/ED/EPR records in a 72 hours period

#' Unnest one follow-up activity list-column into one row per event.
#'
#' Keeps the rows of `df` whose count column is greater than zero, unnests the
#' named list-column and reshapes the result to a fixed set of columns. The
#' `unique_id` of the row in `df` is carried onto every unnested row as
#' `activity_instance_id`; the `unique_id` in the output is the one found
#' inside the nested records.
#'
#' @param df Data frame with a `unique_id` column, the nested list-column and
#'   its count column.
#' @param nest_df_name Name (string) of the list-column holding the nested
#'   records.
#' @param nest_df_name_count Name (string) of the column counting those records.
#' @param activity Label written to the `activity` column. "IUC", "ED", "IP"
#'   and "GP" each have their own column mapping; any other label (e.g. "999",
#'   "AC") uses the generic mapping.
#' @return A data frame with one row per distinct
#'   (`activity_instance_id`, `start`, `end`), or `NULL` when no row of `df`
#'   has a count above zero.
process_nest <- function(df, nest_df_name, nest_df_name_count, activity) {
    # Callers pass glue() strings; drop the glue class before using them as
    # column names.
    nest_col <- as.character(nest_df_name)
    count_col <- as.character(nest_df_name_count)

    with_events <- df %>%
        filter(.data[[count_col]] > 0)

    # Nothing to unnest. Previously only the generic mapping had this guard;
    # it now applies to every activity so the mappings below are never run
    # against an empty unnest result. bind_rows() ignores NULL inputs.
    if (nrow(with_events) == 0) {
        return(NULL)
    }

    events <- with_events %>%
        select(unique_id, all_of(nest_col)) %>%
        rename(activity_instance_id = unique_id) %>%
        unnest(all_of(nest_col))

    # Column order inside each mapping is kept exactly as it was per activity.
    shaped <- switch(
        activity,
        IUC = events %>%
            transmute(
                activity_instance_id,
                person_id,
                unique_id,
                start,
                end,
                age,
                sex,
                imd_rank,
                imd_decile,
                ethnicity_source_value,
                final_dx_code,
                symtom_group_desc,
                symptom_discriminator_desc,
                avoidable_admission = NA,
                hosp = NA,
                activity = !!activity,
                gp_practice_id = gp_surgery_id
            ),
        ED = events %>%
            transmute(
                activity_instance_id,
                person_id,
                unique_id,
                start,
                end,
                age = ed_age,
                sex = ed_sex,
                avoidable_admission,
                final_dx_code = NA,
                symtom_group_desc = NA,
                symptom_discriminator_desc = NA,
                hosp,
                activity = !!activity,
                imd_rank,
                imd_decile,
                ethnicity_source_value,
                gp_practice_id = NA
            ),
        IP = events %>%
            transmute(
                activity_instance_id,
                person_id,
                unique_id,
                start,
                end,
                age = hosp_age,
                sex = hosp_sex,
                avoidable_admission = NA,
                final_dx_code = NA,
                symtom_group_desc = NA,
                symptom_discriminator_desc = NA,
                hosp,
                activity = !!activity,
                imd_rank,
                imd_decile,
                ethnicity_source_value,
                gp_practice_id = NA
            ),
        GP = events %>%
            transmute(
                activity_instance_id,
                person_id,
                unique_id,
                start,
                end,
                age = age,
                sex = sex,
                avoidable_admission = NA,
                final_dx_code = NA,
                symtom_group_desc = NA,
                symptom_discriminator_desc = NA,
                hosp = NA,
                activity = !!activity,
                imd_rank,
                imd_decile,
                ethnicity_source_value,
                # Won't interfere with IUC data which includes gp_surgery_id
                # already allocated to IUC index cases
                gp_practice_id = gp_surgery_id
            ),
        # Generic mapping for every other activity label
        events %>%
            transmute(
                activity_instance_id,
                person_id,
                unique_id,
                start,
                end,
                age,
                sex,
                imd_rank,
                imd_decile,
                ethnicity_source_value,
                avoidable_admission = NA,
                final_dx_code = NA,
                symtom_group_desc = NA,
                symptom_discriminator_desc = NA,
                activity = !!activity,
                hosp = NA,
                gp_practice_id = NA
            )
    )

    # There are some duplications of In-patient and ED cases but they all have
    # the same start and end date times.
    shaped %>%
        distinct(activity_instance_id, start, end, .keep_all = TRUE)
}

In [None]:
# Unnest the GP list-column of the selected abandoned-call cohort
gp_df <- process_nest(
    ab_dfa,
    nest_df_name = glue::glue("gp_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("gp_nest_calls_in_{follow_up_time}_hrs"),
    activity = "GP"
)

In [None]:
glimpse(gp_df)

In [None]:
count(gp_df) # Grouping 1: 771 Grouping 2: 3571 Grouping 3: 4342

In [None]:
# Unnest the IUC list-column
iuc_df <- process_nest(
    ab_dfa,
    nest_df_name = glue::glue("iuc_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("iuc_nest_calls_in_{follow_up_time}_hrs"),
    activity = "IUC"
)

In [None]:
glimpse(iuc_df)

In [None]:
count(iuc_df) # Grouping 1: 924 Grouping 2: 3952 Grouping 3: 4876

In [None]:
# Unnest the CAD list-column (labelled "999")
cad_df <- process_nest(
    ab_dfa,
    nest_df_name = glue::glue("cad_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("cad_nest_calls_in_{follow_up_time}_hrs"),
    activity = "999"
)

In [None]:
glimpse(cad_df)

In [None]:
count(cad_df) # Grouping 1: 166 Grouping 2: 733 Grouping 3: 899

In [None]:
# Unnest the hospital ED list-column
hosp_ed_df <- process_nest(
    ab_dfa,
    nest_df_name = glue::glue("hosp_ed_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("hosp_ed_nest_calls_in_{follow_up_time}_hrs"),
    activity = "ED"
)

In [None]:
glimpse(hosp_ed_df)

In [None]:
count(hosp_ed_df) # Grouping 1: 360 Grouping 2: 2678 Grouping 3: 3112

In [None]:
# Unnest the hospital in-patient list-column
hosp_ip_df <- process_nest(
    ab_dfa,
    nest_df_name = glue::glue("hosp_ip_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("hosp_ip_nest_calls_in_{follow_up_time}_hrs"),
    activity = "IP"
)

In [None]:
glimpse(hosp_ip_df)

In [None]:
count(hosp_ip_df) # Grouping 1: 146 Grouping 2: 911 Grouping 3: 1090

In [None]:
# Unnest the abandoned-call list-column (labelled "AC")
ac_df <- process_nest(
    ab_dfa,
    nest_df_name = glue::glue("ac_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("ac_nest_calls_in_{follow_up_time}_hrs"),
    activity = "AC"
)

In [None]:
glimpse(ac_df)

In [None]:
count(ac_df) # Grouping 1: 204 Grouping 2: 958 Grouping 3: 1162

In [None]:
#' Classify timestamps as in-hours or out-of-hours.
#'
#' A timestamp is "In-hours" when it falls on Monday to Friday and its hour
#' component is between 8 and 17 inclusive (08:00:00 up to 17:59:59);
#' everything else is "Out-of-hours". The whole vector is classified in one
#' pass rather than element by element, and a missing timestamp yields `NA`
#' instead of stopping with an error.
#'
#' @param timestamps Date-time vector accepted by `lubridate::wday()` and
#'   `lubridate::hour()`.
#' @return Character vector the same length as `timestamps`.
ooh <- function(timestamps) {
  # With week_start = 1, Monday is 1, so 6 and 7 are Saturday and Sunday.
  is_weekend <- lubridate::wday(timestamps, week_start = 1) > 5

  hour_of_day <- lubridate::hour(timestamps)
  is_working_hour <- hour_of_day >= 8 & hour_of_day <= 17

  dplyr::if_else(is_weekend | !is_working_hour, "Out-of-hours", "In-hours")
}

In [None]:
# Shape the selected abandoned-call rows into the column layout shared with
# the unnested activity tables, ready for row-binding. Each row is labelled
# 'AC_INDEX' and its own unique_id doubles as the activity_instance_id.
ab_df1 <- ab_dfa %>%
  transmute(
    unique_id,
    person_id,
    ethnicity_source_value,
    age,
    sex,
    imd_rank,
    imd_decile,
    activity_instance_id = unique_id,
    start,
    end,
    ooh = ooh(start),
    # Columns that are only populated by some of the unnested activity tables
    avoidable_admission = NA,
    final_dx_code = NA,
    symtom_group_desc = NA,
    symptom_discriminator_desc = NA,
    activity = 'AC_INDEX',
    hosp = NA_character_,
    # Placeholder; filled per person from gp_practice_id after the tables
    # are combined
    gp_surgery_id = NA
  ) %>%
  distinct(activity_instance_id, start, end, .keep_all = TRUE)

In [None]:
glimpse(ab_df1)

In [None]:
# Stack the index rows and every unnested activity table, ordered by start time
df <- arrange(
    bind_rows(ab_df1, gp_df, iuc_df, cad_df, hosp_ed_df, hosp_ip_df, ac_df),
    start
)

In [None]:
glimpse(df) # Grouping 1: 4436 Grouping 2: 41516 Grouping 3: 46060

In [None]:
# Per person: when any row lacks gp_surgery_id but has a gp_practice_id, give
# every row of that person the first non-missing gp_practice_id; otherwise
# keep the person's first gp_surgery_id.
df1 <- df %>%
    group_by(person_id) %>%
    mutate(
        gp_surgery_id = if_else(
            any(is.na(gp_surgery_id) & !is.na(gp_practice_id)),
            first(gp_practice_id[!is.na(gp_practice_id)]),
            first(gp_surgery_id)
        )
    ) %>%
    ungroup()

In [None]:
count(df1, is.na(gp_surgery_id))

In [None]:
saveRDS(
    df1,
    file = glue::glue("data/grouping{grouping_number}_ac_combo_df{follow_up_time_str}.rds")
)

In [None]:
# df1 <- readRDS(glue::glue("data/grouping{grouping_number}_ac_combo_df.rds"))
# df1 %>% filter(activity_instance_id == "ABN_10030")

# 111 data

In [None]:
# Read the nested IUC data saved for this grouping and follow-up window
grouping_iuc_df <- glue::glue("data/grouping{grouping_number}_iuc_nest_df{follow_up_time_str}.rds") %>%
    readRDS()

In [None]:
glimpse(grouping_iuc_df)

In [None]:
# Shape the IUC rows into the column layout shared with the unnested activity
# tables. Each row is labelled 'IUC_INDEX' and its own unique_id doubles as
# the activity_instance_id.
grouping_iuc_df1 <- grouping_iuc_df %>%
  transmute(
    unique_id,
    person_id,
    age,
    sex,
    activity_instance_id = unique_id,
    start,
    end,
    ooh = ooh(start),
    avoidable_admission = NA,
    final_dx_code,
    symtom_group_desc,
    symptom_discriminator_desc,
    activity = 'IUC_INDEX',
    hosp = NA,
    ethnicity_source_value,
    imd_rank,
    imd_decile,
    gp_practice_id = gp_surgery_id
  ) %>%
  distinct()

In [None]:
# Unnest the GP list-column of the IUC cohort
iuc_gp_df <- process_nest(
    grouping_iuc_df,
    nest_df_name = glue::glue("gp_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("gp_nest_calls_in_{follow_up_time}_hrs"),
    activity = "GP"
)

In [None]:
count(iuc_gp_df) # Grouping 1: 57671 Grouping 2: 56810 Grouping 3: 56285

In [None]:
# Unnest the IUC list-column
iuc_iuc_df <- process_nest(
    grouping_iuc_df,
    nest_df_name = glue::glue("iuc_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("iuc_nest_calls_in_{follow_up_time}_hrs"),
    activity = "IUC"
)

In [None]:
count(iuc_iuc_df) # Grouping 1: 25110 Grouping 2: 25166 Grouping 3: 24409

In [None]:
# Unnest the CAD list-column (labelled "999")
iuc_cad_df <- process_nest(
    grouping_iuc_df,
    nest_df_name = glue::glue("cad_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("cad_nest_calls_in_{follow_up_time}_hrs"),
    activity = "999"
)

In [None]:
count(iuc_cad_df) # Grouping 1: 20495 Grouping 2: 20212 Grouping 3: 20095

In [None]:
# Unnest the hospital ED list-column
iuc_hosp_ed_df <- process_nest(
    grouping_iuc_df,
    nest_df_name = glue::glue("hosp_ed_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("hosp_ed_nest_calls_in_{follow_up_time}_hrs"),
    activity = "ED"
)

In [None]:
count(iuc_hosp_ed_df) # Grouping 1: 40372 Grouping 2: 39849 Grouping 3: 39438

In [None]:
# Unnest the hospital in-patient list-column
iuc_hosp_ip_df <- process_nest(
    grouping_iuc_df,
    nest_df_name = glue::glue("hosp_ip_nest_visit_{follow_up_time}_df"),
    nest_df_name_count = glue::glue("hosp_ip_nest_calls_in_{follow_up_time}_hrs"),
    activity = "IP"
)

In [None]:
count(iuc_hosp_ip_df) # Grouping 1: 13194 Grouping 2: 13068 Grouping 3: 12960

In [None]:
# Stack the IUC index rows and their unnested activity tables, ordered by start time
grouping_iuc_df2 <- arrange(
    bind_rows(grouping_iuc_df1, iuc_gp_df, iuc_iuc_df, iuc_cad_df, iuc_hosp_ed_df, iuc_hosp_ip_df),
    start
)

In [None]:
glimpse(grouping_iuc_df2)

In [None]:
saveRDS(
    grouping_iuc_df2,
    file = glue::glue("data/grouping{grouping_number}_iuc_combo_df{follow_up_time_str}.rds")
)

In [None]:
#grouping_iuc_df2 <- readRDS(glue::glue("data/grouping{grouping_number}_iuc_combo_df.rds"))

In [None]:
#grouping_iuc_df2 %>% filter(grepl("IUC", activity))

# Create Survival analysis dataset

## Abandoned calls

In [None]:
# Collapse the abandoned-call event log to one row per activity_instance_id.
#
# arrange() ignores grouping by default, so the whole table is sorted by
# `start`; within each group first() therefore reads from the earliest row.
# Expressions inside summarise() are evaluated in order, and a summarised
# column replaces the original for the expressions after it. `activity`,
# `start` and `avoidable_admission` are never reassigned, and `hosp` is
# reassigned last.
df2 <- df1 %>%
    group_by(activity_instance_id) %>%
    arrange(start) %>%
    summarise(
        person_id = first(person_id),
        ethnicity_source_value = first(ethnicity_source_value),
        age = first(age),
        sex = first(sex),
        ooh = first(ooh),
        imd_rank = first(imd_rank),
        imd_decile = first(imd_decile),
        # First non-missing gp_surgery_id in the group, NA when there is none
        gp_surgery_id = if_else(
            any(!is.na(gp_surgery_id)),
            first(gp_surgery_id[!is.na(gp_surgery_id)]),
            NA
        ),

        # Whole hours (truncated) from the earliest row in the group to the
        # first ED row; follow_up_time when the group is a single row or has
        # no ED row.
        fu_time = case_when(
            n() == 1 ~ follow_up_time,
            any(activity == "ED") ~ as.integer(difftime(
                first(start[activity == "ED"]),
                first(start),
                units = "hours"
            )),
            TRUE ~ follow_up_time
        ),
        # Truncate all values to follow_up_time
        fu_time = if_else(fu_time > follow_up_time, follow_up_time, fu_time),

        # Same measure, restricted to ED rows with avoidable_admission == 0
        fu_time_non_avoid = case_when(
            n() == 1 ~ follow_up_time,
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ as.integer(difftime(
                first(start[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0]),
                first(start),
                units = "hours"
            )),
            TRUE ~ follow_up_time
        ),
        fu_time_non_avoid = if_else(fu_time_non_avoid > follow_up_time, follow_up_time, fu_time_non_avoid),

        # Number of rows in the group that sort before the first ED row
        num_contacts_to_ED_attend = case_when(
            any(activity == "ED") ~ first(row_number()[activity == "ED"] - 1),
            TRUE ~ NA_integer_
        ),
        num_contacts_to_ED_non_avoid_attend = case_when(
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ first(row_number()[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0] - 1),
            TRUE ~ NA_integer_
        ),

        # Number of GP rows that sort before the first ED row
        num_GP_contacts_to_ED_attend = case_when(
            any(activity == "ED") ~ sum(row_number()[activity == "GP"] < first(row_number()[activity == "ED"])),
            TRUE ~ NA_integer_
        ),
        num_GP_contacts_to_ED_non_avoid_attend = case_when(
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ sum(row_number()[activity == "GP"] < first(row_number()[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0]), na.rm = TRUE),
            TRUE ~ NA_integer_
        ),

        # 1 when the group contains an ED row, 0 otherwise (censored)
        status = case_when(
            n() == 1 ~ 0,
            any(activity == "ED") ~ 1,
            TRUE ~ 0
        ),
        # 1 when the group contains an ED row with avoidable_admission == 0
        status_non_avoid = case_when(
            n() == 1 ~ 0, # Censored
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ 1,
            TRUE ~ 0
        ),
        # hosp value of the first ED row, NA when the group has no ED row
        hosp = case_when(
            any(activity == "ED") ~ first(hosp[activity == "ED"]),
            TRUE ~ NA_character_
        )
    ) %>%
    ungroup()

In [None]:
df2 %>% glimpse()

## 111 calls

In [None]:
# Collapse the IUC event log to one row per activity_instance_id.
#
# arrange() ignores grouping by default, so the whole table is sorted by
# `start`; within each group first() therefore reads from the earliest row.
# Expressions inside summarise() are evaluated in order, and a summarised
# column replaces the original for the expressions after it. `activity`,
# `start`, `avoidable_admission` and `gp_practice_id` are never reassigned,
# and `hosp` is reassigned last.
grouping_iuc_df3 <- grouping_iuc_df2 %>% #head() %>%
    group_by(activity_instance_id) %>%
    arrange(start) %>%
    summarise(
        person_id = first(person_id),
        ethnicity_source_value = first(ethnicity_source_value),
        age = first(age),
        sex = first(sex),
        ooh = first(ooh),
        imd_rank = first(imd_rank),
        imd_decile = first(imd_decile),
        # First non-missing gp_practice_id in the group, NA when there is none
        gp_surgery_id = if_else(
            any(!is.na(gp_practice_id)),
            first(gp_practice_id[!is.na(gp_practice_id)]),
            NA
        ),

        # Whole hours (truncated) from the earliest row in the group to the
        # first ED row; follow_up_time when the group is a single row or has
        # no ED row.
        fu_time = case_when(
            n() == 1 ~ follow_up_time,
            any(activity == "ED") ~ as.integer(difftime(
                first(start[activity == "ED"]),
                first(start),
                units = "hours"
            )),
            TRUE ~ follow_up_time
        ),
        # Truncate all values to follow_up_time
        fu_time = if_else(fu_time > follow_up_time, follow_up_time, fu_time),

        # Same measure, restricted to ED rows with avoidable_admission == 0
        fu_time_non_avoid = case_when(
            n() == 1 ~ follow_up_time,
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ as.integer(difftime(
                first(start[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0]),
                first(start),
                units = "hours"
            )),
            TRUE ~ follow_up_time
        ),
        fu_time_non_avoid = if_else(fu_time_non_avoid > follow_up_time, follow_up_time, fu_time_non_avoid),

        # Number of rows in the group that sort before the first ED row
        num_contacts_to_ED_attend = case_when(
            any(activity == "ED") ~ first(row_number()[activity == "ED"] - 1),
            TRUE ~ NA_integer_
        ),
        num_contacts_to_ED_non_avoid_attend = case_when(
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ first(row_number()[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0] - 1),
            TRUE ~ NA_integer_
        ),

        # Number of GP rows that sort before the first ED row
        num_GP_contacts_to_ED_attend = case_when(
            any(activity == "ED") ~ sum(row_number()[activity == "GP"] < first(row_number()[activity == "ED"])),
            TRUE ~ NA_integer_
        ),
        num_GP_contacts_to_ED_non_avoid_attend = case_when(
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ sum(row_number()[activity == "GP"] < first(row_number()[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0]), na.rm = TRUE),
            TRUE ~ NA_integer_
        ),

        # 1 when the group contains an ED row, 0 otherwise (censored)
        status = case_when(
            # Only index call present
            n() == 1 ~ 0,
            any(activity == "ED") ~ 1,
            TRUE ~ 0
        ),
        # 1 when the group contains an ED row with avoidable_admission == 0
        status_non_avoid = case_when(
            n() == 1 ~ 0, # Censored
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ 1,
            TRUE ~ 0
        ),
        # hosp value of the first ED row, NA when the group has no ED row
        hosp = case_when(
            any(activity == "ED") ~ first(hosp[activity == "ED"]),
            TRUE ~ NA_character_
        )
    ) %>%
    ungroup()

In [None]:
grouping_iuc_df3 %>% glimpse()

In [None]:
#grouping_iuc_df3 %>% count(num_contacts_to_ED_attend)
#grouping_iuc_df3 %>% count(num_GP_contacts_to_ED_non_avoid_attend)

## Combined cohort survival dataframe

In [None]:
# Stack the two per-index summaries, tagging each row with its cohort
grouping_survival_df <- bind_rows(
    mutate(df2, cohort = 'AC'),
    mutate(grouping_iuc_df3, cohort = 'IUC')
)

In [None]:
glimpse(grouping_survival_df)

In [None]:
saveRDS(
    grouping_survival_df,
    file = glue::glue("data/grouping{grouping_number}_survival_df{follow_up_time_str}.rds")
)

In [None]:
count(grouping_survival_df, num_GP_contacts_to_ED_attend)

# Exploration of index call dispositions

In [None]:
# Re-read the combined IUC event log. The file name includes
# follow_up_time_str, matching the saveRDS() call that writes this file, so a
# run with a non-default follow-up window reads the data for that window.
iuc_calls_dispostion_df <- readRDS(glue::glue("data/grouping{grouping_number}_iuc_combo_df{follow_up_time_str}.rds"))

In [None]:
iuc_calls_dispostion_df %>% glimpse()

In [None]:
# Collapse the IUC event log to one row per activity_instance_id, keeping the
# symptom and disposition fields of the earliest row alongside the ED outcome.
#
# arrange() ignores grouping by default, so the whole table is sorted by
# `start`; within each group first() therefore reads from the earliest row.
iuc_calls_disposition_df2 <- iuc_calls_dispostion_df %>% #head() %>%
    group_by(activity_instance_id) %>%
    arrange(start) %>%
    summarise(
        person_id = first(person_id),
        sym_desc = first(symtom_group_desc),
        sym_discr = first(symptom_discriminator_desc),
        dx_code = first(final_dx_code),

        # Whole hours (truncated) from the earliest row in the group to the
        # first ED row; follow_up_time when the group is a single row or has
        # no ED row.
        fu_time = case_when(
            n() == 1 ~ follow_up_time,
            any(activity == "ED") ~ as.integer(difftime(
                first(start[activity == "ED"]),
                first(start),
                units = "hours"
            )),
            TRUE ~ follow_up_time
        ),
        # Truncate all values to follow_up_time
        fu_time = if_else(fu_time > follow_up_time, follow_up_time, fu_time),

        # Same measure, restricted to ED rows with avoidable_admission == 0
        fu_time_non_avoid = case_when(
            n() == 1 ~ follow_up_time,
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ as.integer(difftime(
                first(start[activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0]),
                first(start),
                units = "hours"
            )),
            TRUE ~ follow_up_time
        ),
        fu_time_non_avoid = if_else(fu_time_non_avoid > follow_up_time, follow_up_time, fu_time_non_avoid),

        # 1 when the group contains an ED row, 0 otherwise (censored)
        status = case_when(
            # Only index call present
            n() == 1 ~ 0,
            any(activity == "ED") ~ 1,
            TRUE ~ 0
        ),
        # 1 when the group contains an ED row with avoidable_admission == 0
        status_non_avoid = case_when(
            n() == 1 ~ 0, # Censored
            any(activity == "ED" & !is.na(avoidable_admission) & avoidable_admission == 0) ~ 1,
            TRUE ~ 0
        )
    ) %>%
    ungroup()

In [None]:
iuc_calls_disposition_df2 %>% glimpse()

In [None]:
#iuc_calls_disposition_df2 %>% count(status_non_avoid, sym_discr, sort = T)

In [None]:
# Flag rows whose symptom discriminator text contains "ED" (or "ED"/"AMB").
# grepl() returns FALSE for missing text, so both flags are always 0 or 1.
# NOTE(review): the match is a plain substring test on sym_discr; confirm this
# is the intended field and pattern for identifying ED/ambulance dispositions.
iuc_disposition_df3 <- iuc_calls_disposition_df2 %>%
    mutate(
        go_to_ed_incl_amb = as.numeric(grepl("ED|AMB", sym_discr)),
        ed_only = as.numeric(grepl("ED", sym_discr))
    )

In [None]:
count(iuc_disposition_df3, status_non_avoid, go_to_ed_incl_amb)

In [None]:
count(iuc_disposition_df3, status_non_avoid, ed_only)