In [None]:
# Delete any chunk files already in data/walk_files before the run starts.
# The combine step further down reads every file in that folder whose name
# starts with a given prefix, so leftovers from an earlier run would
# presumably be merged in as well.
unlink('data/walk_files/*')

In [None]:
suppressPackageStartupMessages(library(tidyverse))
library(lubridate)
library(glue)
library(furrr)

In [None]:
# Number of parallel workers: keep one core free for the interactive session,
# but never go below one worker (availableCores() - 1 is 0 on a single-core
# machine, which is not a usable worker count).
no_cores <- max(1, availableCores() - 1)
no_cores

# Helper functions

In [None]:
# Find one person's lookup events relative to a reference time.
#
# Args:
#   df:             lookup data frame; its `person_id` and `start` columns
#                   are read (start = start of 999/GP call etc.).
#   p_id:           person_id to match.
#   endtime:        reference time (111 consultation finish time); every
#                   matching row's `start` is compared against it.
#   backwards:      FALSE (default) looks forward from `endtime`,
#                   TRUE looks back.
#   follow_up_time: window length in hours (default 72).
#
# Returns:
#   backwards = FALSE: a one-element list holding the person's rows whose
#     `start` is between 0 and `follow_up_time` hours (both inclusive) after
#     `endtime`, with a `call_time_diff` column (difftime in hours) added.
#   backwards = TRUE: the number of the person's rows whose `start` is
#     strictly before `endtime` and at most `follow_up_time` hours before it.
#   Rows with a missing time difference never match in either mode.
date_diff_fn <- function(df, p_id, endtime, backwards = FALSE, follow_up_time = 72) {
  # `.env$` pins the function arguments, so a lookup table that happens to
  # have a column called `p_id`, `endtime` or `follow_up_time` cannot
  # silently shadow them inside the dplyr verbs.
  person_events <- df %>%
    dplyr::filter(person_id == .env$p_id) %>%
    mutate(
      # difftime(time1, time2) is time1 - time2: positive values are events
      # that started after the reference time, negative values before it.
      call_time_diff = difftime(start, .env$endtime, units = "hours")
    )

  if (backwards) {
    # Need to check for IUC calls before index event (AC calls only).
    # Only a count is needed, so sum the logical mask rather than building a
    # filtered data frame; na.rm drops rows with no datetime recorded.
    hrs_from_ref <- person_events$call_time_diff
    return(sum(
      hrs_from_ref < 0 & hrs_from_ref >= -follow_up_time,
      na.rm = TRUE
    ))
  }

  # filter() already drops rows whose condition is NA, so rows with no
  # datetime recorded are removed without a separate is.na() test.
  in_window <- person_events %>%
    dplyr::filter(
      call_time_diff >= 0 & call_time_diff <= .env$follow_up_time
    )

  # Wrapped in a list so the caller can store it as a list-column cell.
  list(in_window)
}


# Smallest `call_time_diff` value in `df`, ignoring missing values.
#
# Args:
#   df: data frame with a `call_time_diff` column.
#
# Returns:
#   The minimum of the column, with the column's class kept. As with base
#   min(), a column with no non-missing values gives Inf with a warning.
#   Stops with an error if the column is absent.
calc_min_callback <- function(df) {
  stopifnot("call_time_diff" %in% names(df))
  min(df[["call_time_diff"]], na.rm = TRUE)
}

# Match one chunk of index calls against a lookup table and save the result.
#
# For every row of `df` this stores, via date_diff_fn():
#   {filename}_visit_{follow_up_time}_df        lookup rows for the same
#       person starting within `follow_up_time` hours after the row's `end`
#   {filename}_calls_in_{follow_up_time}_hrs    number of those rows
#   {filename}_calls_{follow_up_time}_before_df number of lookup rows for the
#       same person starting within `follow_up_time` hours before `end`
#
# Args:
#   df:             chunk of index calls; `unique_id`, `person_id`, `start`
#                   and `end` are read.
#   iter:           chunk number, used only in the output file name.
#   df2:            lookup table; `person_id` and `start` are read.
#   filename:       prefix for the new columns and for the output file.
#   follow_up_time: window length in hours (default 72).
#
# Side effect: writes data/walk_files/{filename}_{iter}.rds containing
# unique_id, person_id and the three new columns.
manage_split <- function(df, iter, df2, filename, follow_up_time = 72) {
  # default is end of 111 call
  visit_col <- glue("{filename}_visit_{follow_up_time}_df")

  # Cheap pre-filter so the per-row lookups scan fewer rows. date_diff_fn()
  # only ever compares the lookup `start`, so the pre-filter must do the
  # same: testing the lookup `end` would drop events that start inside the
  # window but finish late (or have no end recorded), and would make a row's
  # result depend on which other calls share its chunk.
  # na.rm = TRUE stops a single missing timestamp in the chunk from turning
  # both bounds into NA and emptying the lookup for every row.
  earliest <- min(df$start, na.rm = TRUE) - hours(follow_up_time)
  latest <- max(df$end, na.rm = TRUE) + hours(follow_up_time)

  candidates <- df2 %>%
    dplyr::filter(start >= .env$earliest, start <= .env$latest)

  chunk_result <- df %>%
    rowwise() %>%
    mutate(
      # follow_up_time is forwarded explicitly; otherwise date_diff_fn()
      # falls back to its own default whatever the column name claims.
      "{filename}_visit_{follow_up_time}_df" := date_diff_fn(
        .env$candidates, person_id, end,
        follow_up_time = .env$follow_up_time
      ),
      # Under rowwise() the list-column cell is the matched data frame
      # itself, so nrow() gives the number of follow-up events.
      "{filename}_calls_in_{follow_up_time}_hrs" := nrow(!!sym(visit_col)),
      "{filename}_calls_{follow_up_time}_before_df" := date_diff_fn(
        .env$candidates, person_id, end,
        backwards = TRUE, follow_up_time = .env$follow_up_time
      )
    ) %>%
    # Drop the row-wise grouping so the saved table is a plain tibble.
    ungroup() %>%
    select(unique_id, person_id, starts_with(filename))

  out_dir <- "data/walk_files"
  # No-op when the folder already exists.
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  saveRDS(chunk_result, file.path(out_dir, paste0(filename, "_", iter, ".rds")))
}

# Abandoned Calls

In [None]:
# There are 2 ac_df files: one contains only the index abandoned call, and the other includes all calls

In [None]:
ac_index_df <- readRDS('data/ac_df_index.rds')

In [None]:
ac_index_df %>% glimpse()

In [None]:
# Chunk size (rows per chunk) and total number of index calls.
n <- 250
nr <- nrow(ac_index_df)

# Rows 1..n go to chunk 1, rows n+1..2n to chunk 2, and so on; the last chunk
# may hold fewer than n rows.
split_ac_df <- split(
  ac_index_df,
  as.integer((seq_len(nr) - 1) %/% n) + 1L
)

In [None]:
split_ac_df[[1]] %>% glimpse()

## Non-index abandoned calls

In [None]:
ac_df <- readRDS('data/ac_df_all.rds') %>% filter(index_event == 0)

In [None]:
ac_df %>% count() # 2913 v2 2034 (kids removed)

In [None]:
# Run the chunks in parallel background R sessions.
plan(multisession, workers = no_cores)

start <- Sys.time()
# One manage_split() call per chunk. seq_along() supplies the chunk number
# used in the output file name and always matches the length of split_ac_df,
# including the empty case where 1:0 would yield c(1, 0).
future_walk2(split_ac_df, seq_along(split_ac_df), ~manage_split(.x, .y, ac_df, "ac_nest", follow_up_time = 72))
end <- Sys.time()

In [None]:
end-start # 1.2 mins

In [None]:
test_df <- readRDS('data/walk_files/ac_nest_1.rds')
test_df %>% glimpse()

In [None]:
test_df %>% count(ac_nest_calls_in_72_hrs ) 
# 0	236
# 1	14   v2 0 232, 1 18

# IUC Calls

In [None]:
iuc_df <- readRDS('data/iuc_df.rds')

In [None]:
iuc_df %>% count() # 193526 v2 272193

In [None]:
iuc_df %>% glimpse()

In [None]:
# Run the chunks in parallel background R sessions.
plan(multisession, workers = no_cores)

start <- Sys.time()
# One manage_split() call per chunk. seq_along() supplies the chunk number
# used in the output file name and always matches the length of split_ac_df,
# including the empty case where 1:0 would yield c(1, 0).
future_walk2(split_ac_df, seq_along(split_ac_df), ~manage_split(.x, .y, iuc_df, "iuc_nest", follow_up_time = 72))
end <- Sys.time()

In [None]:
end-start # 1.8 mins

# GP calls

In [None]:
gp_df <- readRDS('data/gp_df.rds')

In [None]:
gp_df %>% glimpse()

In [None]:
# Run the chunks in parallel background R sessions.
plan(multisession, workers = no_cores)

start <- Sys.time()
# One manage_split() call per chunk. seq_along() supplies the chunk number
# used in the output file name and always matches the length of split_ac_df,
# including the empty case where 1:0 would yield c(1, 0).
future_walk2(split_ac_df, seq_along(split_ac_df), ~manage_split(.x, .y, gp_df, "gp_nest", follow_up_time = 72))
end <- Sys.time()

In [None]:
end-start # 6 mins

In [None]:
readRDS('data/walk_files/gp_nest_1.rds') %>% glimpse()

In [None]:
readRDS('data/walk_files/gp_nest_1.rds') %>% count(gp_nest_calls_in_72_hrs)
#0	236    238
#1	10     9
#2	3      2
#3	1   v2 1

# CAD

In [None]:
cad_df <- readRDS('data/cad_df.rds')

In [None]:
cad_df %>% glimpse()

In [None]:
# Run the chunks in parallel background R sessions.
plan(multisession, workers = no_cores)

start <- Sys.time()
# One manage_split() call per chunk. seq_along() supplies the chunk number
# used in the output file name and always matches the length of split_ac_df,
# including the empty case where 1:0 would yield c(1, 0).
future_walk2(split_ac_df, seq_along(split_ac_df), ~manage_split(.x, .y, cad_df, "cad_nest", follow_up_time = 72))
end <- Sys.time()

In [None]:
end-start # 1.4 mins

# ED datasets

In [None]:
hosp_ed_df <- readRDS('data/hosp_ed_df.rds')

In [None]:
# Run the chunks in parallel background R sessions.
plan(multisession, workers = no_cores)

start <- Sys.time()
# One manage_split() call per chunk. seq_along() supplies the chunk number
# used in the output file name and always matches the length of split_ac_df,
# including the empty case where 1:0 would yield c(1, 0).
future_walk2(split_ac_df, seq_along(split_ac_df), ~manage_split(.x, .y, hosp_ed_df, "hosp_ed_nest", follow_up_time = 72))
end <- Sys.time()

In [None]:
end-start # 1.9mins

# In-patient data

In [None]:
hosp_ip_df <- readRDS('data/hosp_ip_df.rds')

In [None]:
# Run the chunks in parallel background R sessions.
plan(multisession, workers = no_cores)

start <- Sys.time()
# One manage_split() call per chunk. seq_along() supplies the chunk number
# used in the output file name and always matches the length of split_ac_df,
# including the empty case where 1:0 would yield c(1, 0).
future_walk2(split_ac_df, seq_along(split_ac_df), ~manage_split(.x, .y, hosp_ip_df, "hosp_ip_nest", follow_up_time = 72))
end <- Sys.time()

In [None]:
end-start #2 min

# Combine data

In [None]:
# Read every .rds chunk whose file name starts with `filename` and stack the
# chunks into one data frame.
#
# Args:
#   file_path: folder to search.
#   filename:  file-name prefix; it is used as an anchored regular expression.
#
# Returns:
#   The row-bound contents of all matching files. If nothing matches, a
#   warning is raised and the (empty) result of binding zero files is
#   returned.
combine_rds <- function(file_path, filename) {
  rds_files <- list.files(
    path = file_path,
    pattern = glue("^{filename}"),
    full.names = TRUE
  )

  # An empty match usually means the chunk step has not been run, or was run
  # with a different prefix; say so here rather than in a later join error.
  if (length(rds_files) == 0) {
    warning(
      "No files starting with '", filename, "' found in '", file_path, "'",
      call. = FALSE
    )
  }

  map_dfr(rds_files, readRDS)
}

In [None]:
ac_nest_df <- combine_rds("data/walk_files", "ac_nest")

In [None]:
ac_nest_df %>% glimpse() 

In [None]:
iuc_nest_df <- combine_rds("data/walk_files", "iuc_nest")

In [None]:
iuc_nest_df %>% glimpse()

In [None]:
gp_nest_df <- combine_rds("data/walk_files", "gp_nest")

In [None]:
gp_nest_df %>% glimpse()

In [None]:
cad_nest_df <- combine_rds("data/walk_files", "cad_nest")

In [None]:
cad_nest_df %>% glimpse()

In [None]:
hosp_ed_nest_df <- combine_rds("data/walk_files", "hosp_ed_nest")

In [None]:
hosp_ed_nest_df %>% glimpse()

In [None]:
hosp_ip_nest_df <- combine_rds("data/walk_files", "hosp_ip_nest")

In [None]:
hosp_ip_nest_df %>% glimpse()

In [None]:
iuc_nest_df %>% glimpse()

In [None]:
# This dataset will be the one we use to identify cases to remove for triaged 111 calls

In [None]:
# Attach each lookup summary to the index abandoned calls in turn (abandoned
# calls, IUC, GP, CAD, ED, in-patient), always keeping every index row and
# matching on the same pair of keys.
final_nest_df <- Reduce(
  function(joined_so_far, nest_df) {
    left_join(joined_so_far, nest_df, by = c("unique_id", "person_id"))
  },
  list(
    ac_nest_df,
    iuc_nest_df,
    gp_nest_df,
    cad_nest_df,
    hosp_ed_nest_df,
    hosp_ip_nest_df
  ),
  ac_index_df
)

In [None]:
final_nest_df %>% glimpse()

In [None]:
# This will be the final abandoned call dataset, with the contact flags and a delete_flag added

In [None]:
# Note: modify if follow_up_time is not 72 hours
# (the column names below have the 72 baked in).
# Adds three 0/1 contact flags per row, then a per-call_id delete_flag for
# call_ids where contacts cannot be attributed to a single person.
final_nest_df1 <- final_nest_df %>% #head() %>%
    mutate(
        # Flag to identify if any healthcare contact made within 72 hours:
        # 1 when the row's '*in_72_hrs' count columns sum to more than zero.
        made_contact = if_else(rowSums(across(ends_with('in_72_hrs'))) > 0, 1, 0),
        # Simple flag for triaged 111 call in the 72 hours prior to Ab call.
        contacted_iuc_b4_ac = if_else(iuc_nest_calls_72_before_df > 0, 1, 0),
        # Simple flag for abandoned call but triaged IUC call within 72 hours of ab call.
        contacted_iuc_after_ac = if_else(iuc_nest_calls_in_72_hrs > 0, 1, 0)
    ) %>%
    # Everything below is evaluated once per call_id group.
    group_by(call_id) %>%
    mutate(
        # Cases where multiple patients with healthcare contacts are associated with a single call_id
        # cannot be included, as we don't know which person actually made the contact (or the abandoned call)
        delete_flag = case_when(
            # Only one row for this call_id: keep
            n() == 1 ~ 0,
            # Several rows and more than one of them has made_contact set: flag for deletion
            # (made_contact is 0/1, so the sum is the number of rows with a contact)
            sum(made_contact) > 1 ~ 1,
            # Several rows but at most one of the group has a contact recorded: keep
            TRUE ~ 0
        )
    ) %>% ungroup()

In [None]:
saveRDS(final_nest_df1, 'data/final_nest_df.rds')

In [None]:
final_nest_df1 %>% glimpse()

In [None]:
final_nest_df1 %>% count(contacted_iuc_b4_ac, contacted_iuc_after_ac)
#A tibble: 4 × 3
#contacted_iuc_b4_ac	contacted_iuc_after_ac	n
#<dbl>	<dbl>	<int>
#0	0	27581
#0	1	3724
#1	0	1312
#1	1	715

# v2
# 0	0	16657
# 0	1	3583
# 1	0	1275
# 1	1	755


In [None]:
final_nest_df1 %>% count(made_contact) # 0 23601, 1 9731 v2 0 14973 1 7297

In [None]:
final_nest_df1 %>% count(delete_flag) # 0 30576, 1 2756 v2 0 20995 1 1275

In [None]:
final_nest_df2 <- final_nest_df1 %>%
    # Remove all cases with the delete_flag set
     filter(delete_flag == 0)

In [None]:
saveRDS(final_nest_df2, 'data/final_abandoned_call_nest_df.rds')