-
Notifications
You must be signed in to change notification settings - Fork 1
/
get_patient_flow.R
146 lines (122 loc) · 7.18 KB
/
get_patient_flow.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#' Summarize inter-facility patient transfer network
#'
#' @param pt_trans_df a dataframe representing a patient transfer network of 3 cols: 'source_facil', 'dest_facil, and 'n_transfers' (code doesn't support missing paths, any missing paths will be represented by 0s)
#' @param locs vector of unique locations you want to find shortest paths between (e.g. ones for which you have sequencing data for); default to use all locations
#' @param paths boolean value, TRUE if you want the shortest paths returned, FALSE if you don't
#'
#' @return the number of direct patient transfers and indirect flow metrics between each facility pair. If paths = TRUE, a list of summary (pt_trans_summary) and shortest paths used (paths).
#' @export
#' @description Summarize inter-facility patient transfer network from an edge list. Direct and indirect patient flow metrics are calculated.
#' @details For more details on how patient flow is calculated, see: https://aac.asm.org/content/63/11/e01622-19.
#'
#' @examples
#' get_patient_flow(pt_trans_df = pt_trans_df)
get_patient_flow <- function(pt_trans_df, locs = NULL, paths = FALSE){
#run checks
# could make this more general by defining column names in function, regardless of what they originally were
check_get_patient_flow_input(pt_trans_df = pt_trans_df, locs = locs, paths = paths)
if(is.null(locs)){
locs = c(as.character(pt_trans_df$source_facil), as.character(pt_trans_df$dest_facil))
}
locs <- unique(locs)
#make pt_trans_net not factors
pt_trans_df$source_facil <- as.character(pt_trans_df$source_facil)
pt_trans_df$dest_facil <- as.character(pt_trans_df$dest_facil)
#run indirect flow
ind_flow_output <- get_indirect_flow(pt_trans_df, locs)
pt_trans_df_i <- ind_flow_output$transfer_network
paths_list <- ind_flow_output$paths
#remove any same-facility pairs
pat_flow <- pt_trans_df %>% dplyr::filter(source_facil != dest_facil)
pt_trans_summary <- dplyr::full_join(pat_flow, pt_trans_df_i, by = c("source_facil", "dest_facil")) %>%
dplyr::filter(source_facil != dest_facil & (source_facil %in% locs) & (dest_facil %in% locs))
## sort facilities before summarizing (should probably make this a function)
facil_pairs <- lapply(1:nrow(pt_trans_summary), function(x)
sort(c(as.character(pt_trans_summary$source_facil[x]), as.character(pt_trans_summary$dest_facil[x])))
)
pt_trans_summary <- lapply(facil_pairs, function(x){
f12 <- pt_trans_summary %>% dplyr::filter(source_facil == x[1] & dest_facil == x[2]) %>%
dplyr::rename(loc1 = source_facil, loc2 = dest_facil, n_transfers_f12 = n_transfers, pt_trans_metric_f12 = pt_trans_metric)
f21 <- pt_trans_summary %>% dplyr::filter(source_facil == x[2] & dest_facil == x[1]) %>%
dplyr::rename(loc1 = dest_facil, loc2 = source_facil, n_transfers_f21 = n_transfers, pt_trans_metric_f21 = pt_trans_metric)
pt_flow_sub <- dplyr::full_join(f12, f21, by = c('loc1','loc2'))
}) %>% dplyr::bind_rows() %>% dplyr::mutate(sum_transfers = n_transfers_f12 + n_transfers_f21,
sum_pt_trans_metric = pt_trans_metric_f12 + pt_trans_metric_f21)
if(paths == TRUE){
#return paths and summary as a list
pt_trans_summary <- list("pt_trans_summary" = pt_trans_summary, "paths" = paths_list)
}
return(pt_trans_summary)
}
#' Calculate indirect patient flow from patient transfer network
#'
#' @inheritParams get_patient_flow
#'
#' @return facility x facility matrix of metric of patient flow between each facility pair
#' @noRd
#'
#' @examples
#' get_indirect_flow(pt_trans_df = pt_trans_df)
get_indirect_flow <- function(pt_trans_df, locs = NULL){
#don't want to subset before getting here, need whole network for indirect
#checks
check_pt_trans_df(pt_trans_df, locs)
if(is.null(locs)){
locs = c(as.character(pt_trans_df$source_facil), as.character(pt_trans_df$dest_facil))
}
locs <- unique(locs)
# fill in missing source and destination facilities (doesn't change results, but will error out otherwise)
pt_trans_df <- fill_missing_src_dest(pt_trans_df)
#make matrix format
trans_mat <- tidyr::pivot_wider(pt_trans_df, names_from = source_facil, values_from = n_transfers)
trans_mat <- as.data.frame(trans_mat[,2:ncol(trans_mat)])
rownames(trans_mat) <- colnames(trans_mat)
trans_mat = t(trans_mat)
#make graph
g <- igraph::graph_from_adjacency_matrix(trans_mat,mode='directed',weighted = TRUE)
#remove weights that are NA (zero transfers)
g <- igraph::delete_edges(g, igraph::E(g)[is.na(igraph::E(g)$weight)])
#name nodes in network that we have data for
#modify edge weights from n facilities -> normalize/invert
out_strength = igraph::strength(g,mode='out') # get number of outgoing patient transfers for each vertex
tail_vert = igraph::tail_of(g,igraph::E(g)) # get tail (source) vertex for each edge
edwt_sum = sapply(names(tail_vert), function(x) out_strength[names(out_strength) == x]) # get number of outgoing patient transfers of tail vertex for each edge
igraph::E(g)$weight = -log10(igraph::E(g)$weight/edwt_sum) # normalize edge weight by number of outgoing patient transfers of source vertex and take negative log (to use to calculate shortest paths)
#find shortest path function -> igraph::shortest.paths()
sp <- g %>% igraph::shortest.paths(v = as.character(locs), to = as.character(locs), mode="out") %>% as.data.frame()
#make long form
trans_net_i <- sp %>% tibble::as_tibble() %>% dplyr::mutate(source_facil = colnames(sp)) %>% tidyr::pivot_longer(!source_facil, names_to = "dest_facil", values_to = "pt_trans_metric")
#make them each -(10^x)
trans_net_i$pt_trans_metric <- 10^(-trans_net_i$pt_trans_metric)
trans_net_i$pt_trans_metric[is.na(trans_net_i$pt_trans_metric)] <- 0
#if they asked for the paths, find them and make/return list
paths <- igraph::get.shortest.paths(g, 1, mode = "out")
returns <- list("transfer_network" = trans_net_i, "paths" = paths)
return(returns)
}
#' Fill in missing source and destination facilities in network edge list
#'
#' @inheritParams get_patient_flow
#'
#' @return filled in pt_trans_df
#' @noRd
#'
fill_missing_src_dest <- function(pt_trans_df) {
all_facils <- unique(c(as.character(pt_trans_df$source_facil),as.character(pt_trans_df$dest_facil)))
not_in_source <- all_facils[!(all_facils %in% pt_trans_df$source_facil)]
not_in_dest <- all_facils[!(all_facils %in% pt_trans_df$dest_facil)]
if(length(not_in_source) != 0 | length(not_in_dest) != 0){
pt_trans_df$source_facil <- as.character(pt_trans_df$source_facil)
pt_trans_df <- dplyr::bind_rows(pt_trans_df,
dplyr::bind_cols(source_facil = not_in_source,
dest_facil = pt_trans_df$dest_facil[1],
n_transfers = 0),
dplyr::bind_cols(source_facil = pt_trans_df$source_facil[1],
dest_facil = not_in_dest,
n_transfers = 0))
}
pt_trans_df <- pt_trans_df %>% tidyr::expand(source_facil, dest_facil) %>%
dplyr::left_join(pt_trans_df, by = c("source_facil", "dest_facil")) %>%
dplyr::mutate(n_transfers = ifelse(is.na(n_transfers), 0, n_transfers))
return(pt_trans_df)
}