-
Notifications
You must be signed in to change notification settings - Fork 11
/
distances_for_injury_function.R
131 lines (119 loc) · 7.42 KB
/
distances_for_injury_function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#' Get distances and model for injuries module
#'
#' Computes exposures (distances) to parametrise the injury regression model,
#' which is computed as a Poisson with various offsets and used later in prediction
#'
#' @param journeys df with sex and age_cat total distance (by total population) for scenarios
#' @param dist table of (total) distances per mode per scenario
#'
#' @return list of distances, injury table, and injury regression model
#'
#' @export
distances_for_injury_function <- function(journeys, dist){
##!! RJ need to scale distances up for representativeness of survey population of total population
##!! AA have added total distance by taking demographic into the calculations, and then scaling distance accordingly
distances <- spread(journeys,stage_mode, tot_dist,fill=0)
if('walk_to_pt'%in%names(distances)){
distances$pedestrian <- distances$pedestrian + distances$walk_to_pt
}
## car is car, taxi, shared auto, shared taxi
distances$car <- rowSums(distances[,colnames(distances)%in%c('car','taxi','shared_auto','shared_taxi')])
distances <- distances[, -which(names(distances) %in% c('taxi','shared_auto','shared_taxi',"walk_to_pt"))]
## bus distance increases linearly with bus passenger distance
if('bus_driver'%in%colnames(distances)){
passenger <- sapply(SCEN,function(x)sum(subset(distances,scenario==x)$bus))
distances$bus_driver <- distances$bus_driver * passenger[match(distances$scenario,SCEN)] / passenger[[1]]
}
true_distances_0 <- distances
true_distances_0$sex_age <- paste0(true_distances_0$sex,"_",true_distances_0$age_cat)
#if(ADD_BUS_DRIVERS) true_distances_0$bus <- true_distances_0$bus + true_distances_0$bus_driver
true_distances <- true_distances_0#[,-c(which(names(true_distances_0) == 'sex'))]
## for injury_function_2
mode_names <- names(true_distances)[!names(true_distances)%in%c('age_cat','scenario','sex_age','sex')]
# divide injuries into those for which we can write a WHW (who hit whom) matrix, i.e. we know distances of both striker and casualty,
## and those for which we don't know striker distance: no or other vehicle (NOOV)
## we can only model casualties for which we know distance travelled
## we include truck and bus travel via the flags used in run_ithim_setup, if they are missing from the survey, as in accra
injury_table <- INJURY_TABLE
## add distance columns
injuries_for_model <- add_distance_columns(injury_table,mode_names,true_distances_0,dist,scenarios=SCEN[1])
scenario_injury_table <- list()
for(type in INJURY_TABLE_TYPES)
scenario_injury_table[[type]] <- expand.grid(age_cat=unique(DEMOGRAPHIC$age),
cas_gender=unique(DEMOGRAPHIC$sex),
cas_mode=unique(injuries_for_model[[1]][[type]]$cas_mode),
strike_mode=unique(injuries_for_model[[1]][[type]]$strike_mode))
injuries_list <- add_distance_columns(injury_table=scenario_injury_table,mode_names,true_distances_0,dist)
for (n in 1:(NSCEN+1))
for(type in INJURY_TABLE_TYPES)
injuries_list[[n]][[type]]$injury_gen_age <- apply(cbind(as.character(injuries_list[[n]][[type]]$cas_gender),as.character(injuries_list[[n]][[type]]$age_cat)),1,function(x)paste(x,collapse='_'))
# run regression model on baseline data
reg_model <- list()
## Injury regression. This needs a lot of work to make it generalisable to different settings, data qualities, etc.
##TODO write formulae without prior knowledge of column names
##TODO use all ages with ns(age,...).
##RJ linearity in group rates
CAS_EXPONENT <<- SIN_EXPONENT_SUM * CASUALTY_EXPONENT_FRACTION
STR_EXPONENT <<- SIN_EXPONENT_SUM - CAS_EXPONENT
forms <- list(whw='count~cas_mode*strike_mode+offset(log(cas_distance)+(CAS_EXPONENT-1)*log(cas_distance_sum)+log(strike_distance)+(STR_EXPONENT-1)*log(strike_distance_sum)-log(injury_reporting_rate)+log(weight))',
nov='count~cas_mode+offset(log(cas_distance)+(CAS_EXPONENT-1)*log(cas_distance_sum)-log(injury_reporting_rate)+log(weight))')
if('age_cat'%in%names(injuries_for_model[[1]][[1]]))
for(type in INJURY_TABLE_TYPES)
forms[[type]] <- paste0(c(forms[[type]],'age_cat'),collapse='+')
if('cas_gender'%in%names(injuries_for_model[[1]][[1]]))
for(type in INJURY_TABLE_TYPES)
forms[[type]] <- paste0(c(forms[[type]],'cas_gender'),collapse='+')
## catch for when regression fails: if fail, run simpler model: no interactions.
for(type in INJURY_TABLE_TYPES){
injuries_for_model[[1]][[type]]$injury_reporting_rate <- 1
test <- 'try-error'
# try 1: add age cat and gender
if(any(c('age_cat','cas_gender')%in%names(injuries_for_model[[1]][[type]]))){
new_form <- forms[[type]]
if('age_cat'%in%names(injuries_for_model[[1]][[1]]))
new_form <- paste0(c(new_form,'age_cat'),collapse='+')
if('cas_gender'%in%names(injuries_for_model[[1]][[1]]))
new_form <- paste0(c(new_form,'cas_gender'),collapse='+')
test <- try(glm(as.formula(new_form),data=injuries_for_model[[1]][[type]],family='poisson'))
}
if(length(test)==1&&test == 'try-error')
test <- try(glm(as.formula(forms[[type]]),data=injuries_for_model[[1]][[type]],family='poisson'))
if(length(test)==1&&test == 'try-error')
test <- try(glm('offset(log(cas_distance)+(CAS_EXPONENT-1)*log(cas_distance_sum)-log(injury_reporting_rate)+log(weight))',data=injuries_for_model[[1]][[type]],family='poisson'))
#
reg_model[[type]] <- trim_glm_object(test)
test <- NULL
# reg_model[[type]] <- tryCatch({
# suppressWarnings(glm(as.formula(forms[[type]]),data=injuries_for_model[[1]][[type]],family='poisson',control=glm.control(maxit=100)))
# }, error = function(e){
# temp_form <- gsub('*','+',forms[[type]],fixed=T)
# suppressWarnings(glm(as.formula(temp_form),data=injuries_for_model[[1]][[type]],family='poisson',control=glm.control(maxit=100)))
# }
# )
# reg_model[[type]] <- trim_glm_object(reg_model[[type]])
}
##
## For predictive uncertainty, we could sample a number from the predicted distribution
# if('year'%in%colnames(injuries_list[[1]][[1]])){
# print('year')
# # the injury burden at baseline is the prediction for the most recent year
# most_recent_year <- max(injuries_list[[1]][[1]]$year)
# for(scen in SCEN)
# for(type in INJURY_TABLE_TYPES)
# injuries_list[[scen]][[type]] <- subset(injuries_list[[scen]][[type]],year==most_recent_year)
# }
return(list(true_distances=true_distances,injuries_list=injuries_list,reg_model=reg_model))
}
## old code:
# get distances relative to baseline scenario
# scen_dist <- sapply(1:(NSCEN+1),function(x)c(colSums(subset(distances,scenario == SCEN[x])[,colnames(distances)%in%unique(journeys$stage_mode)])))
# colnames(scen_dist) <- SCEN_SHORT_NAME
# for(i in 2:ncol(scen_dist)) scen_dist[,i] <- scen_dist[,i]/scen_dist[,1]
# if(CITY=='accra') scen_dist <- rbind(scen_dist,Tuktuk=1)
#for (i in 1: length(mode_names))
#for (n in 1:(NSCEN+1))
# distances[[mode_names[i]]][which(distances$scenario == SCEN[n])] <-
# distances[[mode_names[i]]][which(distances$scenario == SCEN[n])]/ sum(distances[[mode_names[i]]][which(distances$scenario == SCEN[n])],na.rm=T)
#relative_distances <- distances
#relative_distances$sex_age <- paste0(relative_distances$sex,"_",relative_distances$age_cat)
#relative_distances <- relative_distances[,-c(which(names(relative_distances) == 'sex'))]