-
Notifications
You must be signed in to change notification settings - Fork 0
/
dist_edr.R
275 lines (259 loc) · 9.22 KB
/
dist_edr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#' Dissimilarities between Ecological Dynamic Regimes
#'
#' @description
#' Generate a matrix containing dissimilarities between one or more pairs of
#' Ecological Dynamic Regimes (EDR). `dist_edr()` computes different dissimilarity
#' indices, all of them based on the dissimilarities between the trajectories of
#' two EDRs.
#'
#' @param d Symmetric matrix or object of class [`dist`] containing the
#' dissimilarities between each pair of states of all trajectories in the EDR or
#' the dissimilarities between each pair of trajectories.
#' @param d.type One of `"dStates"` (if `d` contains state dissimilarities) or
#' `"dTraj"` (if `d` contains trajectory dissimilarities).
#' @param trajectories Only if `d.type` = `"dStates"`. Vector indicating the
#' trajectory or site corresponding to each entry in `d`.
#' @param states Only if `d.type` = `"dStates"`. Vector of integers indicating the
#' order of the states in `d` for each trajectory.
#' @param edr Vector indicating the EDR to which each trajectory/state in `d` belongs.
#' @param metric A string indicating the dissimilarity index to be used: `"dDR"`
#' (default), `"minDist"`, `"maxDist"`.
#' @param symmetrize String naming the function to be called to symmetrize the
#' resulting dissimilarity matrix (`"mean"`, `"min"`, `"max`, `"lower"`, `"upper"`).
#' If `NULL` (default), the matrix is not symmetrized.
#' @param ... Only if `d.type` = `"dStates"`. Further arguments to calculate
#' trajectory dissimilarities. See [ecotraj::trajectoryDistances()].
#'
#' @details
#' The implemented metrics are:
#'
#' \describe{
#' \item{`"dDR"`}{
#' \eqn{
#' d_{DR}(R_1, R_2) = \frac{1}{n} \sum_{i=1}^{n} d_{TR}(T_{1i}, R_2)
#' }
#' }
#'
#' \item{`"minDist"`}{
#' \eqn{
#' d_{DRmin}(R_1, R_2) = \min_{i=1}^{n} \{ d_{TR}(T_{1i}, R_2) \}
#' }
#' }
#'
#' \item{`"maxDist"`}{
#' \eqn{
#' d_{DRmax}(R_1, R_2) = \max_{i=1}^{n} \{ d_{TR}(T_{1i}, R_2) \}
#' }
#' }
#' }
#'
#' where \eqn{R_1} and \eqn{R_2} are two EDRs composed of \eqn{n} and \eqn{m}
#' ecological trajectories, respectively, and \eqn{d_{TR}(T_{1i}, R_2)} is the
#' dissimilarity between the trajectory \eqn{T_{1i}} of \eqn{R_1} and the closest
#' trajectory of \eqn{R_2}:
#'
#' \eqn{
#' d_{TR}(T_{1i}, R_2) = \min\{d_T(T_{1i}, T_{21}), ... , d_T(T_{1i}, T_{2m})\}
#' }
#'
#' The metrics calculated are not necessarily symmetric. That is, \eqn{d_{DR}(R_1, R_2)}
#' is not necessarily equal to \eqn{d_{DR}(R_2, R_1)}. It is possible to symmetrize
#' the returned matrix by indicating the name of the function to be used in `symmetrize`:
#'
#' \describe{
#' \item{`"mean"`}{
#' \eqn{
#' d_{DRsym} = \frac{d_{DR}(R_1, R_2) + d_{DR}(R_2, R_1)}{2}
#' }
#' }
#'
#' \item{`"min"`}{
#' \eqn{
#' d_{DRsym} = \min\{d_{DR}(R_1, R_2), d_{DR}(R_2, R_1)\}
#' }
#' }
#'
#' \item{`"max"`}{
#' \eqn{
#' d_{DRsym} = \max\{d_{DR}(R_1, R_2), d_{DR}(R_2, R_1)\}
#' }
#' }
#'
#' \item{`"lower"`}{
#' The lower triangular part of the dissimilarity matrix is used.
#' }
#'
#' \item{`"upper"`}{
#' The upper triangular part of the dissimilarity matrix is used.
#' }
#'
#' }
#'
#' @return
#' Matrix including the dissimilarities between every pair of EDRs.
#'
#' @author Martina Sánchez-Pinillos
#'
#' @references
#' Sánchez-Pinillos, M., Kéfi, S., De Cáceres, M., Dakos, V. 2023. Ecological Dynamic
#' Regimes: Identification, characterization, and comparison. *Ecological Monographs*.
#' <doi:10.1002/ecm.1589>
#'
#'
#' @export
#'
#' @examples
#' # Load species abundances and compile in a data frame
#' abun1 <- EDR_data$EDR1$abundance
#' abun2 <- EDR_data$EDR2$abundance
#' abun3 <- EDR_data$EDR3$abundance
#' abun <- data.frame(rbind(abun1, abun2, abun3))
#'
#' # Define row names in abun to keep the reference of the EDR, trajectory, and
#' # state
#' row.names(abun) <- paste0(abun$EDR, "_", abun$traj, "_", abun$state)
#'
#' # Calculate dissimilarities between every pair of states
#' # For example, Bray-Curtis index
#' dStates <- vegan::vegdist(abun[, -c(1, 2, 3)], method = "bray")
#'
#' # Use the labels in dStates to define the trajectories to which each state
#' # belongs
#' id_traj <- vapply(strsplit(labels(dStates), "_"), function(x){
#' paste0(x[1], "_", x[2])
#' }, character(1))
#' id_state <- vapply(strsplit(labels(dStates), "_"), function(x){
#' as.integer(x[3])
#' }, integer(1))
#' id_edr <- vapply(strsplit(labels(dStates), "_"), function(x){
#' paste0("EDR", x[1])
#' }, character(1))
#'
#' # Calculate dissimilarities between every pair of trajectories
#' dTraj <- ecotraj::trajectoryDistances(d = dStates, sites = id_traj,
#' surveys = id_state,
#' distance.type = "DSPD")
#'
#' # Use labels in dTraj to identify EDRs
#' id_edr_traj <- vapply(strsplit(labels(dTraj), "_"), function(x){
#' paste0("EDR", x[1])
#' }, character(1))
#'
#' # Compute dissimilarities between EDRs:
#' # 1) without symmetrizing the matrix and using state dissimilarities
#' dEDR <- dist_edr(d = dStates, d.type = "dStates",
#' trajectories = id_traj, states = id_state, edr = id_edr,
#' metric = "dDR", symmetrize = NULL)
#'
#' # 2) symmetrizing by averaging elements on and below the diagonal and using
#' # trajectory dissimilarities
#' dEDR <- dist_edr(d = dTraj, d.type = "dTraj", edr = id_edr_traj,
#' metric = "dDR", symmetrize = "mean")
#'
#'
dist_edr <- function(d, d.type, trajectories = NULL, states = NULL, edr, metric = "dDR", symmetrize = NULL, ...){
metric <- match.arg(metric, c("dDR", "minDist", "maxDist"))
if (!is.null(symmetrize)) {
symmetrize <- match.arg(symmetrize, c("mean", "min", "max", "lower", "upper"))
}
d.type <- match.arg(d.type, c("dStates", "dTraj"))
if (d.type == "dTraj") {
states <- NULL
}
## WARNING MESSAGES ----------------------------------------------------------
# Check the format for d, trajectories, states, and reference
if (all(!is.matrix(d), !inherits(d, "dist")) |
nrow(as.matrix(d)) != ncol(as.matrix(d))) {
stop("'d' must be a symmetric dissimilarity matrix or an object of class 'dist'.")
}
if (d.type == "dStates") {
if (is.null(trajectories)) {
stop("If 'd.type' = \"dStates\", you must provide a value for 'trajectories'.")
}
if (is.null(states)) {
stop("If 'd.type' = \"dStates\", you must provide a value for 'states'.")
}
if (length(trajectories) != nrow(as.matrix(d))) {
stop("The length of 'trajectories' must be equal to both dimensions in 'd'.")
}
if (length(states) != nrow(as.matrix(d))) {
stop("The length of 'states' must be equal to both dimensions in 'd'.")
}
}
# Check input data is correct
if(length(edr) != nrow(as.matrix(d))){
stop("'edr' needs to have a length equal to the number of rows and columns of 'd'. \nProvide the EDR of all trajectories considered in 'd'.")
}
## TRAJECTORIES & DISSIMILARITIES --------------------------------------------
# Trajectory dissimilarity
if(d.type == "dStates"){
dTrajmat <- as.matrix(ecotraj::trajectoryDistances(d = d,
sites = paste0(edr, "_", trajectories),
surveys = states,...))
edr.df <- unique(data.frame(traj = paste0(edr, "_", trajectories), edr = edr))
edr <- edr.df$edr
}
if(d.type == "dTraj") {
dTrajmat <- as.matrix(d)
}
# Nb edr and trajectories/edr
ID_edr <- as.character(unique(edr))
Ntraj_edr <- table(edr)
# Empty matrix to compile regime distances
dReg <- matrix(0, ncol = length(ID_edr), nrow = length(ID_edr), dimnames = list(ID_edr, ID_edr))
for (iR1 in ID_edr) {
for (iR2 in ID_edr) {
minD_T1R2 <- numeric()
# for each T1i, find the minimum distance to R2; min{D(T1i, T21), ..., D(T1i, T2m)}
for (iT1 in which(edr == iR1)) {
# Indices of the trajectories in R2
T2 <- which(edr == iR2)
# min{D(T1i, T21), ..., D(T1i, T2m)}
minD_T1R2 <- c(minD_T1R2, min(dTrajmat[iT1, T2]))
}
if(metric == "dDR"){
dReg[iR1, iR2] <- sum(minD_T1R2)/(Ntraj_edr[iR1]) # dDR(R1, R2)
}
if(metric == "minDist"){
dReg[iR1, iR2] <- min(minD_T1R2)
}
if(metric == "maxDist"){
dReg[iR1, iR2] <- max(minD_T1R2)
}
}
}
# Symmetrize dDR if required
if (!is.null(symmetrize)) {
if (symmetrize == "mean") {
for (iR1 in ID_edr) {
for (iR2 in ID_edr) {
dReg[iR1, iR2] <- mean(c(dReg[iR1, iR2], dReg[iR2, iR1]))
dReg[iR2, iR1] <- dReg[iR1, iR2]
}
}
}
if (symmetrize == "min") {
for (iR1 in ID_edr) {
for (iR2 in ID_edr) {
dReg[iR1, iR2] <- min(c(dReg[iR1, iR2], dReg[iR2, iR1]))
dReg[iR2, iR1] <- dReg[iR1, iR2]
}
}
}
if (symmetrize == "max") {
for (iR1 in ID_edr) {
for (iR2 in ID_edr) {
dReg[iR1, iR2] <- max(c(dReg[iR1, iR2], dReg[iR2, iR1]))
dReg[iR2, iR1] <- dReg[iR1, iR2]
}
}
}
if (symmetrize == "lower") {
dReg[upper.tri(dReg)] = t(dReg)[upper.tri(dReg)]
}
if (symmetrize == "upper") {
dReg[lower.tri(dReg)] = t(dReg)[lower.tri(dReg)]
}
}
return(dReg)
}