-
Notifications
You must be signed in to change notification settings - Fork 2
/
sppm_decision_tree.R
224 lines (178 loc) · 9.12 KB
/
sppm_decision_tree.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
## Ian McFadden
## imcfadden@ucla.edu
################################################################################
#
# sppm_decision_tree
#
# Implements a decision tree using spatial point process
# models (SPPMs) to:
#
# 1.
# Categorize species based on spatial aggregation into:
# Category 1: 'Complete spatial randomness' (CSR)
# Category 2: 'Habitat only' (H)
# Category 3: 'Dispersal only' (D)
# Category 4: 'Habitat and dispersal' (HD)
#
# 2.
# When appropriate estimates model parameters related to habitat
# association and / or dispersal limitation based on species category
# using SPPMs incorporating one or both processes:
#
# Category 1 (CSR): No parameters estimated
#
# Category 2 (H): Heterogenious poisson process to estimete coefficients
# of association only
#
# Category 3 (D): Thomas cluster process to estimate cluster size (alpha)
# and clustering intensity (sigma2) only
#
# Category 4 (HD): log-Gaussian Cox process to estimate coefficients
# of association, cluster size (alpha) and clustering
# intensity (sigma2)
#
# ARGUMENTS
# dataframe Spatial dataframe to be used
# species_codes Name of species code column in dataframe
# x_coords Name of x coordinate column in dataframe
# y_coords Name of y coordinate column in dataframe
# hab_vars List of habitat images in 'im' format
# nu Preset kernel decay shape parameter
# 0.5 (default): exponential
# Inf (infinity): Gaussian
# abund_cutoff Species stem abundance cutoff, 70 is default
# plotdim Dimensions of the study plot as c(xmin, xmax, ymin, ymax)
#
# MODEL OUTPUTS
# *Output will differ by category (C1-4)
# Coefficients of association with habitat variables
# CI95lo Min of 95% confidence interval
# CI95hi Max of 95% confidence interval
# sigma2 Clustering intensity, the mean number of points per cluster
# alpha Cluster size, in meters
#
################################################################################
sppm_decision_tree <- function(dataframe=NULL,
species_codes=NULL,
x_coords=NULL,
y_coords=NULL,
hab_vars=NULL,
nu=0.5,
abund_cutoff=70,
plotdim=c(0,500,0,500)){
# Require packages
packages <- c("spatstat", "raster", "geostatsp", "ncf", "maptools",
"spatial", "RandomFieldsUtils", "RandomFields")
lapply(packages, require, character.only=T)
# Source the goodness of fit function
source("gof_test.r")
# Remove species below abundance cutoff
abund <- names(which(table(dataframe[,species_codes]) >= abund_cutoff))
dataframe <- dataframe[dataframe[,species_codes] %in% abund,]
# Make vector of species to loop over
spp <- unique(dataframe[,species_codes])
# Make vector of habitat variable names
habNames <- names(hab_vars)
# Create column names
CIlo <- paste0(names(hab_vars), "CI95.lo")
CIhi <- paste0(names(hab_vars), "CI95.hi")
colNames <- c("sp", "mod_category", "nu", "sigma2", "alpha", habNames, CIlo, CIhi)
# Create results dataframe
nrow <- length(spp)
ncol <- length(colNames)
results <- data.frame(matrix(NA, nrow, ncol))
colnames(results) <- colNames
results$sp <- spp
# Loop over each species, assign to category and estimate any parameters
for(i in 1:length(spp)){
# Write progress statement
writeLines("###################")
writeLines("###################")
writeLines("###################")
writeLines("###################")
writeLines("")
writeLines(paste("Run", i,"/", length(spp)))
writeLines("")
writeLines("###################")
writeLines("###################")
writeLines("###################")
writeLines("###################")
# Subset to a single species
sp <- dataframe[dataframe[,species_codes]==spp[i],]
# Create point pattern object from stem map
stems <- as.ppp(cbind(sp[,x_coords], sp[,y_coords]), W=plotdim)
### CATEGORY 1 TEST: CSR ####
# If there are no departures from CSR assign the species to C1
# and move on to next iteration of the loop
if(gof_test(stems, nsim=999)==TRUE){
results[i,"mod_category"] <- "CSR"
next
}
### CATEGORY 2 TEST: HABITAT ONLY ####
# Specify the habitat trend formula
trend <- as.formula(paste("~", paste(names(hab_vars), collapse="+"), sep=""))
# Fit heterogenious poisson model (HP)
mod <- ppm(stems, trend=trend, covariates=hab_vars, control=list(maxit=10000))
# Perform backwards selection via AIC, specifying AIC with k=2
step_mod <- step(mod, direction="backward", k=2)
# Check for deviations from the HP model
hp_dev <- gof_test(step_mod, nsim=999)
# If there are no significant departures from the HP model
# and the species is associated with at least one habitat attribute
if(hp_dev==TRUE & length(step_mod$coef)>1){
# Assign it to the 'Habitat only' category
results[i,"mod_category"] <- "H"
# Add model coefficients to the results dataframe
results[i, names(step_mod$coef)[-1]] <- step_mod$coef[-1]
# Add confidence intervals to results dataframe
modsum <- summary(step_mod)$coefs.SE.CI[-1,]
results[which(results$sp==spp[i]), paste0(rownames(modsum), "CI95.lo")] <- modsum$CI95.lo
results[which(results$sp==spp[i]), paste0(rownames(modsum), "CI95.hi")] <- modsum$CI95.hi
# And move on to the next iteration of the loop
next
}
### CATEGORY 3 TEST: DISPERSAL ONLY ####
# If there are significant departures from the HP model
# and the species is not associated with any habitat attribute
if(hp_dev==FALSE & length(step_mod$coef)==1){
# Assign it to the 'Dispersal only' category
results[i,"mod_category"] <- "D"
# Fit a Thomas cluster process to the data
mod <- kppm(stems ~ 1, "Thomas", statistic="pcf", method="mincon",
model="matern", nu=nu, control=list(maxit=10000))
# Add clustering paramters to results dataframe
results[i, c("nu", "sigma2", "alpha")] <- c(nu, mod$mu, mod$clustpar['scale'])
# And move on to the next iteration of the loop
next
}
### CATEGORY 4 TEST: HABITAT AND DISPERSAL ####
# If there are significant departures from the HP model
# and the species is associated at least one habitat attribute
if(hp_dev==FALSE & length(step_mod$coef) > 1){
# Assign it to the 'Habitat and dispersal' category
results[i,"mod_category"] <- "HD"
# Extract names of significant parameters
sigPars <- names(step_mod$coef)[2:length(coef(step_mod))]
# Specify the new habitat trend formula
trend <- as.formula(paste("~", paste(sigPars, collapse="+"), sep=""))
# Remove insignificant habitat images
sig_hab_vars <- as.list(rep(NA, length(sigPars)))
for(j in 1:length(sigPars)){
sig_hab_vars[[j]] <- hab_vars[[sigPars[j]]]
}
names(sig_hab_vars) <- sigPars
# Fit Cox model with all habitat variables
mod <- kppm(stems, trend=trend, covariates=sig_hab_vars, "LGCP", statistic="pcf", method="mincon",
model="matern", nu=nu, control=list(maxit=10000), improve.type="quasi")
# Add model coefficients to the results dataframe
results[i, sigPars] <- as.numeric(coef(mod))[2:length(coef(mod))]
# Add confidence intervals to results dataframe
modsum <- summary(mod)$coefs.SE.CI[-1,]
results[i, paste0(rownames(modsum), "CI95.lo")] <- modsum$CI95.lo
results[i, paste0(rownames(modsum), "CI95.hi")] <- modsum$CI95.hi
# Add clustering paramters to results dataframe
results[i, c("nu", "sigma2", "alpha")] <- c(nu, mod$par['sigma2'], mod$par['alpha'])
}
}
return(results)
}