/
LWD_Stats.R
executable file
·385 lines (320 loc) · 17.8 KB
/
LWD_Stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
##=============================##
##LWD Stats
##Author: Brendan Schuetze
##Copyright MessenSchuetze Lab
##Started July 22, 2015
##=============================##
##Import Libraries
library(ggplot2) #Better Graphs
library(dplyr) #SQL-like Syntax for Datframe Manipulation
library(broom) #Tidier Summaries of Regression Models
library(reshape2) #Dataframe Manipulation
library(stringr) #String Manipulation
library(ggthemes) #Better ggplot2 Themes
printStats <- function(df, mean.Length, mean.Diameter) { #Basic Stats about Data Collected So far
print("================================================================")
cat("\n")
print("Stream Stats Metric")
print(paste("Total Number of LWD Pieces:", nrow(df)))
print(paste("Mean Length", mean.Length / 100, "meters"))
print(paste("Mean Diameter", mean.Diameter, "cm"))
print(paste("Mean Volume", (pi / 4) * ((mean.Diameter / 100) ^ 2) * (mean.Length / 100), "m^3"))
print(paste("Mean Frequency of", nrow(df) / 11.80, "LWD/km")) ##hardcoded number of kilometers traveled, must be changed for other surveys
cat("\n")
print("================================================================")
cat("\n")
print("Distribution of Factor Variables")
print(paste("Ratio of Coniferous to Deciduous to Unknown: ", nrow(filter(df, Species..C.D.U.== "C")), ":", nrow(filter(df, Species..C.D.U.== "D")), ":", nrow(filter(df, Species..C.D.U.== "U"))))
df <- df %>% mutate(Bank = str_sub(df$ID.Number, -1)) ##Split ID number into two parts, number and L/R bank placement columns
print(paste("Ratio of L/R/U Bank Placement: ", table(df$Bank)[1], ":", table(df$Bank)[2], ":", table(df$Bank)[3]))
print(paste("Number of Tagged Pieces:", nrow(filter(df, Tagged == "Y"))))
}
orientChi <- function(df) {
temp <- list()
df$Channel.Orientation <- factor(df$Channel.Orientation)
for(i in 2:5) { ##Split By Quantiles, in order to find distribution percentages for Chi Square
if(i == 2) {
df.temp <- df %>%
filter(Diameter..cm. <= quantile(df$Diameter..cm.)[[i]] & Diameter..cm. >= quantile(df$Diameter..cm.)[[i - 1]]) %>%
select(Channel.Orientation)
}
else {
df.temp <- df %>%
filter(Diameter..cm. <= quantile(df$Diameter..cm.)[[i]] & Diameter..cm. > quantile(df$Diameter..cm.)[[i - 1]]) %>%
select(Channel.Orientation)
}
temp[i-1] <- df.temp
}
temp.first <- table(temp[1])
temp.second <- table(temp[2])
temp.third <- table(temp[3])
temp.fourth <- table(temp[4])
A <- c(temp.first[[1]], temp.second[[1]], temp.third[[1]], temp.fourth[[1]])
B <- c(temp.first[[2]], temp.second[[2]], temp.third[[2]], temp.fourth[[2]])
C <- c(temp.first[[3]], temp.second[[3]], temp.third[[3]], temp.fourth[[3]])
D <- c(temp.first[[4]], temp.second[[4]], temp.third[[4]], temp.fourth[[4]])
temp.dist <- data.frame(A, B, C, D)
##Print Results of Chi Square Analysis
cat("\n")
print("Effect of Diameter on Channel Orientation")
##print(temp.dist)
c <- chisq.test(temp.dist)
#print(c) #Print Chi Square Full Results
hypothesisTest(c$p.value)
for(i in 2:5) { ##Split By Quantiles, in order to find distribution percentages for Chi Square
if(i == 2) {
df.temp <- df %>%
filter(Length..cm. <= quantile(df$Length..cm.)[[i]] & Length..cm. >= quantile(df$Length..cm.)[[i - 1]]) %>%
select(Channel.Orientation)
}
else {
df.temp <- df %>%
filter(Length..cm. <= quantile(df$Length..cm.)[[i]] & Length..cm. > quantile(df$Length..cm.)[[i - 1]]) %>%
select(Channel.Orientation)
}
temp[i-1] <- df.temp
}
temp.first <- table(temp[1])
temp.second <- table(temp[2])
temp.third <- table(temp[3])
temp.fourth <- table(temp[4])
A <- c(temp.first[[1]], temp.second[[1]], temp.third[[1]], temp.fourth[[1]])
B <- c(temp.first[[2]], temp.second[[2]], temp.third[[2]], temp.fourth[[2]])
C <- c(temp.first[[3]], temp.second[[3]], temp.third[[3]], temp.fourth[[3]])
D <- c(temp.first[[4]], temp.second[[4]], temp.third[[4]], temp.fourth[[4]])
temp.dist <- data.frame(A, B, C, D)
##Print Results of Chi Square Analysis
cat("\n")
print("Relationship Between Orientation and Length")
##print(temp.dist)
c <- chisq.test(temp.dist)
##print(c) #Print Chi Square Full Results
hypothesisTest(c$p.value)
}
updateOrient <- function(df) {
cat("\n")
print("================================================================")
cat("\n")
print("Orientation Statistics")
cat("\n")
print("Percentages")
orient <- (table(factor(df$Channel.Orientation)))
print(orient/sum(orient) * 100)
##Chi-Square Test for Orientation versus Diameter and Length
orientChi(df)
cat("\n")
##Distribution of Channel Orientations Graph By Diameter
Channel.Orientation.Graph <- ggplot(data = df) + geom_density(aes(x = Diameter..cm.)) + facet_wrap( ~ Channel.Orientation)
ggsave("Output/Channel.Orientation.Diameter.jpg")
##Distribution of Channel Orientations By Length
levels(df$Channel.Orientation) <- c("A" = "A Orientation", "B" = "B Orientation", "C" = "C Orientation", "D" = "D Orientation")
Channel.Orientation.Graph <- ggplot(data = df) + geom_density(aes(x = Length..cm.)) + facet_wrap( ~ Channel.Orientation) + xlim(200, 1800) + theme_tufte() + ggtitle("Distribution of Lengths by Channel Orientation \n") + xlab("Lengths") + ylab("Density \n")
ggsave("Output/Channel.Orientation.Length.jpg")
}
lengthVersusDiameterGraph <- function(df){ ##Graph Length versus Diameter
ggplot() + geom_point(data = df, aes(x = Diameter..cm., y = Length..cm.)) + ylim(0, 5000) + geom_smooth(data = df, aes(x = Diameter..cm., y = Length..cm.)) + xlim(min(df$Diameter..cm.), 65)
ggsave("Output/Length.Diameter.Scatterplot.jpg")
}
updateSed <- function(df) {
print("================================================================")
cat("\n")
print("Sedimentation & Pool Statistics")
print(paste("Sediment Storage Y/N", table(factor(df$Sediment.Storage..Y.N.))[2], ":", table(factor(df$Sediment.Storage..Y.N.))[1]))
print(paste("Pool Formation Y/N", table(factor(df$Pool.FF..Y.N.))[2], ":", table(factor(df$Pool.FF..Y.N.))[1]))
temp.dist <- data.frame(c(table(filter(df, Channel.Orientation == "A")$Sediment.Storage..Y.N.)[2], table(filter(df, Channel.Orientation == "B")$Sediment.Storage..Y.N.)[2], table(filter(df, Channel.Orientation == "C")$Sediment.Storage..Y.N.)[2], table(filter(df, Channel.Orientation == "D")$Sediment.Storage..Y.N.)[2]), c(table(filter(df, Channel.Orientation == "A")$Sediment.Storage..Y.N.)[1], table(filter(df, Channel.Orientation == "B")$Sediment.Storage..Y.N.)[1], table(filter(df, Channel.Orientation == "C")$Sediment.Storage..Y.N.)[1], table(filter(df, Channel.Orientation == "D")$Sediment.Storage..Y.N.)[1])) #Create Dataframe for Sed
colnames(temp.dist) <- c("Yes", "No")
cat("\n")
print("Effect of Channel Orientation on Sedimentation")
c <- chisq.test(temp.dist)
##print(c) #Print Chi Square Full Results
hypothesisTest(c$p.value)
rm(temp.dist)
temp.dist <- data.frame(c(table(filter(df, Channel.Orientation == "A")$Pool.FF..Y.N.)[2], table(filter(df, Channel.Orientation == "B")$Pool.FF..Y.N.)[2], table(filter(df, Channel.Orientation == "C")$Pool.FF..Y.N.)[2], table(filter(df, Channel.Orientation == "D")$Pool.FF..Y.N.)[2]), c(table(filter(df, Channel.Orientation == "A")$Pool.FF..Y.N.)[1], table(filter(df, Channel.Orientation == "B")$Pool.FF..Y.N.)[1], table(filter(df, Channel.Orientation == "C")$Pool.FF..Y.N.)[1], table(filter(df, Channel.Orientation == "D")$Pool.FF..Y.N.)[1])) #Create Dataframe for Pool Formation
colnames(temp.dist) <- c("Yes", "No")
cat("\n")
print("Effect of Channel Orientation on Pool Formation")
c <- chisq.test(temp.dist)
##print(c) #Print Chi Square Full Results
hypothesisTest(c$p.value)
cat("\n")
}
smallLWD <- function(bad) { ##Identify LWD that do not meet minimum requirements
bad <- df %>%
filter(Length..cm. < 200 | Diameter..cm. < 10)
if(nrow(bad) > 0) {
print("================================================================")
cat("\n")
print(paste("WARNING:", nrow(bad), "Piece(s) of LWD are Below Minimum Requirements. They will be excluded from all statistics."))
print("Their ID(s) are shown below")
print(bad$ID.Number)
cat("\n")
}
}
largeLWD <- function(df) { #Display IDs of LWD > 50cm in diameter
LargeLWD <- df %>%
filter(Diameter..cm. > 50)
if(nrow(LargeLWD) > 0) {
print(paste(nrow(LargeLWD), "pieces of *Large* LWD found"))
print("Their ID(s) are shown below")
print(LargeLWD$ID.Number)
cat("\n")
}
}
hypothesisTest <- function(p) { ##Check if P values are significant
print(paste("P Value:", p))
if(p < 0.05) {
print("#!# Reject Null Hypothesis #!#")
return(TRUE)
} else{
print("### Fail to Reject Null Hypothesis ###")
return(FALSE)
}
}
geomorphologySize <- function(df) {
sedY <- df %>%
filter(Sediment.Storage..Y.N. == "Y") %>%
select(Length..cm., Diameter..cm.) %>%
mutate(Volume..cm. = (pi * ((Diameter..cm. / 2) ^ 2) * Length..cm.))
sedN <- df %>%
filter(Sediment.Storage..Y.N. == "N") %>%
select(Length..cm., Diameter..cm.) %>%
mutate(Volume..cm. = (pi * ((Diameter..cm. / 2) ^ 2) * Length..cm.))
t <- t.test(x = sedN$Volume..cm.,y = sedY$Volume..cm., alternative = "two.sided")
print("Effect of Between Volume on Sedimentation")
hypothesisTest(t$p.value)
cat("\n")
poolY <- df %>% ##Relationship Between Volume and Pool.FF
filter(Pool.FF..Y.N. == "Y") %>%
select(Length..cm., Diameter..cm.) %>%
mutate(Volume..cm. = (pi * ((Diameter..cm. / 2) ^ 2) * Length..cm.))
poolN <- df %>%
filter(Pool.FF..Y.N. == "N") %>%
select(Length..cm., Diameter..cm.) %>%
mutate(Volume..cm. = (pi * ((Diameter..cm. / 2) ^ 2) * Length..cm.))
t <- t.test(x = poolN$Volume..cm.,y = poolY$Volume..cm., alternative = "two.sided")
print("Effect of Between Volume on Pool.FF")
hypothesisTest(t$p.value)
}
positionSize <- function(df) {
df.temp <- df %>% mutate(Num = as.integer(str_sub(df$ID.Number, 0, -2))) %>%
filter(Num < 14) %>%
mutate(Num = Num + nrow(df)) %>%
mutate(Volume..cm. = (pi * ((Diameter..cm. / 2) ^ 2) * Length..cm.))
df <- df %>% mutate(Num = as.integer(str_sub(df$ID.Number, 0, -2))) %>%
filter(Num > 14) %>%
mutate(Volume..cm. = (pi * ((Diameter..cm. / 2) ^ 2) * Length..cm.))
df <- rbind(df, df.temp)
ggplot() + geom_point(data = df, aes(x = Num, y = Diameter..cm.)) + geom_smooth(data = df, aes(x = Num, y = Diameter..cm.)) + xlab("LWD Pieces (lower numbers are closer to Hellgate)") + ylab("Diameter (centimeters)") + ggtitle("Change in Diameter Down River") + ylim(10, 65) + theme_tufte()
ggsave("Output/Position.Diameter.jpg")
ggplot() + geom_point(data = df, aes(x = Num, y = Volume..cm.)) + geom_smooth(data = df, aes(x = Num, y = Volume..cm.)) + xlab("LWD Pieces (lower numbers are closer to Hellgate)") + ylab("Volume (cubic centimeters)") + ggtitle("Change in Volume Down River") + theme_tufte()
ggsave("Output/Position.Volume.jpg")
mod <- tidy(lm(Diameter..cm.*Length..cm. ~ Num, data = df))
cat("\n")
print("================================================================")
cat("\n")
print("Effect of Position Down River on Size")
hypothesisTest(mod$p.value[2])
}
bfStats <- function(df) {
ggplot() + geom_density(data = df, aes(x = X..Bankfull.Channel)) + facet_wrap(~ Channel.Orientation)
ggsave("Output/Bankfull.Percent.Dist.jpg")
ggplot() + geom_violin(data = df, aes(x = factor(Channel.Orientation), y = X..Bankfull.Channel))
ggsave("Output/Bankfull.Percent.Dist.Violin.jpg")
print("Effect of Channel Orientation on Bankfull Percentage")
bf.aov <- aov(X..Bankfull.Channel ~ Channel.Orientation, data = df)
bf.sum <- summary(bf.aov)
hypothesisTest((bf.sum[[1]][["Pr(>F)"]][1]))
}
graphStats <- function(df) {
ggplot() + geom_histogram(data = df, aes(x = Diameter..cm.), fill = "white", color = "black") + geom_vline(xintercept = 23.41, linetype = "longdash") + theme_tufte() + ggtitle("Distribution of Diameters") + xlab("Number of Pieces") + ylab("Density") + xlim(0, 65)
ggsave("Output/Diameter.Density.jpg")
}
channelOrient <- function(df) { ##Graph Frequency of LWD Orientations
df$Channel.Orientation <- factor(df$Channel.Orientation, level = c("A","C","B","D"))
ggplot() + geom_bar(data = df, aes(x = Channel.Orientation)) + theme_tufte() + ggtitle("Orientations of LWD in the Dead Diamond River") + xlab("Channel Orientation") + ylab("Count")
ggsave("Output/Orientation.jpg")
df.temp <- df %>% ##Calculate means of bankfull percentage for each orientation
group_by(Channel.Orientation) %>%
summarise(bfmean = mean(X..Bankfull.Channel))
ggplot() + geom_bar(data = df.temp, aes(x = Channel.Orientation, y = bfmean), stat = "identity") + xlab("Channel Orientation") + ylab("Mean Bankfull Percentage") + ggtitle("Bankfull Percentage by Channel Orientation") + theme_tufte()
ggsave("Output/Bankfull.Channel.Orientation.jpg")
}
volSed <- function(df) {
temp <- list()
for(i in 2:5) { ##Split By Quantiles, in order to find distribution percentages for Chi Square
if(i == 2) {
df.temp <- df %>%
filter(Diameter..cm. <= quantile(df$Diameter..cm.)[[i]] & Diameter..cm. >= quantile(df$Diameter..cm.)[[i - 1]]) %>%
select(Sediment.Storage..Y.N.)
}
else {
df.temp <- df %>%
filter(Diameter..cm. <= quantile(df$Diameter..cm.)[[i]] & Diameter..cm. > quantile(df$Diameter..cm.)[[i - 1]]) %>%
select(Sediment.Storage..Y.N.)
}
temp[i-1] <- df.temp
}
temp.first <- table(temp[1])
temp.second <- table(temp[2])
temp.third <- table(temp[3])
temp.fourth <- table(temp[4])
final <- data.frame("Quartile" = c("First", "Second", "Third", "Fourth"), "Yes" = c(temp.first[2], temp.second[2], temp.third[2], temp.fourth[2]), "No" = c(temp.first[1], temp.second[1], temp.third[1], temp.fourth[1])) %>%
mutate(total = Yes + No)
final$Quartile <- factor(final$Quartile, levels = c("First", "Second", "Third", "Fourth"))
ggplot() + geom_bar(data = final, aes(x = Quartile, y = Yes / total), stat = "identity") + theme_tufte() + ggtitle("Increases in Diameters Coincide with Higher Rates of Sediment Storage") + xlab("\n Diameter, sorted by quartiles") + ylab("Percent Sediment Storing \n")
ggsave("Output/VolSed.jpg")
rm(temp, final)
temp <- list()
for(i in 2:5) { ##Split By Quantiles, in order to find distribution percentages for Chi Square
if(i == 2) {
df.temp <- df %>%
filter(Diameter..cm. <= quantile(df$Diameter..cm.)[[i]] & Diameter..cm. >= quantile(df$Diameter..cm.)[[i - 1]]) %>%
select(Pool.FF..Y.N.)
}
else {
df.temp <- df %>%
filter(Diameter..cm. <= quantile(df$Diameter..cm.)[[i]] & Diameter..cm. > quantile(df$Diameter..cm.)[[i - 1]]) %>%
select(Pool.FF..Y.N.)
}
temp[i-1] <- df.temp
}
temp.first <- table(temp[1])
temp.second <- table(temp[2])
temp.third <- table(temp[3])
temp.fourth <- table(temp[4])
final <- data.frame("Quartile" = c("First", "Second", "Third", "Fourth"), "Yes" = c(temp.first[2], temp.second[2], temp.third[2], temp.fourth[2]), "No" = c(temp.first[1], temp.second[1], temp.third[1], temp.fourth[1])) %>%
mutate(total = Yes + No)
final$Quartile <- factor(final$Quartile, levels = c("First", "Second", "Third", "Fourth"))
ggplot() + geom_bar(data = final, aes(x = Quartile, y = Yes / total), stat = "identity") + theme_tufte() + ggtitle("Increases in Diameters Coincide with Higher Rates of Pool Formation") + xlab("\n Diameter, sorted by quartiles") + ylab("Percent Pool Forming \n")
ggsave("Output/VolPool.jpg")
}
startUpdate <- function(df) {
df <- df %>% ##Exclude any LWD that do not meet minimum requirements
filter(Length..cm. > 200 & Diameter..cm. > 10)
mean.Length = mean(as.numeric((df$Length..cm.)))
mean.Length.ft = mean(as.numeric((df$Length..ft.)))
mean.Diameter = mean(as.numeric((df$Diameter..cm.)))
sink("Output/LWD_Stats.txt")
smallLWD(df) ##Identify Insufficiently Sized LWD Pieces
largeLWD(df) ##Identify Large LWD
printStats(df, mean.Length, mean.Diameter) ##Calculate Basic Statistics
graphStats(df) ##Plot basic LWD statistics
updateOrient(df) ##Calculate Orientation Chi^2 and graph
bfStats(df)
updateSed(df)
lengthVersusDiameterGraph(df) ##Create scatterplot of length and diameter
geomorphologySize(df)
positionSize(df) ##Create graph of average size of LWD down river
channelOrient(df)
volSed(df) ##Create graph of volume versus sediment prevalence
cat("\n")
print("================================================================")
sink()
dev.off()
}
##Startup, Import and Scrub Dataframe
df <- read.csv("lwd.csv", stringsAsFactors = FALSE) %>%
select(-Long, -Lat, -Jam.ID, -Notes) %>%
na.omit() ##Scrub NAs
##Launch Main Program
startUpdate(df)