In [None]:
# Parameters
# Workbook to read and the names of its metadata and observation sheets.
param_data_filename <- "Template_MBO_Example_raw.xlsx"
param_metadata_sheet <- "METADATA"
param_data_sheet <- "BIRDS"

# Components of the temporary output directory and the "/"-joined path built from them.
conf_temporary_directory <- list("tmp", "data")
conf_temporary_directory_path <- paste(unlist(conf_temporary_directory), collapse = "/")

In [None]:
# Data Cleaner
# ---
# NaaVRE:
#  cell:
#   inputs: []
# ...
library("readxl")
library(dplyr)

# Read the two sheets of the input workbook: site metadata and observations
metadata <- read_excel(path = param_data_filename, sheet = param_metadata_sheet)
data <- read_excel(path = param_data_filename, sheet = param_data_sheet)

# Count the distinct sampling years per site (year = first four characters of
# datecollected) and keep the sites with more than 7 sampling years
sites <- data %>%
  distinct(siteid, sampling_year = substr(datecollected, 1, 4)) %>%
  count(siteid, name = "nyear") %>%
  filter(nyear > 7)

# Attach the year count to the metadata (this also drops sites with too few
# years), then keep only sites inside the study area:
# latitude in [25, 90] and longitude in [-45, 70]
md <- merge(metadata, sites, by = "siteid") %>%
  filter(decimallatitude >= 25 & decimallatitude <= 90 &
           decimallongitude >= -45 & decimallongitude <= 70)

# Restrict the observations to the retained sites
data <- filter(data, siteid %in% md$siteid)

# Diagnostic printout, one frequency table per site and per checked column:
#  1. maximumdepthinmeters - depth should be ~constant within a site (not an
#     issue in this case, but for other taxonomic groups the samples may have
#     been taken at different depths)
#  2. datecollected - to inspect when each sampling campaign took place
# All depth tables are printed first, followed by all date tables.
site_ids <- names(table(data$siteid))
for (checked_column in c("maximumdepthinmeters", "datecollected")) {
  for (site_id in site_ids) {
    site_rows <- data[which(data$siteid == site_id), ]
    print(table(site_rows[[checked_column]]))
  }
}

# Most sampling campaigns in this dataset were conducted in winter; one was
# conducted in summer and is removed so the sampling season stays consistent.
# The sampling month is derived from datecollected and stored in a new `month`
# column, then every row sampled in August (month 8) is dropped.
data <- data %>%
  mutate(month = as.numeric(format(datecollected, "%m"))) %>%
  filter(!(month %in% 8))

# Note that some time series can have more than one sampling campaign per year and even per season (not in this case)
# For our analysis, we are only keeping one sampling campaign per year


# Update the table of sites with more than 7 sampling years.
# After removing inconsistent sampling campaigns, some time series may have
# become shorter than 8 years, so the year count is recomputed on the cleaned
# observations.
sites <- data %>%
  group_by(siteid) %>%
  summarise(nyear = n_distinct(substr(datecollected, 1, 4))) %>%
  filter(nyear > 7)

# Keep only the sites that still qualify. The filter must use the freshly
# recomputed `sites` table: filtering against md$siteid would compare the
# tables with themselves and never remove a shortened time series.
data <- data %>% # Keep data from these sites
  filter(siteid %in% sites$siteid)
md <- md %>% # Keep metadata from these sites
  filter(siteid %in% sites$siteid)

# Final tables: the first 8 metadata columns and the first 15 data columns
md_final <- md[, 1:8]
data_final <- data[, 1:15]

# Create filenames
# NOTE(review): the empty-string initialisations are kept as in the original;
# presumably the workflow tooling relies on them to infer a String type - confirm.
metadata_filename <- ""
data_filename <- ""
metadata_filename <- paste(conf_temporary_directory_path, "metadata_Example.csv", sep="/")
data_filename <- paste(conf_temporary_directory_path, "data_Example.csv", sep="/")

# Ensure the temporary data storage directory exists.
# `[[` extracts the character elements of the list (single `[` returns
# one-element lists).
nested_dir <- file.path(conf_temporary_directory[[1]], conf_temporary_directory[[2]])

# Check and create the nested directory
if (!dir.exists(nested_dir)) {
  dir.create(nested_dir, recursive = TRUE)
}

# Write the cleaned tables. row.names = FALSE prevents write.csv from adding
# an unnamed index column, which read.csv would load back as an extra `X`
# column.
write.csv(md_final, file = metadata_filename, row.names = FALSE)
write.csv(data_final, file = data_filename, row.names = FALSE)


In [None]:
# Trend analyzer
# ---
# NaaVRE:
#  cell:
#   inputs:
#    - metadata_filename: String
#    - data_filename: String
# ...

library(vegan)
library(dplyr)
library(ggplot2)
library(nlme)

# Load cleaned data & metadata
md <- read.csv(metadata_filename, sep = ",")
data <- read.csv(data_filename, sep = ",")

# Sampling year as a number, derived from the collection date
data$year <- as.numeric(format(as.Date(data$datecollected), "%Y"))
colnames(data)

# Calculate community metrics per site, year and sampling date:
#  - richness: number of distinct taxa with parameter_value > 0
#  - parameter_value_tot: sum of parameter_value (abundance estimate)
#  - parameter / parameter_standardunit: the distinct values found in the group
#  - diversity: Shannon index of parameter_value (vegan::diversity)
# .groups = "drop" returns an ungrouped table, so no residual grouping is
# carried into later steps and no regrouping message is emitted.
data.tax <- data %>%
  group_by(siteid, year, datecollected) %>%
  summarise(
    richness = n_distinct(taxaname[parameter_value > 0]), # Richness
    parameter_value_tot = sum(parameter_value), # Abundance estimate
    parameter = unique(parameter),
    parameter_standardunit = unique(parameter_standardunit),
    diversity = diversity(parameter_value, index = "shannon"), # Diversity
    .groups = "drop"
  )

# Temporal analysis. Example with Richness and these 2 time series.
# For every site a GLS model of log10(richness + 1) against year is fitted and
# the slope of `year` and its p-value are collected in `results.richness`.

# Empty template: fixes the column names/types even when there are no sites
results.richness <- data.frame(siteid = character(0), slope = numeric(0), p = numeric(0))

site_ids <- names(table(data.tax$siteid))
site_results <- vector("list", length(site_ids)) # preallocated, one slot per site

for (k in seq_along(site_ids)) {
  i <- site_ids[[k]]
  x <- subset(data.tax, siteid == i)
  # We used GLS models taking into account the temporal autocorrelation
  # NOTE(review): `form = ~ 1 | year` uses year as the grouping factor of the
  # AR1 structure; confirm this is the intended specification (vs. `~ year`).
  gls_model <- gls(log10(richness+1) ~ year, data = x, correlation = corAR1(form = ~ 1 | year))

  # Extract the `year` coefficient and its p-value by name; `[[` also drops
  # the coefficient name so it does not end up as a data-frame row name
  slope <- coef(gls_model)[["year"]]
  p <- summary(gls_model)$tTable["year", "p-value"]

  # Save results
  site_results[[k]] <- data.frame(siteid = i, slope = slope, p = p)
}

# Bind all per-site rows in one step instead of growing the table in the loop
results.richness <- do.call(rbind, c(list(results.richness), site_results))

print(results.richness)

# In this example the second site showed a significant decrease in Richness over time (p<0.05)

# Attach the trend results to the site metadata
final_results <- merge(md, results.richness, by = "siteid")
final_results
