## Simple Harmonization

In [None]:
# To follow along, run this cell first.

options(warn = -1)
suppressMessages(library(neotoma2))
suppressMessages(library(sf))
suppressMessages(library(geojsonsf))
suppressMessages(library(dplyr))
suppressMessages(library(ggplot2))
suppressMessages(library(leaflet))
options(dplyr.summarise.inform = FALSE)

# Read the previously saved download object from disk.
cz_dl <- readRDS(file = 'data/czDownload.RDS')

# Build the sample table that the cells below filter and summarise.
allSamp <- cz_dl %>% samples()

**Goal:** 
Group all samples from *Plantago* taxa into one pseudo-taxon called `Plantago`.

**Methods:**

Using `dplyr::mutate()`

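Whenever `stringr::str_detect()` finds a `variablename` that matches the pattern `"Plantago.*"`, replace that name with `"Plantago"`. Note that the pattern is not anchored, so it matches any name that *contains* "Plantago", not only names that start with it; use `"^Plantago"` instead if only names beginning with Plantago should be grouped.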

In [None]:
# Before harmonizing: list every distinct name that matches the Plantago
# pattern, together with its ecological group.
allSamp %>%
  dplyr::select(variablename, ecologicalgroup) %>%
  dplyr::filter(
    stringr::str_detect(string = variablename, pattern = "Plantago.*")
  ) %>%
  dplyr::distinct()

In [None]:
# Overwrite allSamp in two steps:
#   1. keep only rows whose ecological group is "UPHE" or "TRSH"
#      (presumably upland herbs and trees/shrubs -- confirm against the
#      Neotoma ecological group codes);
#   2. relabel every name matching the Plantago pattern as "Plantago".
allSamp <- allSamp %>%
  dplyr::filter(ecologicalgroup %in% c("UPHE", "TRSH")) %>%
  dplyr::mutate(
    variablename = replace(
      x = variablename,
      list = stringr::str_detect(variablename, "Plantago.*"),
      values = "Plantago"
    )
  )

# Preview the first three rows of the harmonized table.
head(allSamp, n = 3)

In [None]:
# After harmonizing: repeat the lookup to see which Plantago names (and
# ecological groups) remain in the table.
allSamp %>%
  dplyr::select(variablename, ecologicalgroup) %>%
  dplyr::filter(
    stringr::str_detect(string = variablename, pattern = "Plantago.*")
  ) %>%
  dplyr::distinct()

In [None]:
# Extract the taxon table from the download object.
taxaplots <- cz_dl %>% taxa()

# Write the taxon table to a CSV file so it can be opened and edited
# outside of R.
readr::write_csv(
  taxaplots,
  "data/mytaxontable.csv"
)

In [None]:
# Read the taxon translation table.
# NOTE(review): this reads "data/taxontable.csv", not the
# "data/mytaxontable.csv" file written earlier -- presumably a pre-edited
# copy that ships with the tutorial; confirm.
translation <- readr::read_csv(file = "data/taxontable.csv")

In [None]:
# Rebuild allSamp from the download object (discarding the earlier
# Plantago-only edits) and harmonize it with the translation table:
#   * inner_join() keeps only rows whose variablename has a match in
#     `translation`;
#   * the original name and the `sites` / `samples` columns are dropped;
#   * values are summed within each combination of the grouping columns,
#     which include `replacement`.
# `.groups = 'keep'` means the result is still grouped by all of the
# grouping columns.
allSamp <- samples(cz_dl) %>%
  dplyr::inner_join(translation, by = "variablename") %>%
  dplyr::select(-dplyr::all_of(c("variablename", "sites", "samples"))) %>%
  dplyr::group_by(
    siteid,
    sitename,
    replacement,
    sampleid,
    units,
    age,
    agetype,
    depth,
    datasetid,
    long,
    lat
  ) %>%
  dplyr::summarise(value = sum(value), .groups = 'keep')

In [None]:
# Render the first 25 rows of the harmonized table as an interactive
# table, without the row-name column.
allSamp %>%
  head(n = 25) %>%
  DT::datatable(rownames = FALSE)