Timeseries Plot For Multiple Model Builds

In [None]:
library(readr)
library(dplyr)
library(tidyr)
library(stringr)
library(magrittr)
library(padr)
library(kableExtra)
library(purrr)
library(ggplot2)
library(ggthemr)
library(here)
library(lubridate)
library(caret)
library(scales)
# Apply the "fresh" ggthemr theme to the plots created in this notebook.
ggthemr("fresh")
# Load the package under development from the working directory.
# NOTE(review): presumably this provides import_data_dwh(),
# import_data_cosmo(), aggregate_pollen() and impute_daily() - confirm.
devtools::load_all()

# Season key that selects which model runs are imported further down.
# Values handled below: "alnu19", "alnu20", "betu19", "betu20", "poac19",
# "poac20".
select_season <- "alnu19"


In [None]:
# Lookup table with one row per pollen taxon and the identifiers stored in
# the hirst_taxon, cosmo_taxon and fieldextra_taxon columns. Taxa without a
# COSMO / fieldextra identifier carry NA in those columns.
species_all <- tribble(
  ~taxon,      ~hirst_taxon, ~cosmo_taxon,  ~fieldextra_taxon,
  "Castanea",  "kacasth0",   NA_character_, NA_character_,
  "Alnus",     "kaalnuh0",   "ALNU",        "ALNU1",
  "Ulmus",     "kaulmuh0",   NA_character_, NA_character_,
  "Cupressus", "kacuprh0",   NA_character_, NA_character_,
  "Fraxinus",  "kafraxh0",   NA_character_, NA_character_,
  "Fagus",     "kafaguh0",   NA_character_, NA_character_,
  "Juglans",   "kajuglh0",   NA_character_, NA_character_,
  "Plantago",  "khplanh0",   NA_character_, NA_character_,
  "Corylus",   "kacoryh0",   NA_character_, NA_character_,
  "Pinus",     "kapinuh0",   NA_character_, NA_character_,
  "Quercus",   "kaquerh0",   NA_character_, NA_character_,
  "Rumex",     "khrumeh0",   NA_character_, NA_character_,
  "Platanus",  "kaplath0",   NA_character_, NA_character_,
  "Populus",   "kapopuh0",   NA_character_, NA_character_,
  "Poaceae",   "khpoach0",   "POAC",        "POAC1",
  "Salix",     "kasalih0",   NA_character_, NA_character_,
  "Betula",    "kabetuh0",   "BETU",        "BETU1",
  "Carpinus",  "kacarph0",   NA_character_, NA_character_,
  "Urtica",    "khurtih0",   NA_character_, NA_character_,
  "Taxus",     "kataxuh0",   NA_character_, NA_character_,
  "Picea",     "kapiceh0",   NA_character_, NA_character_,
  "Ambrosia",  "khambrh0",   "AMBR",        "AMBR1"
)

# The following types are being modelled:
# Erle - Alder - Aulne - Alnus
# Birke - Birch - Bouleau - Betula
# Gräser - Grasses - Graminées - Poaceae
# Ambrosia - Ragweed - Ambroisie - Ambrosia
species <- filter(
  species_all,
  taxon %in% c("Alnus", "Ambrosia", "Betula", "Poaceae")
)

# Stations used in the analysis, one row per station: Hirst station code,
# station name and COSMO station identifier. The table is sorted by Hirst
# code. Payerne (PPY) and Balerna (BLR) are left out and only kept as
# comments.
stations <- arrange(
  tribble(
    ~hirst_station, ~station,            ~cosmo_station,
    "PDS",          "Wolfgang",          "CHDAVO",
    "PBU",          "Buchs",             "CHBUCH",
    "PMU",          "Münsterlingen",     "CHMUEN",
    "PBS",          "Basel",             "CHBASE",
    "PZH",          "Zürich",            "CHZUER",
    "PLZ",          "Luzern",            "CHLUZE",
    "PBE",          "Bern",              "CHBERN",
    # "PPY",        "Payerne",           NA_character_,
    "PNE",          "Neuchâtel",         "CHNEUC",
    "PVI",          "Visp",              "CHVISP",
    "PLS",          "Lausanne",          "CHLAUS",
    "PGE",          "Genève",            "CHGENE",
    "PCF",          "La-Chaux-de-Fonds", "CHLACH",
    "PLO",          "Locarno",           "CHLOCA",
    # "BLR",        "Balerna",           NA_character_,
    "PLU",          "Lugano",            "CHLUGA"
  ),
  hirst_station
)

In [None]:


# Import the hourly measurements and the pollen output of the three model
# builds (operational, Phenology V1, Phenology V2) for `select_season`.
data_dwh <- import_data_dwh(paste0(here(), "/data/dwh/pollen_dwh_hourly.txt"))

# Fail early on an unknown season key; otherwise no model data would be
# imported and the notebook would only break later with an unrelated
# "object not found" error.
seasons_available <- c(
  "alnu19", "alnu20", "betu19", "betu20", "poac19", "poac20"
)
if (length(select_season) != 1 || !(select_season %in% seasons_available)) {
  stop(
    "Unknown `select_season`: ", paste(select_season, collapse = ", "),
    ". Expected one of: ", paste(seasons_available, collapse = ", "),
    call. = FALSE
  )
}

# Build the path of the combined pollen file of one model build.
# A season key such as "alnu19" consists of a four-letter taxon code and a
# two-digit year; the run directory is named "<year>_<taxon>_<build>".
cosmo_pollen_file <- function(season, build) {
  paste0(
    "/scratch/sadamov/wd/",
    substr(season, 5, 6), "_", substr(season, 1, 4), "_", build,
    "/mod_pollen_combined.txt"
  )
}

data_osm <- import_data_cosmo(
  cosmo_pollen_file(select_season, "osm"),
  type = "Operational - C1E"
)
data_pheno_v1 <- import_data_cosmo(
  cosmo_pollen_file(select_season, "pheno_v1"),
  type = "Phenology V1"
)
data_pheno_v2 <- import_data_cosmo(
  cosmo_pollen_file(select_season, "pheno_v2"),
  type = "Phenology V2"
)

# For the alder seasons (both years) the values of the two phenology builds
# are multiplied by 0.6.
# NOTE(review): the origin of the 0.6 factor is not documented here - confirm.
if (substr(select_season, 1, 4) == "alnu") {
  data_pheno_v1 %<>% mutate(value = 0.6 * value)
  data_pheno_v2 %<>% mutate(value = 0.6 * value)
}

# Hourly data sets keyed by a short name; the names later become the model
# labels in the metric tables.
# NOTE(review): the Phenology V1 data is stored under the name
# "realtime_sdes" - confirm that this label is intended.
data_list <- list(
  dwh = data_dwh,
  osm = data_osm,
  realtime_sdes = data_pheno_v1,
  pheno_v2 = data_pheno_v2
)

# Aggregate and impute every data set, keeping only concentration rows.
data_daily_list <- map(data_list, function(hourly) {
  daily <- impute_daily(aggregate_pollen(hourly))
  filter(daily, measurement == "concentration")
})

# All daily series stacked into one long table (used for plotting).
data_daily <- bind_rows(data_daily_list)

# For every model build: pair each modelled daily value with the measured
# value (`obs`) of the same date, taxon and station. The right join keeps
# all model rows; `obs` is NA where no measurement matches.
obs_daily <- select(data_daily_list$dwh, date, taxon, station, obs = value)
data_daily_comp <- map(data_daily_list[-1], function(model_daily) {
  paired <- right_join(
    obs_daily,
    model_daily,
    by = c("taxon", "date", "station")
  )
  select(paired, date, taxon, station, obs, value)
})

In [None]:
# Import the sdes output of the three model builds for `select_season` and
# rescale it per station so it can be overlaid on the concentration plots.

# Fail early on an unknown season key instead of leaving the sdes objects
# undefined.
seasons_available <- c(
  "alnu19", "alnu20", "betu19", "betu20", "poac19", "poac20"
)
if (length(select_season) != 1 || !(select_season %in% seasons_available)) {
  stop(
    "Unknown `select_season`: ", paste(select_season, collapse = ", "),
    ". Expected one of: ", paste(seasons_available, collapse = ", "),
    call. = FALSE
  )
}

# Build the path of the combined sdes file of one model build.
# A season key such as "alnu19" consists of a four-letter taxon code and a
# two-digit year; the run directory is named "<year>_<taxon>_<build>".
cosmo_sdes_file <- function(season, build) {
  paste0(
    "/scratch/sadamov/wd/",
    substr(season, 5, 6), "_", substr(season, 1, 4), "_", build,
    "/mod_sdes_combined.txt"
  )
}

# Aggregate and impute an hourly sdes data set, then rescale its values
# station by station to the range [0, 1.2 * max], where max is the largest
# value of `reference_daily` at that station. Only stations listed in
# `station_names` are returned, in that order.
rescale_sdes_per_station <- function(sdes_hourly,
                                     reference_daily,
                                     station_names) {
  sdes_daily <- sdes_hourly %>%
    aggregate_pollen() %>%
    impute_daily()

  map(station_names, function(stn) {
    max_value <- reference_daily %>%
      filter(station == stn) %>%
      pull(value) %>%
      max()
    sdes_daily %>%
      filter(station == stn) %>%
      mutate(value = rescale(value, to = c(0, 1.2 * max_value)))
  }) %>%
    bind_rows()
}

# NOTE(review): all three sdes series are scaled against the maximum of the
# operational run (data_daily_list$osm), including both phenology builds -
# confirm that this is intended.
data_osm_sdes <- import_data_cosmo(
  cosmo_sdes_file(select_season, "osm"),
  type = "Operational - C1E"
) %>%
  rescale_sdes_per_station(data_daily_list$osm, stations$station)

data_pheno_v2_sdes <- import_data_cosmo(
  cosmo_sdes_file(select_season, "pheno_v2"),
  type = "Phenology V2"
) %>%
  rescale_sdes_per_station(data_daily_list$osm, stations$station)

data_pheno_v1_sdes <- import_data_cosmo(
  cosmo_sdes_file(select_season, "pheno_v1"),
  type = "Phenology V1"
) %>%
  rescale_sdes_per_station(data_daily_list$osm, stations$station)
    

In [None]:
# Taxon, date range and year of the season, all taken from the operational
# model run.
taxon_selected <- unique(data_list$osm$taxon)
start_date <- min(data_list$osm$date)
end_date <- max(data_list$osm$date)
year_selected <- year(start_date)

# One timeseries plot per station: daily concentrations of every data set as
# lines and points, overlaid with the three rescaled sdes series as dotted
# lines.
map(stations$station, function(stn) {
  # Restrict a data set to the selected taxon, this station and the season.
  in_station_season <- function(df) {
    filter(
      df,
      taxon == taxon_selected,
      station == stn,
      between(date, start_date, end_date)
    )
  }

  # Dotted overlay line for one rescaled sdes series.
  sdes_layer <- function(df, colour) {
    geom_line(
      data = in_station_season(df),
      aes(x = date, y = value),
      col = colour, size = 1.5, alpha = 0.3, lty = 3
    )
  }

  ggplot(in_station_season(data_daily)) +
    geom_line(aes(x = date, y = value, col = type), alpha = 0.6) +
    geom_point(
      aes(x = date, y = value, col = type, shape = type),
      alpha = 0.4
    ) +
    sdes_layer(data_osm_sdes, "#76d3de") +
    sdes_layer(data_pheno_v2_sdes, "#ec7d64") +
    sdes_layer(data_pheno_v1_sdes, "#f4b258") +
    scale_color_manual(
      values = c(
        "#404040", "#76d3de", "#f4b258", "#ec7d64",
        "#6eb148", "#bb809a", "#52cdb5"
      )
    ) +
    # scale_color_manual(values = swatch()[c(3, 2, 4, 5, 9, 7)]) +
    theme(legend.position = "bottom", legend.title = element_blank()) +
    xlab(year_selected) +
    ylab(paste("Daily", taxon_selected, "Concentration [m^-3]")) +
    ggtitle(
      paste0(
        "Daily ", taxon_selected, " Concentrations in ", stn,
        " - ", year_selected, " Season"
      )
    )
})

# ggsave(paste0(here(), "/vignettes/figures/timeseries.png"), gg_timeseries, width = 24, height = 13.5, dpi = 300, units = c("cm"))

In the following, we look at several numeric and categorical metrics to compare the new model builds.

In [None]:
# Numeric skill scores of every model build against the measurements.
# Only days with an observed value of at least 10 are evaluated (this also
# drops days without an observation).
#
# ME is the signed mean error (bias, model minus observation) and SDME the
# standard deviation of that signed error. Previously both were computed on
# the absolute error, which made ME an exact duplicate of MAE.
metrics_numeric_list <- map(data_daily_comp, ~ .x %>%
  filter(obs >= 10) %>%
  summarise(
    R2 = cor(value, obs, use = "complete.obs")^2,
    ME = mean(value - obs, na.rm = TRUE),
    SDME = sd(value - obs, na.rm = TRUE),
    MSE = mean((value - obs)^2, na.rm = TRUE),
    RMSE = sqrt(MSE),
    MSLE = mean((log(1 + value) - log(1 + obs))^2, na.rm = TRUE),
    RMSLE = sqrt(MSLE),
    MAE = mean(abs(value - obs), na.rm = TRUE)
  ))

# One row per model build, labelled with the list names. The list keeps its
# own name so the labels are not read from a variable that is being
# overwritten in the same pipeline.
metrics_numeric <- metrics_numeric_list %>%
  bind_rows() %>%
  mutate(model = names(metrics_numeric_list))

# The operational run ("osm") is the baseline for the relative MSE.
mse_baseline <- metrics_numeric %>%
  filter(model == "osm") %>%
  pull(MSE)

metrics_numeric %<>%
  mutate("Rel. MSE" = MSE / mse_baseline)

# Render the table, best (lowest MSE) build first.
metrics_numeric %>%
  arrange(MSE) %>%
  kable() %>%
  kable_styling("striped", full_width = FALSE) %>%
  as.character() %>%
  IRdisplay::display_html()


| Predicted | Reference: Event | Reference: No Event |
|-----------|------------------|---------------------|
| Event     | A                | B                   |
| No Event  | C                | D                   |

The formulas used here are:

Sensitivity = A/(A+C)

Specificity = D/(B+D)

Prevalence = (A+C)/(A+B+C+D)

PPV = (sensitivity * prevalence)/((sensitivity*prevalence) + ((1-specificity)*(1-prevalence)))

NPV = (specificity * (1-prevalence))/(((1-sensitivity)*prevalence) + ((specificity)*(1-prevalence)))

Detection Rate = A/(A+B+C+D)

Detection Prevalence = (A+B)/(A+B+C+D)

Balanced Accuracy = (sensitivity+specificity)/2

Precision = A/(A+B)

Recall = A/(A+C)

F1 = (1+beta^2)*precision*recall/((beta^2 * precision)+recall), where beta = 1

In [None]:
# Concentration classes, ordered from lowest to highest.
concentration_levels <- c("nothing", "weak", "medium", "strong", "verystrong")

# Lower limits of the classes "weak", "medium", "strong" and "verystrong"
# for every taxon; values below the first limit are "nothing".
concentration_breaks <- list(
  Alnus = c(1, 11, 70, 250),
  Betula = c(1, 11, 70, 300),
  Poaceae = c(1, 20, 50, 150),
  Ambrosia = c(1, 6, 11, 40)
)

# Classify concentrations into the classes above, using the limits of the
# taxon of each element.
#
# The classes are half-open intervals [lower, next lower). The previous
# closed integer ranges (e.g. "weak" = 1..10, "medium" = 11..69) left gaps,
# so non-integer daily values such as 10.4 or 69.5 matched no class and
# became NA. Integer values are classified exactly as before.
#
# taxon: vector of taxon names, same length as `conc`.
# conc:  numeric vector of concentrations.
# Returns a factor with levels `concentration_levels`; NA where the
# concentration is NA or the taxon has no limits defined.
classify_concentration <- function(taxon, conc) {
  classes <- rep(NA_character_, length(conc))
  for (tx in names(concentration_breaks)) {
    idx <- which(taxon == tx)
    classes[idx] <- as.character(cut(
      conc[idx],
      breaks = c(-Inf, concentration_breaks[[tx]], Inf),
      labels = concentration_levels,
      right = FALSE,
      include.lowest = TRUE
    ))
  }
  factor(classes, levels = concentration_levels)
}

# Classify observed and modelled values of every model build and drop the
# days whose observed class is "nothing" or "weak". Days without an observed
# class (NA) are kept, as before.
data_valid <- map(data_daily_comp, ~ .x %>%
  mutate(
    conc_obs = classify_concentration(taxon, obs),
    conc_value = classify_concentration(taxon, value)
  ) %>%
  filter(!(conc_obs %in% c("nothing", "weak"))))

# Confusion matrix of modelled vs. observed concentration class, one per
# model build.
confusion_matrix <- map(
  data_valid,
  function(validation) {
    confusionMatrix(validation$conc_value, validation$conc_obs)
  }
)

# Keep the first two overall statistics (referenced below as Accuracy and
# Kappa) of every build, one row per build.
metrics_categoric <- confusion_matrix %>%
  map(function(cm) cm$overall[1:2]) %>%
  bind_rows() %>%
  mutate(model = names(confusion_matrix))

# The operational run ("osm") serves as baseline for the relative scores.
is_baseline <- metrics_categoric$model == "osm"
kappa_baseline <- metrics_categoric$Kappa[is_baseline]
accuracy_baseline <- metrics_categoric$Accuracy[is_baseline]

# Styled table, best (highest Kappa) build first.
kable_metrics_categoric <- metrics_categoric %>%
  mutate(
    "Rel. Accuracy" = Accuracy / accuracy_baseline,
    "Rel. Kappa" = Kappa / kappa_baseline
  ) %>%
  arrange(desc(Kappa)) %>%
  kable() %>%
  kable_styling("striped", full_width = FALSE)

IRdisplay::display_html(as.character(kable_metrics_categoric))

In [None]:
# Display the object named `confusion_matrix` as the cell output.
confusion_matrix

In [None]:
# Write the styled table of categorical metrics to an HTML file inside the
# project directory.
table_file <- paste0(here(), "/vignettes/tables/test.html")
save_kable(kable_metrics_categoric, file = table_file)