In [None]:
# Notebook display limits of the repr package: print matrices / data frames
# with up to 600 rows and 200 columns.
options(repr.matrix.max.rows = 600, repr.matrix.max.cols = 200)

In [None]:
renv::load(here::here())
library(readr)
library(dplyr)
library(tidyr)
library(stringr)
library(magrittr)
library(padr)
library(tibble)
library(scales)
library(kableExtra)
library(purrr)
library(ggplot2)
library(ggthemr)
library(here)
library(lubridate)
library(animation)
ggthemr("fresh")
devtools::load_all()


In [None]:
# Restore the objects stored in the two project .RData files into the
# current environment (object names are defined by the files themselves).
load(file.path(here(), "data", "other", "species.RData"))
load(file.path(here(), "data", "other", "stations.RData"))

In [None]:
# Run configuration: taxon, model variant and (two-digit) year.
species_sel <- "alnu"
model_sel <- "pheno_v6"
year <- "20"

# Working directories of the model run under test and of the "osm" run;
# both follow the pattern <year>_<species>_<suffix>/.
path <- paste0(
  "/scratch/sadamov/wd/",
  paste(year, species_sel, model_sel, sep = "_"),
  "/"
)
path_osm <- paste0(
  "/scratch/sadamov/wd/",
  paste(year, species_sel, sep = "_"),
  "_osm/"
)

In [None]:
# Mean daily 2 m temperature per station and calendar day, averaged over all
# observations after 2010-01-01. December is recoded to month 0 so that,
# after sorting by (month, day), each station's series starts on 1 December;
# `weight` is the running day index within that ordering.
# NOTE(review): the recoded month (0 for December) is later used as a join
# key against plain `month(date)` values - confirm that December rows are
# not expected to match there.
data_t2m <- paste0(here(), "/data/dwh/t2m_dwh_daily.txt") %>%
  import_data_dwh(parameter = "t2m") %>%
  # Maybe only retrieve latest 10 years for a more up-to-date picture
  # filter(date >= as.Date("2010-01-01")) %>%
  filter(date > as.Date("2010-01-01")) %>%
  mutate(
    month = month(date),
    day = day(date),
    month = if_else(month == 12, 0, month)
  ) %>%
  group_by(station, month, day) %>%
  summarise(t2m = round(mean(value), 4), .groups = "drop") %>%
  arrange(month, day) %>%
  group_by(station) %>%
  mutate(weight = row_number()) %>%
  ungroup()

# Species-specific shift of the day index: the weight is reduced by this many
# days and floored at 0. Species without an entry keep the raw index.
weight_shift <- c(alnu = 14, betu = 40, poac = 46)

if (species_sel %in% names(weight_shift)) {
  data_t2m %<>%
    mutate(weight = pmax(0, weight - weight_shift[[species_sel]]))
}

In [None]:
# Post-processing shared by every input below: shift the hours, aggregate
# and impute to a daily series (project helpers, applied in this order).
to_daily_values <- function(data) {
  data %>%
    shift_hours() %>%
    aggregate_pollen() %>%
    impute_daily()
}

# Read one combined "Phenology" field file from a model run directory and
# run it through the shared daily post-processing.
read_cosmo_daily <- function(dir, file) {
  paste0(dir, file) %>%
    import_data_cosmo(type = "Phenology") %>%
    to_daily_values()
}

# Measurements
data_dwh <- paste0(here(), "/data/dwh/pollen_dwh_hourly.txt") %>%
  import_data_dwh() %>%
  to_daily_values()

# Season description of the "osm" run
data_osm <- read_cosmo_daily(path_osm, "mod_sdes_combined.txt")

# Phenology fields of the run under test
data_sdes <- read_cosmo_daily(path, "mod_sdes_combined.txt")
data_saisn <- read_cosmo_daily(path, "mod_saisn_combined.txt")
data_tthrs <- read_cosmo_daily(path, "mod_tthrs_combined.txt")
data_ctsum <- read_cosmo_daily(path, "mod_ctsum_combined.txt")

# Grasses come with a season-length field, the other species with a
# season-end threshold field.
if (species_sel == "poac") {
  data_saisl <- read_cosmo_daily(path, "mod_saisl_combined.txt")
} else {
  data_tthre <- read_cosmo_daily(path, "mod_tthre_combined.txt")
}

# Restrict the measurements to the modelled period and taxon.
# `%in%` rather than `==`: unique() may return more than one taxon, in which
# case `==` would silently recycle the comparison and keep the wrong rows.
# invisible() only suppresses the notebook output of the assignment.
invisible(
  data_dwh %<>% filter(
    between(date, min(data_sdes$date), max(data_sdes$date)),
    taxon %in% unique(data_sdes$taxon)
  )
)

# One row per (date, station) with one column per phenology field.
data_pheno <- data_sdes %>%
  select(date, station, sdes = value) %>%
  left_join(
    data_saisn %>% select(date, station, saisn = value),
    by = c("date", "station")
  ) %>%
  left_join(
    data_tthrs %>% select(date, station, tthrs = value),
    by = c("date", "station")
  ) %>%
  left_join(
    data_ctsum %>% select(date, station, ctsum = value),
    by = c("date", "station")
  )

# The fifth field depends on the species: season length for grasses,
# season-end threshold otherwise.
if (species_sel == "poac") {
  data_pheno %<>%
    left_join(
      data_saisl %>% select(date, station, saisl = value),
      by = c("date", "station")
    )
} else {
  data_pheno %<>%
    left_join(
      data_tthre %>% select(date, station, tthre = value),
      by = c("date", "station")
    )
}

# Attach the calendar-day temperature (t2m) and day weight.
# NOTE(review): data_t2m stores December as month 0 while `month(date)` here
# yields 12, so December dates receive NA for t2m and weight - confirm that
# this is intended.
data_pheno %<>%
  mutate(month = month(date), day = day(date)) %>%
  left_join(data_t2m, by = c("station", "month", "day")) %>%
  # In Wolfgang the season end is never reached (tthre too high?)
  filter(!station %in% c("Wolfgang", "La-Chaux-de-Fonds"))


In [None]:
# Quick look at the assembled table for a single station.
filter(data_pheno, station == "Zürich")

In [None]:
# Base temperature per species: temperatures below it are set to 0 further
# down. Unknown species yield NA, as before.
t_base <- switch(species_sel,
  alnu = 5.3,
  betu = 9,
  poac = 3,
  NA_real_
)

# Day-over-day change of a series; the first day (no predecessor) counts as 0.
daily_change <- function(x) {
  if_else(is.na(dplyr::lag(x)), 0, x - dplyr::lag(x))
}

# Colour code of a change: red = decrease, black = unchanged, blue = increase.
change_colour <- function(delta) {
  case_when(
    delta < 0 ~ "#e23d37",
    delta == 0 ~ "#161414",
    delta > 0 ~ "#4b4bc5"
  )
}

# Per-station diagnostics used by the curve computation and the plot labels.
data_pheno %<>%
  group_by(station) %>%
  mutate(
    # Calendar-day temperature, zeroed below the base temperature
    t2m = if_else(t2m < t_base, 0, t2m),
    t2m_weighted = t2m * weight,
    # Temperature implied by the model: daily increase of ctsum divided by
    # the day weight (0 on the first day).
    # NOTE(review): a weight of 0 gives Inf/NaN here - confirm such days
    # cannot occur in the modelled period.
    t2m_actual = if_else(
      is.na(dplyr::lag(ctsum)), 0, (ctsum - dplyr::lag(ctsum)) / weight
    ),
    t2m_actual = if_else(t2m_actual < t_base, 0, t2m_actual),
    t2m_actual_weighted = t2m_actual * weight,
    change_tthrs = daily_change(tthrs),
    col_tthrs = change_colour(change_tthrs),
    # Flag (and label text) for days without a temperature contribution
    change_t2m = if_else(t2m_actual == 0, "< T_Base", ""),
    log_t2m = t2m_actual == 0
  ) %>%
  {
    if (species_sel != "poac") {
      mutate(.,
        # Fixed: this was computed from tthrs (copy-paste), so the "tthre"
        # diagnostics merely duplicated the tthrs ones.
        change_tthre = daily_change(tthre),
        col_tthre = change_colour(change_tthre)
      )
    } else {
      mutate(.,
        change_saisl = daily_change(saisl),
        col_saisl = change_colour(change_saisl)
      )
    }
  } %>%
  # Do not leak the station grouping into later cells
  ungroup()

In [None]:

# Season-description curves for species "alnu".
# For every station and every row of data_pheno (the "current" day) a full
# curve over all dates is computed as seen from that day: up to the current
# day the model fields (ctsum, saisn) are used, afterwards the temperature
# sum is continued with t2m_weighted. The result is a nested list,
# curves_pheno[[station]][[date]], of tibbles with the columns date, value
# and old_cosmo, both rescaled to [0, 1].
# NOTE(review): old_cosmo is presumably the previous COSMO formulation, kept
# for comparison - confirm.
if (species_sel == "alnu") {
  curves_pheno <- data_pheno %>%
    split(.$station) %>%
    # Looping through the stations
    map(~ .x %>%
      # Looping through the days
      pmap(function(...) {
        # The columns of one row (one station-day) as a one-row tibble
        current <- tibble(...)
        
        # Number of days up to and including the current day on which the
        # season was running (saisn > 0) but t2m_actual was below t_base
        days_delayed_actual <- data_pheno %>%
          filter(station == current$station) %>%
          mutate(
            days_delayed = if_else(
              date <= current$date & t2m_actual < t_base & saisn > 0, 1, 0)
          ) %>%
          pull(days_delayed) %>%
          sum()
        
        # NOTE(review): the cumsum() calls below run before arrange(date), so
        # the rows are assumed to be in date order already - confirm.
        helper <- data_pheno %>%
          filter(station == current$station) %>%
          mutate(
            # Daily increments for continuing the temperature sum towards the
            # season-start threshold; only filled while the season has not
            # started on the current day (current$saisn <= 0): 0 before the
            # current day, ctsum on it, t2m_weighted afterwards
            tthrs_dummy = case_when(
              date < current$date & current$saisn <= 0 ~ 0,
              date == current$date & current$saisn <= 0 ~ ctsum,
              date > current$date & current$saisn <= 0 ~ t2m_weighted,
              TRUE ~ 0 # will be overwritten with ctsum
            ),
            # Same construction for the season-end threshold, regardless of
            # whether the season has started
            tthre_dummy = case_when(
              date < current$date ~ 0,
              date == current$date ~ ctsum,
              date > current$date ~ t2m_weighted
            ),
            # Vectorize the condition
            # (the scalar test on the current day is stored as a column so it
            # has one entry per row for the if_else() below)
            current_status = if_else(current$saisn > 0 | current$ctsum >= current$tthrs, TRUE, FALSE),
            # Sum compared against tthrs: ctsum if the season has started or
            # the threshold is already reached, the continued sum otherwise
            t2m_threshold = if_else(current_status, ctsum, cumsum(tthrs_dummy)),
            # In season: threshold of the current day reached, or saisn > 0
            # on a day up to the current one
            in_season = if_else(t2m_threshold >= current$tthrs | (saisn > 0 & date <= current$date), 1, 0),
            day_in_season = cumsum(in_season),
            # Subtract the delayed days counted above, never below 0
            day_in_season_actual = pmax(0, day_in_season - days_delayed_actual),
            # Sum compared against tthre: ctsum up to the current day, the
            # continued sum afterwards
            tthre_threshold = if_else(date <= current$date, ctsum, cumsum(tthre_dummy))
          )

        day_in_season <- helper %>%
          arrange(date) %>%
          pull(day_in_season_actual)

        # Season length: day-in-season count on the first date on which the
        # season-end threshold of the current day is reached.
        # NOTE(review): empty (length 0) if the threshold is never reached,
        # which would presumably make the tibble() below fail - confirm.
        saisl <-helper %>%
            filter(tthre_threshold >= current$tthre) %>%
            arrange(date) %>%
            slice(1) %>%
            pull(day_in_season_actual)

        # if (current$station == "Lugano" & current$date >= as.Date("2020-01-11") & current$date <= as.Date("2020-01-15")) {
        #   print(current$station)
        #   print(current$date)
        #   print(saisl)
        #   print(days_delayed_actual)
        #   View(helper)
        # }

        # value: product of a rising and a falling logistic curve in
        # day_in_season, with the constants scaled by saisl / 42.788
        # (presumably a reference season length - confirm)
        tibble(
          date = unique(data_pheno$date),
          value =
            # ((1.16) / (1 + exp((-0.676 * 42.788 / saisl) *
            #   (day_in_season - 12.457 * saisl / 42.788)))) *
            # ((1.16) / (1 + exp((0.193 * 42.788 / saisl) *
            #   (day_in_season - 23.435 * saisl / 42.788)))),
            ((1.61) / (1 + exp((-0.272 * 42.788 / saisl) *
              (day_in_season - 19.450 * saisl / 42.788)))) *
            ((1.61) / (1 + exp((0.213 * 42.788 / saisl) *
              (day_in_season - 23.435 * saisl / 42.788)))),
          old_cosmo = (exp(-0.152 * saisl + 6) + 1) * (1 / (1 + exp(- day_in_season * 0.3 + 7))) *
             (1 / (1 + exp(17 / saisl * day_in_season - 12)) - 0.005)
        ) %>%
          # Bring both curves to a common [0, 1] range
          mutate(
            value = rescale(value, to = c(0, 1)),
            old_cosmo = rescale(old_cosmo, to = c(0, 1))
          )
      }) %>%
      # Name each curve after its date (assumes every station has one row
      # per date, in the same order)
      setNames(unique(data_pheno$date)))
}

In [None]:

# Season-description curves for species "betu".
# For every station and every row of data_pheno (the "current" day) a full
# curve over all dates is computed as seen from that day: up to the current
# day the model fields (ctsum, saisn) are used, afterwards the temperature
# sum is continued with t2m_weighted. The result is a nested list,
# curves_pheno[[station]][[date]], of tibbles with the columns date, value
# and old_cosmo, both rescaled to [0, 1].
# NOTE(review): old_cosmo is presumably the previous COSMO formulation, kept
# for comparison - confirm.
if (species_sel == "betu") {
  curves_pheno <- data_pheno %>%
    split(.$station) %>%
    # Looping through the stations
    map(~ .x %>%
      # Looping through the days
      pmap(function(...) {
        # The columns of one row (one station-day) as a one-row tibble
        current <- tibble(...)

        # Number of days up to and including the current day on which the
        # season was running (saisn > 0) but t2m_actual was below t_base
        days_delayed_actual <- data_pheno %>%
          filter(station == current$station) %>%
          mutate(
            days_delayed = if_else(
              date <= current$date & t2m_actual < t_base & saisn > 0, 1, 0)
          ) %>%
          pull(days_delayed) %>%
          sum()
        
        # NOTE(review): the cumsum() calls below run before arrange(date), so
        # the rows are assumed to be in date order already - confirm.
        helper <- data_pheno %>%
          filter(station == current$station) %>%
          mutate(
            # Daily increments for continuing the temperature sum towards the
            # season-start threshold; only filled while the season has not
            # started on the current day (current$saisn <= 0): 0 before the
            # current day, ctsum on it, t2m_weighted afterwards
            tthrs_dummy = case_when(
              date < current$date & current$saisn <= 0 ~ 0,
              date == current$date & current$saisn <= 0 ~ ctsum,
              date > current$date & current$saisn <= 0 ~ t2m_weighted,
              TRUE ~ 0 # will be overwritten with ctsum
            ),
            # Same construction for the season-end threshold, regardless of
            # whether the season has started
            tthre_dummy = case_when(
              date < current$date ~ 0,
              date == current$date ~ ctsum,
              date > current$date ~ t2m_weighted
            ),
            # Vectorize the condition
            # (the scalar test on the current day is stored as a column so it
            # has one entry per row for the if_else() below)
            current_status = if_else(current$saisn > 0 | current$ctsum >= current$tthrs, TRUE, FALSE),
            # Sum compared against tthrs: ctsum if the season has started or
            # the threshold is already reached, the continued sum otherwise
            t2m_threshold = if_else(current_status, ctsum, cumsum(tthrs_dummy)),
            # In season: threshold of the current day reached, or saisn > 0
            # on a day up to the current one
            in_season = if_else(t2m_threshold >= current$tthrs | (saisn > 0 & date <= current$date), 1, 0),
            day_in_season = cumsum(in_season),
            # Subtract the delayed days counted above, never below 0
            day_in_season_actual = pmax(0, day_in_season - days_delayed_actual),
            # Sum compared against tthre: ctsum up to the current day, the
            # continued sum afterwards
            tthre_threshold = if_else(date <= current$date, ctsum, cumsum(tthre_dummy))
          )

        day_in_season <- helper %>%
          arrange(date) %>%
          pull(day_in_season_actual)

        # Season length: day-in-season count on the first date on which the
        # season-end threshold of the current day is reached.
        # NOTE(review): empty (length 0) if the threshold is never reached,
        # which would presumably make the tibble() below fail - confirm.
        saisl <-helper %>%
            filter(tthre_threshold >= current$tthre) %>%
            arrange(date) %>%
            slice(1) %>%
            pull(day_in_season_actual)

        # value: product of a rising and a falling logistic curve in
        # day_in_season, with the constants scaled by saisl / 32.266
        # (presumably a reference season length - confirm)
        tibble(
          date = unique(data_pheno$date),
          value =
            ((1.01) / (1 + exp((-0.863 * 32.266 / saisl) *
              (day_in_season - 5.626 * saisl / 32.266)))) *
              ((1.01) / (1 + exp((0.392 * 32.266 / saisl) *
                (day_in_season - 22.856 * saisl / 32.266)))),
          old_cosmo = 2 * (1 / (1 + exp(-day_in_season * 0.5)) - 0.5) * exp(-((0.08 * day_in_season - 0.8) *
            (0.08 * day_in_season - 0.8))) * (1 / (1 + exp((log(39) + 8) / saisl * day_in_season - 8)) - 0.025)
        ) %>%
          # Bring both curves to a common [0, 1] range
          mutate(
            value = rescale(value, to = c(0, 1)),
            old_cosmo = rescale(old_cosmo, to = c(0, 1))
          )
      }) %>%
      # Name each curve after its date (assumes every station has one row
      # per date, in the same order)
      setNames(unique(data_pheno$date)))
}

In [None]:
# Season-description curves for species "poac".
# For every station and every row of data_pheno (the "current" day) a full
# curve over all dates is computed as seen from that day. Unlike the other
# species, the season length is taken directly from the current row
# (current$saisl) instead of being derived from a season-end threshold.
# The result is a nested list, curves_pheno[[station]][[date]], of tibbles
# with the columns date, value and old_cosmo, both rescaled to [0, 1].
if (species_sel == "poac") {
  curves_pheno <- data_pheno %>%
    split(.$station) %>%
    # Looping through the stations
    map(~ .x %>%
      # Looping through the days
      pmap(function(...) {
        # The columns of one row (one station-day) as a one-row tibble
        current <- tibble(...)

        # Running count of in-season days for every date of this station.
        # NOTE(review): cumsum() relies on the rows already being in date
        # order - confirm.
        day_in_season <- data_pheno %>%
          filter(station == current$station) %>%
          mutate(
            # Daily increments for continuing the temperature sum while the
            # season has not started on the current day: 0 before the current
            # day, ctsum on it, t2m_weighted afterwards. There is no fallback
            # branch, so this is NA when current$saisn > 0; in that case
            # current_status is TRUE and ctsum is used below instead.
            t2m_dummy = case_when(
              date < current$date & current$saisn <= 0 ~ 0,
              date == current$date & current$saisn <= 0 ~ ctsum,
              date > current$date & current$saisn <= 0 ~ t2m_weighted
            ),
            # Vectorize the condition
            # (the scalar test on the current day is stored as a column so it
            # has one entry per row for the if_else() below)
            current_status = if_else(current$ctsum >= current$tthrs | current$saisn > 0, TRUE, FALSE),
            # Sum compared against tthrs: ctsum if the season has started or
            # the threshold is already reached, the continued sum otherwise
            t2m_threshold = if_else(current_status, ctsum, cumsum(t2m_dummy)),
            in_season = if_else(t2m_threshold >= current$tthrs, 1, 0),
            day_in_season = cumsum(in_season)
          ) %>%
          pull(day_in_season)

        # value: product of a rising and a falling logistic curve in
        # day_in_season, with the constants scaled by current$saisl / 86.538
        # (presumably a reference season length - confirm).
        # NOTE(review): the last factor of old_cosmo,
        # `(log(499) + 8) / current$saisl * -7`, does not depend on
        # day_in_season, unlike the rest of the expression - confirm whether
        # `* day_in_season - 8` (or similar) was intended.
        tibble(
          date = unique(data_pheno$date),
          value =
            ((1.01) / (1 + exp((-0.275 * 86.538 / current$saisl) *
              (day_in_season - 16.878 * current$saisl / 86.538)))) *
              ((1.01) / (1 + exp((0.156 * 86.538 / current$saisl) *
                (day_in_season - 60.440 * current$saisl / 86.538)))),
          old_cosmo = 1.03 * (1 / (1 + exp(-0.3 * day_in_season + 3.5)) - 0.03) *
            (1 / (1 + exp((log(499) + 8) / current$saisl * -7)) - 0.001)
        ) %>%
          # Bring both curves to a common [0, 1] range
          mutate(
            value = rescale(value, to = c(0, 1)),
            old_cosmo = rescale(old_cosmo, to = c(0, 1))
          )
      }) %>%
      # Name each curve after its date (assumes every station has one row
      # per date, in the same order)
      setNames(unique(data_pheno$date)))
}

In [None]:

dates <- unique(data_pheno$date)
# dates <- as.Date("2020-02-26")
# station_sel <- unique(data_pheno$station) # all stations
station_sel <- "Zürich"

# Season-end diagnostic shown in the second label. Grasses (poac) carry the
# saisl columns, the other species the tthre columns; hard-coding tthre made
# the plot fail for poac.
end_var <- if (species_sel == "poac") "saisl" else "tthre"
end_change <- paste0("change_", end_var)
end_col <- paste0("col_", end_var)

# One ggplot per station and date (one animation frame each):
#   bars           measurements up to the frame date, rescaled to [0, 1]
#   yellow line    curve computed for the frame date
#   dotted purple  old_cosmo curve for the frame date
#   red line       curve computed for the first date (reference)
#   brown stars    sdes up to the frame date
#   purple stars   data_osm values, last frame only
#   labels         daily change of tthrs and of the season-end field, a
#                  "< T_Base" marker and the current saisn
# Result: plot_collection[[station]] is a list of plots in `dates` order.
plot_collection <- map(station_sel, function(station_) {
  # Everything that does not depend on the frame date is prepared once.
  # The measurements are rescaled over the whole series before any date
  # filtering, so the bar heights stay comparable between frames.
  obs_station <- data_dwh %>%
    filter(station == station_) %>%
    mutate(value = rescale(value, to = c(0, 1)))
  pheno_station <- data_pheno %>%
    filter(station == station_)
  osm_station <- data_osm %>%
    filter(station == station_)
  first_day <- min(data_pheno$date)
  last_day <- max(data_pheno$date)
  curve_first <- curves_pheno[[station_]][[as.character(min(dates))]]

  map(dates, function(date_) {
    today <- pheno_station %>%
      filter(date == date_)
    curve_today <- curves_pheno[[station_]][[as.character(date_)]]

    obs_station %>%
      filter(date <= date_) %>%
      ggplot() +
      ylim(c(0, 1)) +
      scale_x_date(
        date_breaks = "1 month",
        date_labels = "%B",
        limits = c(first_day, last_day)
      ) +
      # The label colours are hex codes stored in the data; without an
      # identity scale ggplot2 treats them as categories and draws them with
      # palette colours instead.
      scale_color_identity() +
      ylab("") +
      xlab(min(year(data_pheno$date))) +
      ggtitle(
        paste("Season Description in Cosmo -", toupper(station_), year(date_))
      ) +
      geom_col(
        aes(x = date, y = value),
        col = "#1ba9ade7",
        fill = "#1ba9ade7"
      ) +
      geom_line(
        data = curve_today,
        aes(x = date, y = value),
        col = "#f5c04e"
      ) +
      geom_line(
        data = curve_today,
        aes(x = date, y = old_cosmo),
        col = "#8f48d1",
        lty = 3,
        alpha = 0.6
      ) +
      geom_line(
        data = curve_first,
        aes(x = date, y = value),
        col = "#f15236",
        alpha = 0.6
      ) +
      geom_point(
        data = pheno_station %>% filter(date <= date_),
        aes(x = date, y = sdes),
        col = "#6b4900",
        shape = 8
      ) +
      {
        # Only on the last frame; NULL (no layer) otherwise
        if (date_ == last_day) {
          geom_point(
            data = osm_station %>% filter(date <= date_),
            aes(x = date, y = value),
            col = "#620288",
            shape = 8
          )
        }
      } +
      geom_label(
        data = today,
        aes(
          label = paste("tthrs:", round(change_tthrs)),
          color = col_tthrs
        ),
        x = first_day,
        hjust = "inward",
        y = 0.95,
        show.legend = FALSE
      ) +
      geom_label(
        data = today,
        aes(
          label = paste(paste0(end_var, ":"), round(.data[[end_change]])),
          color = .data[[end_col]]
        ),
        x = first_day,
        hjust = "inward",
        y = 0.9,
        show.legend = FALSE
      ) +
      {
        # isTRUE() guards against NA or missing flags, which would make a
        # bare if() fail
        if (isTRUE(today$log_t2m)) {
          geom_label(
            data = today,
            aes(label = paste(change_t2m)),
            x = first_day,
            color = "#e23d37",
            hjust = "inward",
            y = 0.85,
            show.legend = FALSE
          )
        }
      } +
      geom_label(
        data = today,
        aes(label = paste("Day in Season:", round(saisn))),
        # Constant colour belongs outside aes()
        color = "#161414",
        x = first_day,
        hjust = "inward",
        y = 0.8,
        show.legend = FALSE
      )
  })
}) %>%
  set_names(station_sel)

In [None]:
# Write one animated GIF per selected station; every plot of the station is
# printed as one frame, in list order.
walk(station_sel, function(station_) {
  frames <- plot_collection[[station_]]
  gif_file <- paste0(
    here(), "/vignettes/figures/Phenology", station_, ".gif"
  )

  saveGIF(
    walk(frames, print),
    movie.name = gif_file,
    interval = 2 / 25,
    ani.width = 720,
    ani.height = 480
  )
})