In [None]:
library(data.table)
library(dplyr)
library(dtplyr)
library(lubridate)

In [None]:
# Load the pangolin lineage table. Only the `taxon` and `lineage` columns
# are read, both forced to character. The path is taken from the
# `snakemake` object's `input` slot.
pangolin_f <- snakemake@input$pangolin
pangolin <- fread(
  pangolin_f,
  select = c("taxon", "lineage"),
  colClasses = c(taxon = "character", lineage = "character"),
  showProgress = TRUE
)

In [None]:
# Load the sequence metadata. Only `strain`, `date` and `country` are read,
# all as character, so `date` is kept as the raw string at this point.
meta_f <- snakemake@input$meta
meta <- fread(
  meta_f,
  select = c("strain", "date", "country"),
  colClasses = c(
    strain = "character",
    date = "character",
    country = "character"
  ),
  showProgress = TRUE
)

In [None]:
# Join lineage calls onto the metadata through dtplyr lazy tables
# (meta$strain matches pangolin$taxon; strains without a call keep an NA
# lineage), then keep only rows whose date string is exactly 10 characters
# long -- presumably complete YYYY-MM-DD dates; verify against the input.
pangolin <- lazy_dt(pangolin)
meta <- lazy_dt(meta) %>%
  left_join(pangolin, by = c("strain" = "taxon")) %>%
  filter(nchar(date) == 10) %>%
  as.data.table()

In [None]:
# Display the Lithuanian rows for a quick visual check; nothing is assigned.
meta %>%
  filter(country == "Lithuania")

In [None]:
# Filter out improperly formatted dates: drop every row whose date string
# contains a "/" or an "X", then convert the remaining strings to dates.
meta <- meta %>%
  mutate(date = as.character(date)) %>%
  filter(
    !grepl(pattern = "/", x = date, fixed = TRUE),
    !grepl(pattern = "X", x = date, fixed = TRUE)
  ) %>%
  as.data.table()
# Namespace-qualified on purpose: base R also has a `date()` (which takes no
# arguments), so the bare name only works while lubridate masks it.
meta$date <- lubridate::date(meta$date)

In [None]:
# Add `month`: each sample date rounded down to the first day of its month.
meta$month <- lubridate::floor_date(meta$date, unit = "month")

In [None]:
# Add `week`: each sample date rounded down to the start of its week
# (the week start day is whatever lubridate's default is; none is set here).
meta$week <- lubridate::floor_date(meta$date, unit = "week")

In [None]:
# Add `quarter`: each sample date rounded down to the first day of its
# calendar quarter.
meta$quarter <- lubridate::floor_date(meta$date, unit = "quarter")

In [None]:
# Lineages among the Lithuanian samples: count per lineage, express each
# count as a fraction of all Lithuanian samples, and keep the lineages with
# a fraction of at least 0.05, largest fraction first.
lt_lineage_counts <- meta %>%
  filter(country == "Lithuania") %>%
  group_by(lineage) %>%
  summarise(counts = n())

lt_abund <- lt_lineage_counts %>%
  mutate(frac = counts / sum(counts)) %>%
  ungroup() %>%
  as.data.table() %>%
  arrange(desc(frac)) %>%
  filter(frac >= 0.05) %>%
  as.data.frame()

fwrite(lt_abund, file = snakemake@output$most_abundad_lt)

In [None]:
# Number of samples per lineage and quarter.
lineage_cnt <- meta %>%
  group_by(lineage, quarter) %>%
  summarise(N = n()) %>%
  ungroup() %>%
  as.data.table()

# For every quarter take the two largest lineages (ties are kept), then
# discard any of those with 1000 samples or fewer in that quarter.
lineage_cnt2 <- data.table(lineage_cnt) %>%
  dplyr::group_by(quarter) %>%
  dplyr::slice_max(order_by = N, n = 2) %>%
  filter(N > 1000) %>%
  ungroup()

# Lineages of interest: the quarterly top lineages plus the lineages that
# are abundant in Lithuania, without duplicates.
chosen <- data.frame(
  chosen = union(lineage_cnt2$lineage, lt_abund$lineage)
)
fwrite(chosen, file = snakemake@output$quarterly_most_abundand)

In [None]:
# Total number of samples per lineage over the whole data set (`Counts`).
meta_by_lineage <- group_by(meta, lineage)
lineage_cnt_all <- meta_by_lineage %>%
  summarise(Counts = n()) %>%
  ungroup() %>%
  as.data.table()

In [None]:
# For each lineage and each of three calendar granularities (week, month,
# quarter) determine:
#   * peak  -- the period with the most samples,
#   * start -- the earliest period whose count exceeds 5% of the lineage's
#              largest per-period count,
#   * end   -- the latest such period.
# The nine resulting tables are then joined into one row per lineage,
# together with the lineage's total sample count.

# Peak / start / end period per lineage for one period column.
#
# samples     data frame with a `lineage` column and the column named by
#             `period_col`.
# period_col  name of the floored-date column to aggregate on, e.g. "week".
# min_frac    a period counts as "present" when its sample count is strictly
#             greater than this fraction of the lineage's largest
#             per-period count.
#
# Returns a list with elements `peak`, `start` and `end`. Each is a tibble
# with `lineage` and one date column named `<period_col>_<element name>`.
lineage_period_bounds <- function(samples, period_col, min_frac = 0.05) {
  # Count samples once per (lineage, period) and reuse the result for all
  # three summaries.
  period_counts <- tibble(
    lineage = samples[["lineage"]],
    period = samples[[period_col]]
  ) %>%
    group_by(lineage, period) %>%
    summarise(counts = n(), .groups = "drop") %>%
    group_by(lineage)

  # Periods in which the lineage is "present" relative to its own maximum.
  present <- period_counts %>%
    filter(counts / max(counts) > min_frac)

  # Drop the grouping and the counts, and give the date column its final
  # name.
  finish <- function(tbl, suffix) {
    out <- tbl %>%
      ungroup() %>%
      select(lineage, period)
    names(out)[names(out) == "period"] <- paste0(period_col, "_", suffix)
    out
  }

  # Start/end are ordered by the period date itself. Ordering by an offset
  # from the global minimum date would turn every key into NA as soon as a
  # single date is NA.
  list(
    peak = finish(
      slice_max(period_counts, order_by = counts, n = 1, with_ties = FALSE),
      "peak"
    ),
    start = finish(
      slice_min(present, order_by = period, n = 1, with_ties = FALSE),
      "start"
    ),
    end = finish(
      slice_max(present, order_by = period, n = 1, with_ties = FALSE),
      "end"
    )
  )
}

week_bounds <- lineage_period_bounds(meta, "week")
abund_week_peak <- week_bounds$peak
abund_week_start <- week_bounds$start
abund_week_end <- week_bounds$end

month_bounds <- lineage_period_bounds(meta, "month")
abund_month_peak <- month_bounds$peak
abund_month_start <- month_bounds$start
abund_month_end <- month_bounds$end

quarter_bounds <- lineage_period_bounds(meta, "quarter")
abund_quarter_peak <- quarter_bounds$peak
abund_quarter_start <- quarter_bounds$start
abund_quarter_end <- quarter_bounds$end

# One row per lineage: week, month and quarter peak/start/end dates followed
# by the overall sample count.
abundance_dates_per_lineage <- abund_week_peak %>%
  left_join(abund_week_start, by = "lineage") %>%
  left_join(abund_week_end, by = "lineage") %>%
  left_join(abund_month_peak, by = "lineage") %>%
  left_join(abund_month_start, by = "lineage") %>%
  left_join(abund_month_end, by = "lineage") %>%
  left_join(abund_quarter_peak, by = "lineage") %>%
  left_join(abund_quarter_start, by = "lineage") %>%
  left_join(abund_quarter_end, by = "lineage") %>%
  left_join(lineage_cnt_all, by = "lineage")


In [None]:
# Write the per-lineage abundance date table to the path given by the
# `abundance_dates_per_lineage` output.
fwrite(
  abundance_dates_per_lineage,
  file = snakemake@output$abundance_dates_per_lineage
)