In [None]:
library(dplyr)
library(data.table)
library(tidyr)
library(ggplot2)
library(patchwork)
library(ggrepel)

In [None]:
# Read every table listed under the snakemake `all` input and stack the
# results row-wise into one table.
dfall <- snakemake@input$all %>%
    lapply(fread) %>%
    bind_rows()

# Same procedure for the tables listed under the `lt` input.
dflt <- snakemake@input$lt %>%
    lapply(fread) %>%
    bind_rows()


In [None]:
# Per-lineage abundance/date table read from the snakemake input.
dfd <- fread(snakemake@input$abundance_dates_per_lineage)

# Key-only helper table (just the `lineage` column).
dfd_lineage <- dfd %>%
    select(lineage)

# Columns as read from disk: `lineage` first, followed by every other
# column except `Counts`, in their original relative order.
dfd_date <- dfd %>%
    select(lineage, everything(), -Counts)

# Same layout with every non-key column coerced to numeric.
# across() replaces the superseded mutate_all(); values that cannot be
# parsed as numbers become NA (with a warning), exactly as before.
dfd_numeric <- dfd_date %>%
    mutate(across(-lineage, as.numeric))

In [None]:
# Export table built from the `lt` results: the columns named "0" and "1",
# the Scope / Distance_cutoff / NES columns and the leadingEdge column are
# removed; every other column is kept untouched.
df_4out <- select(
    dflt,
    -all_of(c("0", "1", "Scope", "Distance_cutoff", "NES", "leadingEdge"))
)

# Disabled per-lineage summary, kept for reference:
#   mutate(WilcoxPsign = if_else(WilcoxP <= 0.05, 1, 0)) %>%
#   select(Lineage,WilcoxP,WilcoxPsign) %>%
#   group_by(Lineage) %>%
#   summarise(Frac=sum(WilcoxPsign)/n(), Mean = mean(WilcoxP)) %>%
#   ungroup() %>%
#   left_join(dfd_numeric, by = c("Lineage"="lineage")) %>%
#   select(-Lineage)

# Display the table in the notebook.
df_4out

In [None]:
# Per-lineage summary over `dfall`:
#   Frac - share of rows with WilcoxP <= 0.05
#   Mean - mean WilcoxP
# joined to the numeric date columns; the Lineage key is dropped afterwards
# so that only numeric columns remain for cor().
df_enrall <- dfall %>%
    select(Lineage, WilcoxP) %>%
    mutate(WilcoxPsign = as.numeric(WilcoxP <= 0.05)) %>%
    group_by(Lineage) %>%
    summarise(
        Frac = sum(WilcoxPsign) / n(),
        Mean = mean(WilcoxP)
    ) %>%
    ungroup() %>%
    left_join(dfd_numeric, by = c("Lineage" = "lineage")) %>%
    select(-Lineage)

# Split into the two summary statistics and the date-derived columns ...
df_enrall_notd <- df_enrall %>% select(Frac, Mean)
df_enrall_d <- df_enrall %>% select(-c(Frac, Mean))

# ... and show the correlation of every date column with Frac and Mean.
cor(x = df_enrall_d, y = df_enrall_notd)

In [None]:
# Lineage identifiers of interest.
# Written as an explicit character vector: the previous version split a
# single string on literal tab characters, which are invisible in source
# and silently break if an editor converts tabs to spaces.
ltstr <- c("AY.4.5", "Q.1", "B.1.1.7", "B.1.177.60", "BA.2", "BA.2.9")
ltstr

In [None]:
# Per-lineage summary joined to the peak month.
# Lookup table: lineage -> month_peak (values as read from disk).
dfd_date_month <- dfd_date %>%
    select(lineage, month_peak)

# Frac = share of rows with WilcoxP <= 0.05, Mean = mean WilcoxP, per
# lineage. This overwrites the `df_enrall` built earlier for cor() and,
# unlike that one, keeps the Lineage column.
df_enrall <- dfall %>%
    select(Lineage, WilcoxP) %>%
    mutate(WilcoxPsign = as.numeric(WilcoxP <= 0.05)) %>%
    group_by(Lineage) %>%
    summarise(
        Frac = sum(WilcoxPsign) / n(),
        Mean = mean(WilcoxP)
    ) %>%
    ungroup() %>%
    left_join(dfd_date_month, by = c("Lineage" = "lineage"))

In [None]:
# Row-level overview of `dfall`:
#   WilcoxPsign / fgseaPsign - 1 when the respective p-value is <= 0.05,
#                              0 otherwise (NA p-values stay NA)
#   dE                       - column `1` minus column `0`
dall_rev <- dfall %>%
    mutate(
        WilcoxPsign = as.numeric(WilcoxP <= 0.05),
        fgseaPsign = as.numeric(fgseaP <= 0.05),
        dE = `1` - `0`
    ) %>%
    select(Lineage, WilcoxP, fgseaP, WilcoxPsign, fgseaPsign, dE)

# Disabled follow-up steps, kept for reference:
#   select(Lineage,WilcoxP,WilcoxPsign) %>%
#   left_join(dfd_date_month, by = c("Lineage"="lineage"))

# Display the table in the notebook.
dall_rev

In [None]:
# Quick look: distribution of WilcoxP within each lineage.
ggplot(dfall, aes(x = Lineage, y = WilcoxP)) +
    geom_boxplot()

In [None]:
# Order lineages chronologically by their peak month.
# Only lineages that occur in `dfall` are kept.
dfd4s <- select(dfd, Lineage = lineage, month_peak) %>%
    filter(Lineage %in% dfall$Lineage) %>%
    mutate(month_peak_num = as.numeric(month_peak)) %>%
    # Sort on the numeric key: sorting the raw column would order
    # lexicographically ("10" before "2") if it was read as text.
    arrange(month_peak_num)

# Use that order as the factor levels so plots show lineages by date.
# Lineages absent from `dfd` become NA here.
dall_rev$Lineage <- factor(dall_rev$Lineage, levels = dfd4s$Lineage)

In [None]:
# Treat a missing fgsea significance flag as "not significant".
dall_rev$fgseaPsign <- replace(
    dall_rev$fgseaPsign,
    is.na(dall_rev$fgseaPsign),
    0
)

# Per-lineage share of rows with WilcoxP <= 0.05 and average WilcoxP
# (`<p>`), reshaped to long form: the share ends up in `Fraction`, with
# its descriptive label in `Test`.
dall_rev_sum <- dall_rev %>%
    group_by(Lineage) %>%
    summarise(
        `Fraction with p ≤ 0.05` = sum(WilcoxPsign) / n(),
        `<p>` = sum(WilcoxP) / n()
    ) %>%
    pivot_longer(
        cols = all_of("Fraction with p ≤ 0.05"),
        names_to = "Test",
        values_to = "Fraction"
    )
head(dall_rev_sum)

In [None]:
# Notebook figure size. The height key used to be misspelled as
# `rep.plot.height`, so only the width took effect.
options(repr.plot.width = 7, repr.plot.height = 3)

#facet_grid(rows = vars(drv))

# Lower panel: distribution of dE per lineage, with rotated lineage labels.
de <- ggplot(dall_rev) +
    geom_boxplot(aes(x = Lineage, y = dE)) +
    theme(
        axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1),
        text = element_text(size = 20)
    )

# Upper panel: fraction of significant tests per lineage. Its x axis text
# and ticks are hidden because the panel sits on top of `de`.
# NOTE(review): the y label contains literal backticks, which will be
# rendered in the figure - confirm whether that is intended.
fr_vs_l <- ggplot(dall_rev_sum) +
    geom_point(aes(x = Lineage, y = Fraction)) +
    ylab("`Fraction with \n p ≤ 0.05`") +
    xlab("") +
    theme(
        axis.text.x = element_blank(), # remove x axis labels
        axis.ticks.x = element_blank(), # remove x axis ticks
        text = element_text(size = 20)
    )

# Stack the panels at a 1:2 height ratio. `heights` is spelled out in full
# rather than relying on partial matching of `height`.
devslin <- fr_vs_l / de + plot_layout(heights = c(1, 2))

In [None]:
# Spearman rank correlation between the per-lineage fraction of
# significant tests and the (numeric) peak month.
crt <- cor.test(
    x = df_enrall$Frac,
    y = as.numeric(df_enrall$month_peak),
    method = "spearman"
)



In [None]:
# Notebook figure size. The height key used to be misspelled as
# `rep.plot.height`, so only the width took effect.
options(repr.plot.width = 8, repr.plot.height = 7)
# Scatter of the per-lineage fraction against peak month, with lineage
# labels, a linear fit, and the Spearman estimate / p-value in the title.
# `label` is mapped only in the text layer so that it does not become a
# grouping variable for geom_smooth().
trnd <- ggplot(df_enrall, aes(x = month_peak, y = Frac)) +
    geom_point() +
    theme(text = element_text(size = 20)) +
    xlab("Month") +
    ylab("`Fraction with \n p ≤ 0.05`") +
    geom_text_repel(aes(label = Lineage), max.overlaps = Inf, size = 5) +
    geom_smooth(method = "lm") +
    ggtitle(paste0(
        "r = ", round(crt$estimate, digits = 2),
        " p = ", round(crt$p.value, digits = 4)
    ))

In [None]:
# Notebook figure size. The height key used to be misspelled as
# `rep.plot.height`, so only the width took effect.
options(repr.plot.width = 15, repr.plot.height = 7)

# Final figure: stacked per-lineage panels on the left, trend plot on the
# right, with panels tagged a, b, c, ...
outimg <- (devslin | trnd) + plot_annotation(tag_levels = "a")

# Show in the notebook, then write to the snakemake image output.
outimg
ggsave(snakemake@output$image, outimg, width = 15, height = 7)

In [None]:
# Write the output data (only the Lithuanian table) to the snakemake
# data output.
fwrite(df_4out, file = snakemake@output$data)

# Display what was written.
df_4out