In [None]:
library(dplyr)
library(data.table)
library(tidyverse)
library(ggplot2)
library(ggalt)
library(viridis)
library(patchwork)
library(lubridate)
library(ggh4x)
library(ggrepel)
library(ggpubr)
library(svglite)

# Analysis of pairwise distances

In [None]:
# Read the pairwise stability table and add dM: the difference between
# Mean_Lineage and Mean_Ref_Lineage, expressed as a percentage of
# Mean_Ref_Lineage.
df <- mutate(
    fread(snakemake@input$stabilities_pairwise),
    dM = (Mean_Lineage - Mean_Ref_Lineage) / Mean_Ref_Lineage * 100
)
# Show which columns are available.
names(df)

In [None]:
# Per-lineage abundance/date table (it is joined onto df2 later as well).
abundance_dates_per_lineage <- fread(snakemake@input$abundance_dates_per_lineage)

# Attach it to the pairwise table, matching df$Lineage against the table's
# `lineage` column and keeping every row of df.
df <- left_join(
    df,
    abundance_dates_per_lineage,
    by = c("Lineage" = "lineage")
)

In [None]:
# Rank lineages from the largest to the smallest dM.
dfsort <- arrange(select(df, Lineage, dM), desc(dM))

# Turn Lineage into a factor whose level order follows that ranking, so
# plots draw the lineages in descending dM order.
df$Lineage <- factor(df$Lineage, levels = dfsort$Lineage)

# Numeric 1/0 indicator: is Wilcox_less_P below 0.05?
df <- mutate(df, Wilcox_less_P_sig = if_else(Wilcox_less_P < 0.05, 1, 0))
df

In [None]:
# Notebook display settings: 9 x 4 canvas, SVG output.
options(
    repr.plot.width = 9,
    repr.plot.height = 4,
    jupyter.plot_mimetypes = "image/svg+xml"
)

# Lollipop chart of dM per lineage with a red reference line at zero.
p1 <- ggplot(data = df) +
    geom_lollipop(
        mapping = aes(x = Lineage, y = dM, color = Lineage),
        point.size = 4
    ) +
    # The colour legend is suppressed; lineages are already on the x axis.
    guides(colour = "none") +
    labs(y = "Stability change, %") +
    theme(
        axis.text.x = element_text(angle = 90),
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        text = element_text(size = 20)
    ) +
    geom_hline(yintercept = 0, color = "red")

p1

In [None]:
# Derive significance indicators from the test p-value columns.
df <- df %>%
    mutate(
        # Logical flag: Anova_zip_P at or below 0.01. The comparison itself
        # already yields TRUE/FALSE/NA, so it is used directly instead of
        # wrapping it in if_else() with the reassignable T/F shorthands.
        Anova_zip_P_sig = Anova_zip_P <= 0.01,
        # Text label for the two-tailed Wilcoxon p-value at the 0.05 level;
        # mapped to point colour in the scatter plot further down.
        Wilcox_twotailed_P_sig = if_else(
            Wilcox_twotailed_P <= 0.05, "p ≤ 0.05", "p > 0.05"
        )
    )
df

In [None]:
# Keep only the week*/month*/quarter* columns and coerce each one to numeric.
dates_data <- df %>%
    select(starts_with("week"), starts_with("month"), starts_with("quarter")) %>%
    mutate(across(everything(), as.numeric))
nrow(dates_data)

# Complement: every column except those date columns and Lineage.
data_of_interest <- select(
    df,
    -c(starts_with("week"), starts_with("month"), starts_with("quarter"), Lineage)
)
# plot(dates_data$quarter, data_of_interest$Mean_Lineage)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)

# Correlation between dM and month_peak (coerced to numeric); the rounded
# estimate and p-value are printed inside the plot.
cort <- cor.test(df$dM, as.numeric(df$month_peak))
p <- round(cort$p.value, digits = 3)
r <- round(cort$estimate, digits = 2)

# Scatter plot of dM against month_peak: point size = Counts, point colour =
# two-tailed Wilcoxon label, with a linear fit and repelled lineage labels.
pairplot <- ggplot(data = df, aes(x = month_peak, y = dM, label = Lineage)) +
    geom_point(aes(size = Counts, color = Wilcox_twotailed_P_sig)) +
    # x/y are inherited from the ggplot() call, so no per-layer aes() needed.
    geom_smooth(method = "lm", se = TRUE) +
    scale_size(range = c(2.1, 4), name = "Abundance") +
    xlab("Month of maximum abundance") +
    # The earlier ylab("Estimation") was dropped: it was always overridden by
    # this label.
    ylab("Difference in stability, %") +
    theme(
        text = element_text(size = 9),
        axis.text.x = element_text(size = 5)
    ) +
    geom_text_repel(size = 2.5) +
    # Anchor the statistics text at the top-left of the data range. na.rm keeps
    # the anchor defined when the left join left some lineages without
    # month_peak.
    annotate(
        geom = "text",
        x = min(df$month_peak, na.rm = TRUE),
        y = max(df$dM, na.rm = TRUE),
        label = paste0("r = ", r, " p = ", p),
        hjust = "left",
        size = 3
    ) +
    labs(color = "Wilcoxon")

# NOTE(review): the file is written relative to the working directory rather
# than to a declared Snakemake output -- confirm this is intended.
ggsave("pairplot.svg", plot = pairplot, width = 5, height = 4)
pairplot
#pairplot

# + facet_grid2(cols = vars(Measure),scales="free",  independent = "y")

In [None]:
# Analysis of distances with all lineages taken together

In [None]:
# Read the common (all lineages together) stability table.
df2 <- fread(snakemake@input$stabilities_common)
# Drop the .group column and rename Sample so it matches the Lineage key.
df2 <- select(df2, -.group)
df2 <- rename(df2, Lineage = Sample)
# Attach the per-lineage abundance/date information, keeping every row of df2.
df2 <- left_join(
    df2,
    abundance_dates_per_lineage,
    by = c("Lineage" = "lineage")
)
df2

In [None]:
# Date-like columns of df2 (week*/month*/quarter*), each coerced to numeric.
dates_data <- df2 %>%
    select(starts_with("week"), starts_with("month"), starts_with("quarter")) %>%
    mutate(across(everything(), as.numeric))
nrow(dates_data)

# Remaining columns (date columns and Lineage excluded), numeric ones only.
data_of_interest <- select(
    select(
        df2,
        -c(starts_with("week"), starts_with("month"), starts_with("quarter"), Lineage)
    ),
    where(is.numeric)
)

# Correlation matrix: date columns (rows) against the other numeric columns.
cor(dates_data, data_of_interest)
# plot(dates_data$quarter, data_of_interest$Mean_Lineage)

In [None]:
# Narrow df2 down to the columns used further down in the notebook.
df2 <- select(
    df2,
    all_of(c("Lineage", "month_peak", "emmean", "asymp.LCL", "asymp.UCL", "Counts"))
)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)

# Correlation of month_peak (coerced to numeric) with asymp.LCL, the value
# that is plotted ...
cort <- cor.test(df2$asymp.LCL, as.numeric(df2$month_peak))
p <- round(cort$p.value, digits = 4)
r <- round(cort$estimate, digits = 2)

# ... and with emmean, reported in a second annotation line.
cort2 <- cor.test(df2$emmean, as.numeric(df2$month_peak))
p2 <- round(cort2$p.value, digits = 4)
r2 <- round(cort2$estimate, digits = 2)

# Scatter plot of asymp.LCL against month_peak for the common analysis, with
# point size = Counts, a linear fit and repelled lineage labels.
commonplot <- ggplot(data = df2, aes(x = month_peak, y = asymp.LCL, label = Lineage)) +
    geom_point(aes(size = Counts)) +
    # x/y are inherited from the ggplot() call.
    geom_smooth(method = "lm", se = TRUE) +
    scale_size(range = c(2.1, 4), name = "Abundance") +
    xlab("Month of maximum abundance") +
    # The earlier ylab("Estimation") was dropped (always overridden by this
    # label), as was labs(color = ...): this plot maps no colour aesthetic.
    ylab("asymp.LCL, days") +
    theme(
        text = element_text(size = 9),
        axis.text.x = element_text(size = 5)
    ) +
    geom_text_repel(size = 2.5) +
    # The annotation anchors must come from df2, the table being plotted.
    # They previously read df (the pairwise table), whose emmean column is not
    # created anywhere in the visible code, so the text position did not
    # follow the plotted data.
    annotate(
        geom = "text",
        x = min(df2$month_peak, na.rm = TRUE),
        y = max(df2$emmean, na.rm = TRUE),
        label = paste0("asymp.LCL, r = ", r, " p = ", p),
        hjust = "left",
        size = 3
    ) +
    # Second line, placed 5% above the first; label typo "emean" corrected.
    annotate(
        geom = "text",
        x = min(df2$month_peak, na.rm = TRUE),
        y = max(df2$emmean, na.rm = TRUE) * 1.05,
        label = paste0("emmean, r = ", r2, " p = ", p2),
        hjust = "left",
        size = 3
    )

#ggsave("pairplot.svg", plot = pairplot, width = 10, height = 8)
commonplot

In [None]:
# Wider canvas for the combined figure.
options(repr.plot.height = 4, repr.plot.width = 10)
# Compose the pairwise and common scatter plots with patchwork's `|` operator.
(pairplot | commonplot)


In [None]:

# Combine both analyses into one long table: one row per lineage and Measure
# ("dM" from the pairwise table, "emmean" from the common table).
dfboth <- df2 %>%
    # Counts is dropped here so the copy coming from df is kept un-suffixed.
    select(-Counts) %>%
    left_join(df, by = "Lineage") %>%
    select(Lineage, dM, emmean, asymp.LCL, asymp.UCL, Counts) %>%
    pivot_longer(
        cols = c("dM", "emmean"),
        names_to = "Measure",
        values_to = "Value"
    ) %>%
    # The confidence limits belong to emmean only, so blank them on dM rows.
    # asymp.UCL previously received asymp.LCL by copy-paste; it now keeps its
    # own values. NA_real_ keeps if_else() type-stable across dplyr versions.
    mutate(
        asymp.LCL = if_else(Measure != "dM", asymp.LCL, NA_real_),
        asymp.UCL = if_else(Measure != "dM", asymp.UCL, NA_real_)
    )

# Order the Lineage factor by decreasing dM, as in the pairwise table.
dfbothLineagerank <- df %>%
    arrange(-dM)
dfboth$Lineage <- factor(dfboth$Lineage, levels = dfbothLineagerank$Lineage)



In [None]:
# Lollipop chart of Value per lineage, one panel per Measure stacked in a
# single column, each panel with free scales and a red zero line.
p1 <- ggplot(data = dfboth) +
    geom_lollipop(
        mapping = aes(x = Lineage, y = Value, color = Lineage),
        point.size = 4
    ) +
    guides(colour = "none") +
    labs(y = "Stability change, %") +
    theme(
        axis.text.x = element_text(angle = 90),
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        text = element_text(size = 20)
    ) +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(vars(Measure), ncol = 1, scales = "free")

# Display the combined long table.
dfboth