In [None]:
library(dplyr)
library(data.table)
library(tidyr)
library(ggplot2)
library(patchwork)
library(svglite)
library(fgsea)

In [None]:
contacts_file = snakemake@input$contacts
entropy_file = snakemake@input$coservation
#read entropy
dfe <- fread(entropy_file)
#read contacts
dfcnt = fread(cmd = paste("sed s/?/NA/g ",contacts_file))
colnames(dfcnt) <- paste("Cluster",1:ncol(dfcnt),sep="_")
dfcnt$Position = 1:nrow(dfcnt)

dfcnt_tmp <- dfcnt %>%
    pivot_longer(cols = starts_with("Cluster"),names_to = "Cluster",values_to = "Contact")

dfcnt_tmp <- dfcnt_tmp %>%
    group_by(Position) %>%
    filter(!is.na(Contact)) %>%
    summarise(Cluster_all=min(Contact,na.rm = T)) 

dfcnt <- dfcnt %>%
    left_join(dfcnt_tmp,by="Position") %>%
    select(Position,starts_with("Cluster")) %>%
    pivot_longer(cols = starts_with("Cluster"),names_to = "Cluster",values_to = "Contact") %>%
    mutate(ID = sub("Cluster_","",Cluster))

dfcntshort <- dfcnt %>%
    filter(Contact <= 4.5)

In [None]:
options(repr.plot.width=9, repr.plot.height=7, jupyter.plot_mimetypes = "image/svg+xml")
contact_review <- ggplot(data=dfcnt, aes(x=Position,y=Contact)) + geom_point(aes(color=ID),size=0.5) + facet_wrap(~ID) + theme(text = element_text(size = 12), legend.position = "none") + ylab("Contact, Å")
contact_reviewshort <- ggplot(data=dfcntshort, aes(x=Position,y=Contact)) + geom_point(aes(color=ID),size=0.5) + facet_wrap(~ID) + theme(text = element_text(size = 12), legend.position = "none") + ylab("Contact, Å")

contact_reviewall <- contact_review /contact_reviewshort
contact_reviewall <- contact_reviewall + plot_annotation(tag_levels = 'a')
ggsave(snakemake@output$image, contact_reviewall, width = 9, height = 7)
contact_reviewall

In [None]:
sco = snakemake@wildcards$scope
lineage = snakemake@wildcards$id
contact_limit = 4.5
dfe <- fread(entropy_file)
ct = 0
rez = list()


for (cluster_id in unique(dfcnt$Cluster) ) {
    ct = ct + 1
     print(paste("Working on",cluster_id,contact_limit ))
    contacts_df <- dfcnt %>%
        dplyr::filter(Contact > contact_limit & Cluster == cluster_id) %>%
        mutate(Contact=1) %>%
        mutate(Cluster=NULL)

    dftest <- dfe %>%
        left_join(contacts_df, by= "Position") %>%
        mutate(Variability=1-Conservation)
    dftest$Contact[is.na(dftest$Contact)] <- 0
    dftest$Contact <- as.factor(dftest$Contact)
    df4print <- dftest %>%
        group_by(Contact) %>%
        summarise(M=median(Entropy)) %>%
        ungroup() %>%
        pivot_wider(names_from = Contact, values_from = M)
    wct = wilcox.test(Entropy ~ Contact, dftest,alternative = "less" )
    df4print$Lineage <- lineage
    df4print$Scope <- sco
    df4print$WilcoxP <- wct$p.value
    df4print$WilcoxP <- wct$p.value
    df4print$Distance_cutoff <- contact_limit
    df4print$Cluster <- cluster_id
    #fgsea
    position_set <- dftest$Position[dftest$Contact==1]
    position_set4fgsea <- list(set = unlist(as.character(position_set)) )
    dftest <- dftest %>%
        filter(!Entropy==0) %>%
        mutate(dE=Entropy-mean(Entropy)) %>%
        arrange(-dE)
    ranks4test <- dftest$dE
    names(ranks4test) <- as.character(dftest$Position)
    fgseaRes <- fgsea(pathways = position_set4fgsea, 
                      stats    = ranks4test,
                      minSize  = 15,
                      maxSize  = 500,
                      eps      = 0.0)
    if (nrow(fgseaRes) > 0) {
        df4print$fgseaP <- fgseaRes$pval
        df4print$NES <- fgseaRes$NES
        df4print$leadingEdge <- fgseaRes$leadingEdge
        rez[[ct]] = df4print
    } else {
        df4print$fgseaP <- NA
        df4print$NES <- NA
        df4print$leadingEdge <- NA
        rez[[ct]] = df4print    
    }
}

In [None]:
rez <- bind_rows(rez) %>%
    mutate(SignificantW = if_else(WilcoxP <= 0.05, "Y","N"),SignificantF = if_else(fgseaP <= 0.05, "Y","N")) %>% 
    rowwise() %>%
    mutate(leadingEdge = paste(leadingEdge,collapse = ",")) %>%
    as.data.table()

In [None]:
fwrite(file = snakemake@output$data, x = rez)