In [1]:
suppressPackageStartupMessages({
    library(ggplot2)
    library(ggpubr)
})

# Root directory of the current per-species result tables. It keeps its
# trailing slash because the reader functions build file names with
# paste0(path, <species>, "/", <type>, "/", ...).
path <- "../2.current_version.subtype/"

In [2]:
# Species for which both an ohnolog (WGD) and an SSD-paralog table are read.
species <- c("Hsap", "Mmus", "Pvit", "Pmar")

# Box-plot the `OR` column of the per-cell-type Fisher tables, WGD vs SSD,
# with one facet per species, and save it as "OR.subtype.summary.<type>.pdf"
# in the working directory.
#
# Args:
#   species: character vector of species codes. For each one the ohnolog and
#            SSD-paralog Fisher tables are read from
#            paste0(path, <species>, "/", <type>, "/").
#   type:    name of the result sub-directory (this notebook calls it with
#            "wilcox" and "roc"); also embedded in the output file name.
#
# "Bflo" is always appended from the older results tree and labelled SSD only.
# The function is called for its side effect (the PDF); its value is whatever
# ggsave() returns.
get_OR <- function(species, type) {
    bflo <- "Bflo"
    bflo_prefix <- paste0(
        "../1.old_versions/2.my_version.v1/", bflo, "/", type, "/", bflo, "_T1"
    )

    # One data frame per species: WGD rows stacked on top of SSD rows.
    per_species <- lapply(species, function(s) {
        prefix <- paste0(path, s, "/", type, "/", s)
        wgd <- read.delim(
            paste0(prefix, ".ohnolog_DEGs.fisher.celltype.txt"),
            header = TRUE
        )
        ssd <- read.delim(
            paste0(prefix, ".SSDparalog_DEGs.fisher.celltype.txt"),
            header = TRUE
        )

        wgd$type <- "WGD"
        ssd$type <- "SSD"

        OR_info <- rbind(wgd, ssd)
        OR_info$species <- s
        OR_info
    })
    df <- do.call(rbind, per_species)

    # Bflo has a single paralog table, treated here as SSD.
    pa <- read.delim(
        paste0(bflo_prefix, ".paralog_DEGs.fisher.celltype.txt"),
        header = TRUE
    )
    pa$type <- "SSD"
    pa$species <- bflo

    df <- rbind(df, pa)
    # Facet order follows the requested species, with Bflo last. Deriving the
    # levels from the argument keeps non-default species from turning into NA.
    df$species <- factor(df$species, levels = unique(c(species, bflo)))

    my_comparisons <- list(c("WGD", "SSD"))
    # NOTE(review): paired = TRUE assumes the WGD and SSD tables of a species
    # list the same cell types in the same row order, and the Bflo facet has
    # no WGD group to compare against -- confirm both are intended.
    p <- ggboxplot(df, x = "type", y = "OR", color = "type", palette = "jco") +
        stat_compare_means(
            comparisons = my_comparisons,
            method = "wilcox.test",
            paired = TRUE,
            label = "p.signif"
        ) +
        # Print each group's median above its box.
        stat_summary(
            fun = "median",
            geom = "text",
            aes(label = round(after_stat(y), 3)),
            vjust = -1
        ) +
        facet_wrap(~species, nrow = 1) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

    ggsave(
        filename = paste0("OR.subtype.summary.", type, ".pdf"),
        plot = p,
        width = 5,
        height = 7
    )
}


In [3]:
# Write the odds-ratio summary PDF for each result set.
invisible(lapply(c("wilcox", "roc"), function(test_type) {
    get_OR(species, test_type)
}))

In [4]:
# Species for which both an ohnolog (WGD) and an SSD-paralog table are read.
species <- c("Hsap", "Mmus", "Pvit", "Pmar")

# Box-plot the `paralogs.` column of the "ratio_inDEGs" tables, WGD vs SSD,
# with one facet per species and a dashed horizontal line at the background
# value read from the matching "ratio_bg" file. The plot is saved as
# "paralog_ratio.subtype.summary.<type>.pdf" in the working directory.
#
# Args:
#   species: character vector of species codes. Tables are read from
#            paste0(path, <species>, "/", <type>, "/").
#   type:    name of the result sub-directory (this notebook calls it with
#            "wilcox" and "roc"); also embedded in the output file name.
#
# "Bflo" is always appended from the older results tree and labelled SSD only.
# The function is called for its side effect (the PDF); its value is whatever
# ggsave() returns.
get_ratio <- function(species, type) {
    bflo <- "Bflo"
    bflo_prefix <- paste0(
        "../1.old_versions/2.my_version.v1/", bflo, "/", type, "/", bflo, "_T1"
    )
    # Every species that appears in the plot. This is derived from the
    # argument instead of overwriting `species` with a hard-coded vector, so
    # only the requested species' background files are read.
    all_species <- unique(c(species, bflo))

    per_species <- lapply(species, function(s) {
        prefix <- paste0(path, s, "/", type, "/", s)
        wgd <- read.delim(
            paste0(prefix, ".ohnolog_ratio_inDEGs.stats.txt"),
            header = TRUE
        )
        ssd <- read.delim(
            paste0(prefix, ".SSDparalog_ratio_inDEGs.stats.txt"),
            header = TRUE
        )

        wgd$type <- "WGD"
        ssd$type <- "SSD"
        # The WGD table takes the SSD table's column names so the two can be
        # row-bound and plotted from a single column.
        colnames(wgd) <- colnames(ssd)

        info <- rbind(wgd, ssd)
        info$species <- s
        info
    })
    df <- do.call(rbind, per_species)

    # Bflo has a single paralog table, treated here as SSD.
    pa <- read.delim(
        paste0(bflo_prefix, ".paralog_ratio_inDEGs.stats.txt"),
        header = TRUE
    )
    pa$type <- "SSD"
    pa$species <- bflo

    df <- rbind(df, pa)
    df$species <- factor(df$species, levels = all_species)

    # Background table: one headerless file per species, reduced to the
    # WGD/SSD rows and relabelled to match `df$type`.
    ratio_bg <- do.call(rbind, lapply(all_species, function(s) {
        if (s == bflo) {
            tmp <- read.delim(
                paste0(bflo_prefix, ".ratio_bg.txt"),
                header = FALSE
            )
            # Every row of the Bflo background file is labelled SSD.
            tmp$V1 <- "SSD"
        } else {
            tmp <- read.delim(
                paste0(path, s, "/", type, "/", s, ".ratio_bg.txt"),
                header = FALSE
            )
            tmp <- tmp[tmp$V1 %in% c("ohnologs", "SSDparalogs"), ]
            tmp[tmp$V1 == "SSDparalogs", "V1"] <- "SSD"
            tmp[tmp$V1 == "ohnologs", "V1"] <- "WGD"
        }
        tmp$species <- s
        tmp
    }))
    # Assumes the background files have exactly two columns (label, value).
    colnames(ratio_bg) <- c("type", "bg", "species")

    # Attach the background value to every row of its species/type group.
    df <- merge(df, ratio_bg, by = c("species", "type"))
    df$type <- factor(df$type, levels = c("WGD", "SSD"))

    p <- ggboxplot(
        df, x = "type", y = "paralogs.", color = "type", palette = "jco"
    ) +
        # Print each group's median above its box.
        stat_summary(
            fun = "median",
            geom = "text",
            aes(label = round(after_stat(y), 3)),
            vjust = -1
        ) +
        geom_hline(aes(yintercept = bg, color = type), linetype = "dashed") +
        facet_wrap(~species, nrow = 1) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

    ggsave(
        filename = paste0("paralog_ratio.subtype.summary.", type, ".pdf"),
        plot = p,
        width = 5,
        height = 7
    )
}


In [5]:
# Write the paralog-ratio summary PDF for each result set.
invisible(lapply(c("wilcox", "roc"), function(test_type) {
    get_ratio(species, test_type)
}))

In [6]:
# plot family/number of paralogs ratio
# Species for which both an ohnolog (WGD) and an SSD-paralog table are read.
species <- c("Hsap", "Mmus", "Pvit", "Pmar")

# Box-plot the `families_divided_by_paralogs.` column of the "ratio_inDEGs"
# tables, WGD vs SSD, with one facet per species and a dashed horizontal line
# at the background value read from the matching "family_ratio_bg" file. The
# plot is saved as
# "paralog_family_ratio.switching_within_species.subtype.summary.<type>.pdf"
# in the working directory.
#
# Args:
#   species: character vector of species codes. Tables are read from
#            paste0(path, <species>, "/", <type>, "/").
#   type:    name of the result sub-directory (this notebook calls it with
#            "wilcox" and "roc"); also embedded in the output file name.
#
# "Bflo" is always appended from the older results tree and labelled SSD only.
# The function is called for its side effect (the PDF); its value is whatever
# ggsave() returns.
get_fa_ratio <- function(species, type) {
    bflo <- "Bflo"
    bflo_prefix <- paste0(
        "../1.old_versions/2.my_version.v1/", bflo, "/", type, "/", bflo, "_T1"
    )
    # Every species that appears in the plot. This is derived from the
    # argument instead of overwriting `species` with a hard-coded vector, so
    # only the requested species' background files are read.
    all_species <- unique(c(species, bflo))

    per_species <- lapply(species, function(s) {
        prefix <- paste0(path, s, "/", type, "/", s)
        wgd <- read.delim(
            paste0(prefix, ".ohnolog_ratio_inDEGs.stats.txt"),
            header = TRUE
        )
        ssd <- read.delim(
            paste0(prefix, ".SSDparalog_ratio_inDEGs.stats.txt"),
            header = TRUE
        )

        wgd$type <- "WGD"
        ssd$type <- "SSD"
        # The WGD table takes the SSD table's column names so the two can be
        # row-bound and plotted from a single column.
        colnames(wgd) <- colnames(ssd)

        info <- rbind(wgd, ssd)
        info$species <- s
        info
    })
    df <- do.call(rbind, per_species)

    # Bflo has a single paralog table, treated here as SSD.
    pa <- read.delim(
        paste0(bflo_prefix, ".paralog_ratio_inDEGs.stats.txt"),
        header = TRUE
    )
    pa$type <- "SSD"
    pa$species <- bflo

    df <- rbind(df, pa)
    df$species <- factor(df$species, levels = all_species)

    # Background table: one headerless file per species, reduced to the
    # WGD/SSD rows and relabelled to match `df$type`.
    ratio_bg <- do.call(rbind, lapply(all_species, function(s) {
        if (s == bflo) {
            tmp <- read.delim(
                paste0(bflo_prefix, ".family_ratio_bg.txt"),
                header = FALSE
            )
            # Every row of the Bflo background file is labelled SSD.
            tmp$V1 <- "SSD"
        } else {
            tmp <- read.delim(
                paste0(path, s, "/", type, "/", s, ".family_ratio_bg.txt"),
                header = FALSE
            )
            tmp <- tmp[tmp$V1 %in% c("ohnologs", "SSDparalogs"), ]
            tmp[tmp$V1 == "SSDparalogs", "V1"] <- "SSD"
            tmp[tmp$V1 == "ohnologs", "V1"] <- "WGD"
        }
        tmp$species <- s
        tmp
    }))
    # Assumes the background files have exactly two columns (label, value).
    colnames(ratio_bg) <- c("type", "bg", "species")

    # Attach the background value to every row of its species/type group.
    df <- merge(df, ratio_bg, by = c("species", "type"))
    df$type <- factor(df$type, levels = c("WGD", "SSD"))

    # NOTE(review): the recorded run of this notebook warned that 6 rows with
    # non-finite values were dropped by stat_boxplot()/stat_summary() --
    # presumably ratios with a zero denominator; confirm upstream.
    p <- ggboxplot(
        df,
        x = "type",
        y = "families_divided_by_paralogs.",
        color = "type",
        palette = "jco"
    ) +
        # Print each group's median above its box.
        stat_summary(
            fun = "median",
            geom = "text",
            aes(label = round(after_stat(y), 3)),
            vjust = -1
        ) +
        geom_hline(aes(yintercept = bg, color = type), linetype = "dashed") +
        facet_wrap(~species, nrow = 1) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

    ggsave(
        filename = paste0(
            "paralog_family_ratio.switching_within_species.subtype.summary.",
            type,
            ".pdf"
        ),
        plot = p,
        width = 5,
        height = 7
    )
}

In [7]:
# Write the family/paralog-ratio summary PDF for each result set.
invisible(lapply(c("wilcox", "roc"), function(test_type) {
    get_fa_ratio(species, test_type)
}))

“Removed 6 rows containing non-finite outside the scale range
(`stat_boxplot()`).”
“Removed 6 rows containing non-finite outside the scale range
(`stat_summary()`).”
