### This script visualizes the EnrichR results as bar plots, which were previously generated in Excel

Bar plot panels in Main Figures 2, 3, 5, and 6

Author: Dian Li, Pierre Isnard

Contact: dianli@wustl.edu

Last updated: February 7th, 2024

In [1]:
suppressMessages(library(dplyr))
suppressMessages(library(openxlsx))
suppressMessages(library(ggplot2))
suppressMessages(library(stringr))

In [2]:
# Path to the EnrichR results workbook that the plotting loop reads sheet by sheet.
Enrichr_res <- "../../meta/EnrichR_Cell-type_HuBMAP_Analysis_simplified.xlsx"

In [3]:
# Directory the figures are written to. It is created together with any missing
# parent directories; an already-existing directory is not reported as a warning.
outputDir <- "../../plots/version_1/main_figures/enrichR"
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
dir.create(outputDir, recursive = TRUE, showWarnings = FALSE)

In [4]:
# Restore the objects saved in the two metadata files into the current environment.
load("../../meta/meta_info.RData")
load("../../meta/meta_info2.RData")
# Display both palettes as a visual check. They are not defined in this script, so
# they are expected to come from the files loaded above (presumably colour values
# keyed by structure name -- confirm against the .RData contents).
color_palette
color_palette2

In [5]:
# Fill colours for the four Monocle clusters, keyed by cluster name.
color_palette3 <- c(
  Monocle_Cluster_1 = "grey90",
  Monocle_Cluster_2 = "#fff088",
  Monocle_Cluster_3 = "#615500",
  Monocle_Cluster_4 = "#c3ab00"
)

# Append the Monocle colours to the secondary palette so that name-based
# lookups against it can resolve the Monocle clusters as well.
color_palette2 <- c(color_palette2, color_palette3)

In [6]:
# Per-cluster plotting configuration.
# All vectors below are parallel: position k in each one describes cluster k.

# Cluster identifiers; these are also the keys of `meta_list`.
cluster_list <- c("Monocle_Cluster_1", "Monocle_Cluster_2",
                  "Monocle_Cluster_3", "Monocle_Cluster_4",
                  "TLS", "Tumors", "Glom", "PT", "LH-CD", "Cast-T", "Inj-T", "DKDG", "SGS", "GGS")

# Name under which each cluster is looked up in the colour palettes.
use_name_list <- c("Monocle_Cluster_1", "Monocle_Cluster_2",
                   "Monocle_Cluster_3", "Monocle_Cluster_4",
                   "TLS", "Tumor", "Glomeruli", "PT", "LH-CD", "Cast-T", "Inj-T", "DKDG", "SGS", "GGS")

# Saved figure size (inches).
fig_width <- c(12, 12, 12, 12,
               8, 16, 8, 8, 12, 12, 8, 10, 10, 10)
fig_height <- c(4, 4, 4, 4,
                4, 4, 4, 4, 6, 6, 4, 4, 4, 4)

# Term names longer than `str_max` characters are wrapped to lines of about
# `str_wrap` characters.
str_max_list <- c(50, 50, 50, 50,
                  50, 70, 50, 50, 70, 60, 50, 50, 50, 50)
str_wrap_list <- c(50, 50, 50, 50,
                   40, 70, 40, 40, 70, 60, 40, 40, 40, 40)

# Guard against the parallel vectors drifting out of sync: indexing past the end
# of a shorter vector would otherwise fill settings with NA without any error.
setting_lengths <- lengths(list(use_name_list, fig_width, fig_height,
                                str_max_list, str_wrap_list))
if (any(setting_lengths != length(cluster_list))) {
    stop("Every per-cluster settings vector must have exactly one entry per cluster.",
         call. = FALSE)
}

# One settings record per cluster, addressable by cluster name.
# seq_along() stays correct even for an empty cluster list, unlike 1:length().
meta_list <- lapply(seq_along(cluster_list), function(i) {
    list(
        cluster_name = cluster_list[i],
        use_name = use_name_list[i],
        fig_width = fig_width[i],
        fig_height = fig_height[i],
        str_max = str_max_list[i],
        str_wrap = str_wrap_list[i]
    )
})
names(meta_list) <- cluster_list

### barplot helper function

In [7]:
# Build a horizontal bar plot of enrichment terms ranked by combined score.
#
# Arguments:
#   df          Data frame with one row per term. It must contain the columns
#               `Name` and `Combined.score`. Columns 3 and 4 are renamed to
#               `p` and `p.adj` by position (assumed to hold the raw and
#               adjusted p-values -- confirm against the workbook layout);
#               `p.adj` is printed next to each bar.
#   color_code  Fill colour used for every bar.
#   str_max     Term names longer than this many characters get wrapped.
#   str_wrap    Target line width (in characters) for wrapped names.
#
# Returns:
#   A ggplot object; nothing is drawn or saved here.
enrichr_barplot <- function(df, color_code, str_max, str_wrap) {
    colnames(df)[3:4] <- c("p", "p.adj")

    # Largest score sets the axis range and the label offset. Missing scores are
    # ignored so that a single NA does not turn every derived value into NA.
    y_max <- max(df$Combined.score, na.rm = TRUE)

    # Wrap only the names that exceed the limit. The function is called with its
    # namespace because the `str_wrap` argument (a number) has the same name.
    needs_wrap <- !is.na(df$Name) & nchar(df$Name) > str_max
    df$WrappedName <- df$Name
    df$WrappedName[needs_wrap] <- stringr::str_wrap(df$Name[needs_wrap], width = str_wrap)

    # Order the factor levels by ascending score; after coord_flip() the
    # highest-scoring term ends up at the top.
    df$WrappedName <- factor(df$WrappedName,
                             levels = df$WrappedName[order(df$Combined.score)])

    # Axis breaks are left to the ggplot2 defaults.
    p <- ggplot(df, aes(x = WrappedName, y = Combined.score)) +
      geom_bar(stat = "identity", fill = color_code, color = "grey25") +
      coord_flip() + # Make the barplot horizontal
      # Adjusted p-value just past the end of each bar.
      geom_text(aes(label = paste(p.adj), y = Combined.score + 0.02 * y_max),
                hjust = 0, color = "black", size = 4) +
      theme_minimal() +
      theme(text = element_text(family = "Helvetica", color = "black"),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            axis.text.x = element_text(color = "black"),
            axis.text.y = element_text(color = "black", hjust = 1,
                                       margin = margin(r = -10, unit = "pt"), size = 14),
            axis.ticks.x = element_line(color = "black"),
            axis.ticks.y = element_blank(),
            axis.line = element_blank()) +
      labs(y = "", x = "", title = "") +
      # Extra 15% of headroom so the text labels are not clipped.
      ylim(0, 1.15 * y_max)

    p
}



In [None]:
# Render one bar plot per cluster and save it as both PDF and PNG in `outputDir`.
for (cluster in cluster_list) {
    # Settings are read straight from this record instead of being copied into
    # globals such as `str_wrap`, which would mask the stringr function's name.
    meta <- meta_list[[cluster]]

    # Each cluster's results are stored on the workbook sheet of the same name.
    df <- read.xlsx(Enrichr_res, sheet = cluster)

    # Prefer the main palette; fall back to the secondary one.
    if (meta$use_name %in% names(color_palette)) {
        color_source <- color_palette
    } else {
        color_source <- color_palette2
    }
    # Indexing with an unknown name does not raise an error by itself, so make
    # the problem visible instead of silently producing a plot without a colour.
    if (!meta$use_name %in% names(color_source)) {
        warning("No colour defined for '", meta$use_name,
                "'; the bar fill colour will be missing.", call. = FALSE)
    }
    color_code <- color_source[meta$use_name]

    p <- enrichr_barplot(df, color_code, meta$str_max, meta$str_wrap)

    # Same plot and dimensions for both output formats.
    for (ext in c("pdf", "png")) {
        ggsave(filename = file.path(outputDir, paste0("Enrichr_", cluster, ".", ext)), p,
               width = meta$fig_width, height = meta$fig_height, units = "in", dpi = 300)
    }
}