In [17]:
library(openxlsx)
library(RENIN)
library(ggplot2)
library(ggrepel)

In [18]:
plot_tf_rankings <- function(results_df,
                         tfs_to_label = NULL,
                         p_value_cutoff = 0.05,
                         score_cutoff = 0.1,
                         two_tailed = TRUE,
                         top_n_to_label = 5,
                         label_tfs = TRUE,
                         colors = c("#39B54A", "#5862AD"), 
                         ident1 = "H", ident2 = "FR") {
require(ggplot2)
require(ggrepel)

results_df$CI <- results_df$SE * qnorm((1 - p_value_cutoff) / ifelse(two_tailed, 1, 2))
results_df <- results_df[which(abs(results_df$Score) - results_df$CI > 0), ]# remove nonsig TFs by p_value_cutoff
results_df <- results_df[which(abs(results_df$Score) > score_cutoff), ] # trim middle for plotting with score_cutoff
results_df <- results_df[order(results_df$Score, decreasing = TRUE), ]

if (is.null(tfs_to_label)) {
    tfs_to_label <- c(rownames(head(results_df, n = top_n_to_label)),
                      rownames(tail(results_df, n = top_n_to_label)))
}

results_df$label <- rep("", dim(results_df)[1])

if (label_tfs) { 
    results_df$label[which(results_df$TF_name %in% tfs_to_label)] <- results_df$TF_name[which(results_df$TF_name %in% tfs_to_label)]
}

# allow change the legend label for ident1 and ident2. 
results_df$comp <- ifelse(results_df$Score > 0, ident1, ident2)
# freeze the order of color with color 1 corresponds to ident 1, and color 2 corresponds to ident 2.
results_df$comp = factor(results_df$comp, levels = c(ident1, ident2))

if (length(which(results_df$Score < 0)) == 0) {
    colors <- colors[1]
}

results_df$axis <- results_df$axis <- nrow(results_df):1
g <- ggplot(results_df, aes(x=axis, y=Score, fill=comp)) + 
        # geom_bar(stat = "identity", color = "black", size = 0.0, width = 1, alpha = .8) + 
        geom_bar(stat = "identity", size = 0.0, width = 1, alpha = .8) + 
        scale_fill_manual(values = colors) +
#             theme_classic() + xlab("TF") + ylab("Predicted regulatory influence") +
        theme_classic() + xlab("TF") + ylab("RENIN-predicted regulatory influence") +
        # geom_text_repel(aes(label = label), max.overlaps = 100, size = 5) + 
      geom_text_repel(aes(label = label), max.overlaps = 100, size = 5, xlim = c(-Inf, Inf)) + 
      coord_cartesian(clip = "off") +   ## to allow labels exceed the x axis boundary in case multiple labels are mixed up 
        theme(text = element_text(size = 14), axis.text = element_text(size = 14))
return(g)
}

In [19]:
subfolder_name = "RENIN_324701_cells_DEGs_from_446267_cells"

processed_folder = file.path("..", "..", "processed_data", "RENIN", "tables")
processed_folder
dir.create(processed_folder, recursive = T, showWarnings = F)

plots_folder = file.path("..", "..", "plots", "RENIN")
plots_folder
dir.create(plots_folder, recursive = T, showWarnings = F)

### color palette for celltype5_rna

In [20]:
level.novaseq.rna <- c('JGA', 'POD', 'PEC', 
                       'PT', 'PT_dediff', 'PT_VCAM1', 
                       'tL1','tL2','tL-TAL', 'TAL1','TAL2', 'TAL3', 
                       'DCT', 'CNT', 
                       'PC1', 'PC2', 
                       'ICA', 'ICB', 
                       'Uro1', 'Uro2', 
                       'ENDO', 
                       'SMC1', 'SMC2', 'SMC3', 
                       'Fib1', 'Fib2', 'Fib3', 
                       'Ma', 'BT')

palette.novaseq.rna <- c('#FF8933', '#00B5EB', '#0077BE', 
                         '#8DC71E', '#69B41E', '#013220',
                         '#936210', '#5E2A0F', '#C9F5E6','#128394', '#62CCCC', '#046494',
                         '#be658d', '#8a3e6a', 
                         '#6a3070', '#4c2564', 
                         '#617A2E', '#A57C00',
                         '#092092', '#1C3BAC', 
                         '#F9CC72', 
                         '#E97E88', '#F8D1CD', '#E15566',
                         '#E2062B', '#860111', '#B4041E', 
                         '#86DEBB', '#7D4729')

names(palette.novaseq.rna) <- level.novaseq.rna


### color palette for renal_region_new

In [21]:
level.novaseq.renal_region_new  <- c("Cortex", "Medulla", "Papilla", "Renal Artery", "Ureter")     # C/M/P/RA/U
palette.novaseq.renal_region_new <- c('#4C9150','#7A339E','#E0AB3D','#CC2114','black')
names(palette.novaseq.renal_region_new) <- level.novaseq.renal_region_new

## the following scripts are used to extract tf_ranks

step3.5.15_RENIN_from_subset_harmony_extract_TF_ranks

step3.7.6_RENIN_from_subset_harmony_extract_TF_ranks_renal_region_new

In [22]:
load_test = function(tf_ranks_list, processed_folder, plots_folder, subfolder_name, palette){
    load(file.path(tf_ranks_list$file_path))

    # create output folder
    dir.create(file.path(processed_folder, "de_genes", subfolder_name), recursive = T, showWarnings = F)
    dir.create(file.path(processed_folder, "tf_ranks", subfolder_name), recursive = T, showWarnings = F)
    dir.create(file.path(plots_folder, "tf_ranks_labeled", subfolder_name), recursive = T, showWarnings = F)
    dir.create(file.path(plots_folder, "tf_ranks_unlabeled", subfolder_name), recursive = T, showWarnings = F)
    
    # export de_genes
    output_file_RData = file.path(processed_folder, "de_genes", subfolder_name, tf_ranks_list$output_name)
    output_file_csv = gsub("RData", "csv", output_file_RData, ignore.case = TRUE)

    de_genes = de_genes_BK
    
    save(list = c("de_genes"), file = output_file_RData, compress = T)
    write.csv(de_genes, output_file_csv)

    if (exists("tf_ranks")){
        # export tf_ranks
        output_file_RData = file.path(processed_folder, "tf_ranks", subfolder_name, tf_ranks_list$output_name)
        output_file_csv = gsub("RData", "csv", output_file_RData, ignore.case = TRUE)

        save(list = c("tf_ranks"), file = output_file_RData, compress = T)
        write.csv(tf_ranks, output_file_csv)
        #################################
        output_pdf = file.path(plots_folder, "tf_ranks_unlabeled", subfolder_name, gsub('RData', "pdf", tf_ranks_list$output_name))
        message(output_pdf)
        idents = strsplit(tf_ranks_list$comparison, ",")[[1]]

        if (length(idents) == 1){
            p = plot_tf_rankings(results_df = tf_ranks, colors = c(palette[idents[1]], 'grey'), 
                        ident1 = idents[1], ident2 = "Others", top_n_to_label = 0)

        } else if (length(idents) == 2){
            p = plot_tf_rankings(results_df = tf_ranks, colors = c(palette[idents[1]], palette[idents[2]]), 
                        ident1 = idents[1], ident2 = idents[2], top_n_to_label = 0)
            
        }

        p = p + theme(axis.title.x=element_blank()) + guides(fill=guide_legend(title=""))
        
        ggsave(output_pdf, p, width = 8, height = 6, dpi = 300, units = "in")
        #################################
        output_png = file.path(plots_folder, "tf_ranks_unlabeled", subfolder_name, gsub('RData', "png", tf_ranks_list$output_name))
        message(output_png)
        idents = strsplit(tf_ranks_list$comparison, ",")[[1]]

        if (length(idents) == 1){
            p = plot_tf_rankings(results_df = tf_ranks, colors = c(palette[idents[1]], 'grey'), 
                        ident1 = idents[1], ident2 = "Others", top_n_to_label = 0)

        } else if (length(idents) == 2){
            p = plot_tf_rankings(results_df = tf_ranks, colors = c(palette[idents[1]], palette[idents[2]]), 
                        ident1 = idents[1], ident2 = idents[2], top_n_to_label = 0)
            
        }

        p = p + theme(axis.title.x=element_blank()) + guides(fill=guide_legend(title=""))
        
        ggsave(output_png, p, width = 8, height = 6, dpi = 300, units = "in")
        #################################
        output_png = file.path(plots_folder, "tf_ranks_labeled", subfolder_name, gsub('RData', "png", tf_ranks_list$output_name))
        message(output_png)
        idents = strsplit(tf_ranks_list$comparison, ",")[[1]]

        if (length(idents) == 1){
            p = plot_tf_rankings(results_df = tf_ranks, colors = c(palette[idents[1]], 'grey'), 
                        ident1 = idents[1], ident2 = "Others", top_n_to_label = 5)

        } else if (length(idents) == 2){
            p = plot_tf_rankings(results_df = tf_ranks, colors = c(palette[idents[1]], palette[idents[2]]), 
                        ident1 = idents[1], ident2 = idents[2], top_n_to_label = 5)
            
        }
        
        ggsave(output_png, p, width = 8, height = 6, dpi = 300, units = "in")
    }
    
}

## extract de_genes calculated using hli_subset_78954 cells

In [23]:
tf_ranks_list_hli = data.frame(file_path = c(
    '../../processed_data/RENIN/by_regions/TAL1_TAL2_TAL3/RENIN_324701_cells_Medulla_vs_Cortex.RData',
    '../../processed_data/RENIN/by_regions/TAL1_TAL2_TAL3/RENIN_324701_cells_Papilla_vs_Medulla.RData',
    '../../processed_data/RENIN/by_regions/tL1_tL2/RENIN_324701_cells_Papilla_vs_Medulla.RData',
    "../../processed_data/RENIN/by_regions/PC1_PC2/RENIN_324701_cells_Papilla_vs_Medulla.RData",
    '../../processed_data/RENIN/by_celltypes/PT_vs_PT_VCAM1/RENIN_324701_cells_PT_vs_PT_VCAM1.RData'
))

In [24]:
tf_ranks_list_hli$output_name = c(
    "TAL1_TAL2_TAL3_324701_cells_Medulla_vs_Cortex.RData",
    "TAL1_TAL2_TAL3_324701_cells_Papilla_vs_Medulla.RData",
    "tL1_tL2_324701_cells_Papilla_vs_Medulla.RData",
    "PC1_PC2_324701_cells_Papilla_vs_Medulla.RData",
    "PT_vs_PT_VCAM1_324701_cells.RData"
)

In [25]:
tf_ranks_list_hli$comparison = c(
    "Medulla,Cortex",
    "Papilla,Medulla",
    "Papilla,Medulla",
    "Papilla,Medulla",
    "PT,PT_VCAM1"
    )

In [26]:
tf_ranks_list_hli

file_path,output_name,comparison
<chr>,<chr>,<chr>
../../processed_data/RENIN/by_regions/TAL1_TAL2_TAL3/RENIN_324701_cells_Medulla_vs_Cortex.RData,TAL1_TAL2_TAL3_324701_cells_Medulla_vs_Cortex.RData,"Medulla,Cortex"
../../processed_data/RENIN/by_regions/TAL1_TAL2_TAL3/RENIN_324701_cells_Papilla_vs_Medulla.RData,TAL1_TAL2_TAL3_324701_cells_Papilla_vs_Medulla.RData,"Papilla,Medulla"
../../processed_data/RENIN/by_regions/tL1_tL2/RENIN_324701_cells_Papilla_vs_Medulla.RData,tL1_tL2_324701_cells_Papilla_vs_Medulla.RData,"Papilla,Medulla"
../../processed_data/RENIN/by_regions/PC1_PC2/RENIN_324701_cells_Papilla_vs_Medulla.RData,PC1_PC2_324701_cells_Papilla_vs_Medulla.RData,"Papilla,Medulla"
../../processed_data/RENIN/by_celltypes/PT_vs_PT_VCAM1/RENIN_324701_cells_PT_vs_PT_VCAM1.RData,PT_vs_PT_VCAM1_324701_cells.RData,"PT,PT_VCAM1"


In [27]:
for (i in 1:dim(tf_ranks_list_hli)[1]){
    message(Sys.time())
    message(tf_ranks_list_hli[i, ])
    load_test(tf_ranks_list_hli[i, ], processed_folder, plots_folder, subfolder_name, palette = c(palette.novaseq.rna, palette.novaseq.renal_region_new)) 
}


2023-07-03 16:00:07

../../processed_data/RENIN/by_regions/TAL1_TAL2_TAL3/RENIN_324701_cells_Medulla_vs_Cortex.RDataTAL1_TAL2_TAL3_324701_cells_Medulla_vs_Cortex.RDataMedulla,Cortex

../../plots/RENIN/tf_ranks_unlabeled/RENIN_324701_cells_DEGs_from_446267_cells/TAL1_TAL2_TAL3_324701_cells_Medulla_vs_Cortex.pdf

“[1m[22mUsing `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
[36mℹ[39m Please use `linewidth` instead.”
../../plots/RENIN/tf_ranks_unlabeled/RENIN_324701_cells_DEGs_from_446267_cells/TAL1_TAL2_TAL3_324701_cells_Medulla_vs_Cortex.png

../../plots/RENIN/tf_ranks_labeled/RENIN_324701_cells_DEGs_from_446267_cells/TAL1_TAL2_TAL3_324701_cells_Medulla_vs_Cortex.png

2023-07-03 16:00:26

../../processed_data/RENIN/by_regions/TAL1_TAL2_TAL3/RENIN_324701_cells_Papilla_vs_Medulla.RDataTAL1_TAL2_TAL3_324701_cells_Papilla_vs_Medulla.RDataPapilla,Medulla

../../plots/RENIN/tf_ranks_unlabeled/RENIN_324701_cells_DEGs_from_446267_cells/TAL1_TAL2_TAL3_324701_cells_Papilla_vs_Medu