In [1]:
library(dplyr)
library(Matrix)
library(fgsea)


载入程序包：‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
### Load the project helpers (run_TF_module_GSEA is referenced from fun.R later
### in this script). The relative path is resolved against the working directory
### in effect *before* the setwd() call below, so the order of these two
### statements matters.
source('fun.R')
### Every relative 'CellOracle/...' path used afterwards resolves against this
### directory.
setwd('~/workspace/Mida_collab/')


载入程序包：‘igraph’


The following objects are masked from ‘package:dplyr’:

    as_data_frame, groups, union


The following objects are masked from ‘package:stats’:

    decompose, spectrum


The following object is masked from ‘package:base’:

    union




In [3]:
### Analysis thresholds
### Passed as `minsize` to run_TF_module_GSEA().
size <- 20
### Lower bound applied to the `pct` column when selecting genes.
used_pct <- 0.05
### Lower bound applied to the `expression` column when selecting genes.
### NOTE(review): this variable shares its name with base::exp(); it is kept
### because other cells and the output file names refer to it.
exp <- 0.04

In [4]:
### Read the tab-separated gradient table and keep only rows flagged 'Pass'.
RGG_meta <- filter(
    read.delim('CellOracle/gradient_table2.vRG.tsv'),
    flag == 'Pass'
)
### Sanity check: shows TRUE when every retained row has p_val_adj <= 0.05.
all(RGG_meta$p_val_adj <= 0.05)

In [5]:
### Genes that pass both the expression and the pct thresholds.
all_genes <- filter(
    read.csv('CellOracle/all_genes_exp.csv'),
    expression >= exp,
    pct >= used_pct
)
### Keep only TF-gene pairs whose TF and target gene are both listed in
### column `X` of the filtered gene table.
TF_gene_score <- filter(
    read.csv('CellOracle/TF_gene_score.csv'),
    TF %in% all_genes$X,
    gene %in% all_genes$X
)

In [6]:
### Preview the first rows of the filtered TF-gene score table.
head(TF_gene_score)

Unnamed: 0_level_0,X,TF,gene,TF_gene_score
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>
1,ARID2_ABCA12,ARID2,ABCA12,0.004387156
2,ARID2_ABHD15-AS1,ARID2,ABHD15-AS1,0.004592286
3,ARID2_ABTB2,ARID2,ABTB2,0.001376487
4,ARID2_AC006059.1,ARID2,AC006059.1,0.007425303
5,ARID2_AC006387.1,ARID2,AC006387.1,0.002369358
6,ARID2_AC007389.1,ARID2,AC007389.1,0.000856486


In [7]:
### Build a list with one element per module (in order of first appearance),
### each holding the genes assigned to that module.
module_ids <- unique(RGG_meta$Module)
gene_module <- lapply(module_ids, function(module) {
    filter(RGG_meta, Module == module)$gene
})
names(gene_module) <- module_ids

In [8]:
### Build a list with one element per TF: a numeric vector of TF_gene_score
### values named by target gene.
all_TFs <- unique(TF_gene_score$TF)
TF_gene_score_list <- lapply(all_TFs, function(used_TF) {
    tf_rows <- filter(TF_gene_score, TF == used_TF)
    scores <- tf_rows$TF_gene_score
    names(scores) <- tf_rows$gene
    scores
})
names(TF_gene_score_list) <- all_TFs

In [9]:
### Run the GSEA enrichment of every gene module against every TF's score
### vector; see run_TF_module_GSEA() in fun.R for the implementation.
res <- run_TF_module_GSEA(
    TF_gene_score_list,
    gene_module,
    minsize = size
)

In [10]:
### For each module, take its GSEA rows across all TFs and apply an 'fdr'
### correction within that module; the result is stored in column `padj.2`.
### The per-module tables are then stacked in the order of names(gene_module).
### NOTE(review): p.adjust() is applied to `padj`. If run_TF_module_GSEA()
### already returns adjusted p-values in that column, this is a second
### correction on top of the first -- confirm that this is intended.
res_new <- do.call(
    rbind,
    lapply(names(gene_module), function(module) {
        module_res <- filter(res, pathway == module)
        module_res$padj.2 <- p.adjust(module_res$padj, method = 'fdr')
        as.data.frame(module_res)
    })
)

In [11]:
### Collapse each leading-edge gene vector into one comma-separated string.
### vapply() always returns a character vector (sapply() returns an empty list
### when there are no rows), so the `genes` column has a stable type.
res_new$genes <- vapply(
    res_new$leadingEdge,
    function(x) paste0(x, collapse = ','),
    character(1)
)
### Expose `pathway` as `module` and keep only the columns used downstream.
res_new <- res_new %>%
    mutate(module = pathway) %>%
    select(all_of(c('tf', 'module', 'NES', 'size', 'padj.2', 'genes')))
### Empty TF-indexed frames (one row per TF, no columns yet); they are filled
### with adjusted p-values and NES values respectively in the next step.
res_mat <- data.frame(row.names = unique(res_new$tf))
res_mat_NES <- data.frame(row.names = unique(res_new$tf))

In [12]:
### Fill the TF-by-module matrices: adjusted p-values (res_mat) and NES,
### the normalized enrichment score (res_mat_NES).
### Each module column is filled in one vectorised lookup instead of filtering
### the full result table once per TF/module pair. A TF without a row for a
### module gets NA. A TF with more than one row for a module also gets NA (as
### before), but this is now reported with a warning instead of being skipped
### silently.
stopifnot(identical(rownames(res_mat), rownames(res_mat_NES)))
tf_names <- rownames(res_mat)
for (used_module in c('2','3','4','5')) {
    # which() drops rows whose module is NA instead of producing NA rows.
    module_rows <- res_new[which(res_new$module == used_module), , drop = FALSE]
    module_tfs <- as.character(module_rows$tf)

    dup_tfs <- unique(module_tfs[duplicated(module_tfs)])
    if (length(dup_tfs) > 0) {
        warning(
            'Module ', used_module, ': more than one GSEA row for TF(s) ',
            paste(dup_tfs, collapse = ', '), '; leaving these entries as NA',
            call. = FALSE
        )
    }

    # Position of each TF's row within this module's results (NA if absent).
    row_idx <- match(tf_names, module_tfs)
    row_idx[tf_names %in% dup_tfs] <- NA

    res_mat[[used_module]] <- module_rows$padj.2[row_idx]
    res_mat_NES[[used_module]] <- module_rows$NES[row_idx]
}

In [13]:
### A TF-module pair is called significant when its adjusted p-value is <= 0.05
### and the absolute value of its NES is >= 1.5. This gives a binarized
### TF-module matrix for downstream analysis.
### NOTE(review): the code tests abs(NES), so strongly negative enrichments are
### also marked significant, whereas the original comment read "NES >=1.5" --
### confirm which is intended.
passes_pvalue <- res_mat <= 0.05
passes_nes <- abs(res_mat_NES) >= 1.5
res_mat_bi <- passes_pvalue & passes_nes
### Pairs without a GSEA result (NA) are treated as not significant.
res_mat_bi[is.na(res_mat_bi)] <- FALSE
res_mat_bi <- as.data.frame(res_mat_bi)
### Drop TFs that are significant for no module and fix the column order.
has_any_hit <- rowSums(res_mat_bi) != 0
res_mat_bi <- res_mat_bi[has_any_hit, c("4", "5", "3", "2")]

In [14]:
### Write the long-format result table and the three TF-by-module matrices.
### Every file name ends with the same suffix that encodes the thresholds
### used for this run.
output_suffix <- paste0(exp, 'pct', used_pct, 'size', size, '.csv')

### Long-format GSEA results (one row per TF/module pair), without row names.
write.csv(
    res_new,
    paste0('CellOracle/output/GSEA_res_df_exp', output_suffix),
    row.names = FALSE
)
### TF-by-module matrices; row names (TFs) are written as the first column.
write.csv(
    res_mat,
    paste0('CellOracle/output/GSEA_res_mat_pvalue_exp', output_suffix)
)
write.csv(
    res_mat_NES,
    paste0('CellOracle/output/GSEA_res_mat_NES_exp', output_suffix)
)
write.csv(
    res_mat_bi,
    paste0('CellOracle/output/GSEA_res_mat_bi_exp', output_suffix)
)