# GO over-representation analysis (enrichGO) for regional DEGs

In [1]:
suppressPackageStartupMessages({
    library(clusterProfiler)
    library(ggplot2)
    library(fs)
    library(tidyverse)
    library(org.Hs.eg.db)
})

In [2]:
# Directory names used by the loops in this notebook. Both are relative paths,
# so they are resolved against the working directory set by setwd() below.
source_dir <- "deg"       # listed with dir_ls() to find subclass directories
result_dir <- "deg_gsea"  # parent directory for the written enrichment tables

# Make the project root the working directory for the rest of the session.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/regional_dar_deg/"
setwd(root_dir)

In [3]:
# Per-region GO Biological Process enrichment of up-regulated DEGs.
#
# For every subclass directory found under `source_dir`:
#   * the first column of `<subclass>_filtered.tsv` is read and passed to
#     enrichGO() as the background universe;
#   * for every file matching `*_result*`, rows with logFC > 0 and FDR < 0.001
#     are selected and their row names (first column of the file) form the
#     DEG list;
#   * enrichGO() is run (ont = "BP", keyType = "SYMBOL"), the result is reduced
#     with clusterProfiler::simplify(), and the result table is written to
#     `<result_dir>/<subclass>/<region>_gsea.tsv`.
# Regions with five or fewer selected genes are reported but not tested.
for (subclass_dir in dir_ls(source_dir)) {
    subclass <- path_file(subclass_dir)
    dir_create(sprintf("%s/%s", result_dir, subclass))

    # Background universe shared by all regions of this subclass.
    background_file <- sprintf("%s/%s_filtered.tsv", subclass_dir, subclass)
    background_list <- read.table(background_file, sep = "\t")[, 1]

    for (region_file in dir_ls(subclass_dir, glob = "*_result*")) {
        # fixed(): treat "_result.tsv" as a literal suffix; as a regex the "."
        # would match any character.
        region <- str_split_1(
            path_file(region_file),
            pattern = fixed("_result.tsv")
        )[1]
        region_df <- read.table(
            region_file,
            header = TRUE,
            sep = "\t",
            row.names = 1
        )

        # which() keeps only rows where the condition is TRUE. Indexing with
        # the raw logical mask would return an all-NA row (row name "NA") for
        # every row whose logFC or FDR is NA, polluting the gene list and
        # inflating the size check below.
        is_up_deg <- which(region_df$logFC > 0.0 & region_df$FDR < 0.001)
        deg_list <- rownames(region_df)[is_up_deg]

        print(sprintf("#deg: %d; %s, %s", length(deg_list), subclass, region))
        flush.console()

        # Too few genes for an enrichment test; move on to the next region.
        if (length(deg_list) <= 5) {
            next
        }

        curr_result <- enrichGO(
            gene = deg_list,
            universe = background_list,
            keyType = "SYMBOL",
            OrgDb = org.Hs.eg.db,
            ont = "BP",
            pvalueCutoff = 0.05,
            readable = TRUE
        )
        # Nothing to simplify or write when enrichGO() yields no result object.
        if (is.null(curr_result)) {
            next
        }
        result_simplified <- clusterProfiler::simplify(curr_result)

        output_file <- sprintf("%s/%s/%s_gsea.tsv", result_dir, subclass, region)
        write.table(
            result_simplified@result,
            output_file,
            quote = FALSE,
            col.names = TRUE,
            row.names = TRUE,
            sep = "\t"
        )
    }
}

[1] "#deg: 2; Astro, A1C"
[1] "#deg: 13; Astro, A24"
[1] "#deg: 0; Astro, A9"
[1] "#deg: 0; Astro, AnG"
[1] "#deg: 0; Astro, FI"
[1] "#deg: 0; Astro, M1C"
[1] "#deg: 1; Astro, MTG"
[1] "#deg: 0; Astro, S1C"
[1] "#deg: 24; Astro, V1C"
[1] "#deg: 0; Chandelier, A1C"
[1] "#deg: 7; Chandelier, A24"
[1] "#deg: 0; Chandelier, A9"
[1] "#deg: 0; Chandelier, AnG"
[1] "#deg: 0; Chandelier, FI"
[1] "#deg: 0; Chandelier, M1C"
[1] "#deg: 0; Chandelier, MTG"
[1] "#deg: 0; Chandelier, S1C"
[1] "#deg: 53; Chandelier, V1C"
[1] "#deg: 0; Endo, A1C"
[1] "#deg: 0; Endo, A24"
[1] "#deg: 2; Endo, A9"
[1] "#deg: 0; Endo, AnG"
[1] "#deg: 0; Endo, FI"
[1] "#deg: 0; Endo, M1C"
[1] "#deg: 0; Endo, MTG"
[1] "#deg: 0; Endo, S1C"
[1] "#deg: 0; Endo, V1C"
[1] "#deg: 0; L2_3_IT, A1C"
[1] "#deg: 156; L2_3_IT, A24"
[1] "#deg: 2; L2_3_IT, A9"
[1] "#deg: 0; L2_3_IT, AnG"
[1] "#deg: 32; L2_3_IT, FI"
[1] "#deg: 59; L2_3_IT, M1C"
[1] "#deg: 2; L2_3_IT, MTG"
[1] "#deg: 97; L2_3_IT, S1C"
[1] "#deg: 1174; L2_3_IT, V1C"
[1] "#d

In [4]:
# Per-subclass GO Biological Process enrichment on the union of regional DEGs.
#
# For every subclass directory found under `source_dir`:
#   * the first column of `<subclass>_filtered.tsv` is read and passed to
#     enrichGO() as the background universe;
#   * genes with logFC > 0 and FDR < 0.001 are collected from every file
#     matching `*_result*` and de-duplicated;
#   * enrichGO() is run on that pooled list (ont = "BP", keyType = "SYMBOL"),
#     the result is reduced with clusterProfiler::simplify(), and the result
#     table is written to `<result_dir>/<subclass>/<subclass>-total-gsea.tsv`.
# Subclasses with five or fewer pooled genes are skipped.
for (subclass_dir in dir_ls(source_dir)) {
    subclass <- path_file(subclass_dir)
    dir_create(sprintf("%s/%s", result_dir, subclass))

    # Background universe for this subclass.
    background_file <- sprintf("%s/%s_filtered.tsv", subclass_dir, subclass)
    background_list <- read.table(background_file, sep = "\t")[, 1]

    # One character vector of selected genes per region file, gathered in a
    # single pass instead of growing a vector with c() inside a loop.
    region_files <- dir_ls(subclass_dir, glob = "*_result*")
    deg_per_region <- lapply(region_files, function(region_file) {
        region_df <- read.table(
            region_file,
            header = TRUE,
            sep = "\t",
            row.names = 1
        )
        # which() keeps only rows where the condition is TRUE. Indexing with
        # the raw logical mask would return an all-NA row (row name "NA") for
        # every row whose logFC or FDR is NA.
        is_up_deg <- which(region_df$logFC > 0.0 & region_df$FDR < 0.001)
        rownames(region_df)[is_up_deg]
    })
    deg_uniq_list <- unique(unlist(deg_per_region, use.names = FALSE))

    # Too few genes for an enrichment test; move on to the next subclass.
    if (length(deg_uniq_list) <= 5) {
        next
    }

    curr_result <- enrichGO(
        gene = deg_uniq_list,
        universe = background_list,
        keyType = "SYMBOL",
        OrgDb = org.Hs.eg.db,
        ont = "BP",
        pvalueCutoff = 0.05,
        readable = TRUE
    )
    # Nothing to simplify or write when enrichGO() yields no result object.
    if (is.null(curr_result)) {
        next
    }
    result_simplified <- clusterProfiler::simplify(curr_result)

    output_file <- sprintf("%s/%s/%s-total-gsea.tsv", result_dir, subclass, subclass)
    write.table(
        result_simplified@result,
        output_file,
        quote = FALSE,
        col.names = TRUE,
        row.names = TRUE,
        sep = "\t"
    )
}