In [1]:
suppressMessages({
    library(tidyverse)
    library(data.table)

})


# Save one plot under the same base name in both PDF and PNG format.
#   p  : plot object handed to ggsave()
#   fn : output path without the file extension
#   w  : width passed to ggsave()
#   h  : height passed to ggsave()
# Called for its side effect (two files written); returns NULL invisibly.
save_plot <- function(p, fn, w, h) {
    out_paths <- paste0(fn, c(".pdf", ".png"))
    lapply(out_paths, function(out_path) {
        ggsave(filename = out_path, plot = p, width = w, height = h)
    })
    invisible(NULL)
}

# Create the output directory on the first run. showWarnings = FALSE keeps
# re-runs quiet when '../_m/' already exists; a genuine failure still surfaces
# because setwd() below errors if the directory is missing.
dir.create('../_m/', showWarnings = FALSE)
# Every relative path used after this point is resolved from '../_m/'.
setwd('../_m/')

"'../_m' already exists"


In [2]:
#file_list

In [3]:
# Collect every MAGMA gene-set result file (*_gene_set_results.gsa.out) found
# under the custom_universe* analysis directories.
file_list <- Sys.glob('../../custom_universe*/*/_m/*_gene_set_results.gsa.out')

# Remove empty files (no magma enrichment).
file_list <- file_list[file.info(file_list)$size > 0]

# Remove the 'Cv'-prefixed (brain region x brain region) gene sets from the
# blood_traits and psych_general_traits runs of the plain custom_universe
# analysis. The pattern is a regex, so the prefix is written as 'Cv'
# ('Cv*' would mean "C followed by any number of v").
cv_pattern <- paste0('/custom_universe/',
                     c('blood_traits', 'psych_general_traits'),
                     '/_m/Cv',
                     collapse = '|')
file_list <- file_list %>% discard(~str_detect(.x, cv_pattern))

# Fail early with a clear message instead of an obscure error further down.
if (length(file_list) == 0) {
    stop('No non-empty *_gene_set_results.gsa.out files were found.',
         call. = FALSE)
}
file_list %>% head()

# GWAS/study label: the part of the path between '.txt_' and '_gene_set'.
study_name <- gsub('.*.txt_|_gene_set.*', '', file_list)
study_name %>% head()

# Gene-set label: path with everything through the last '_m/' and the
# '_deg.genes.txt...' suffix stripped. Brain region and comparison are both
# parsed from this label.
set_label <- gsub('.*_m/|_deg.genes.txt.*', '', file_list)

# Brain region: text before the first underscore, upper-cased.
brain_region <- toupper(gsub('_.*', '', set_label))
brain_region[brain_region == "CVD"] <- "CAUDATExDLPFC"
brain_region[brain_region == "CVH"] <- "CAUDATExHIPPOCAMPUS"

table(brain_region)
head(brain_region)

# DE comparison: text after the last underscore.
comparison <- gsub('.*_', '', set_label)
comparison[comparison == "betweensex"] <- "sex_by_sz"

comparison %>% table()

# DE comparison labels:
#   _betweensex_deg_genes : sex_by_sz
#   XvD                   : region_by_sex
#   maleVfemale           : maleVfemale

brain_region
            CAUDATE       CAUDATExDLPFC CAUDATExHIPPOCAMPUS               DLPFC 
                 12                  12                  11                  22 
        HIPPOCAMPUS 
                 22 

.
        all maleVfemale   sex_by_sz 
         23          34          22 

In [4]:
# Build one long table of MAGMA gene-set results, one row per gene set per
# result file, annotated with GWAS, direction, brain region and DE comparison,
# then write it to 'magma_enrichment_results.tsv'.

# Fail early with a clear message instead of an obscure column-not-found error.
if (length(file_list) == 0) {
    stop('file_list is empty: no MAGMA result files to combine.', call. = FALSE)
}

# Names assigned to the seven columns read from each result file.
gsa_columns <- c('VARIABLE', 'TYPE', 'NGENES', 'BETA', 'BETA_STD', 'SE', 'P')

# Read the i-th result file and attach its per-file annotations.
read_gsa_result <- function(i) {
    gwas_label <- if (study_name[i] == 'SCZ') 'SCHIZOPHRENIA' else study_name[i]

    fread(file_list[i], skip = 4) %>%  # skip the 4 lines above the table
        as_tibble() %>%
        set_names(gsa_columns) %>%
        mutate(GWAS_set = gwas_label,
               Status = ifelse(grepl('down', VARIABLE), 'Downregulated', 'Upregulated'),
               Brain_Region = brain_region[i],
               DEComparison = comparison[i],
               # Brain_Region and DEComparison are constant within one file,
               # so the adjustment spans exactly the rows of this file.
               # NOTE(review): the correction is therefore per file, not across
               # GWAS sets -- confirm this is the intended multiple-testing scope.
               fdr_bh = p.adjust(P, method = "BH"),
               fdr_bonferroni = p.adjust(P, method = "bonferroni")) %>%
        select(VARIABLE, TYPE, NGENES, BETA, BETA_STD, SE, P, GWAS_set,
               fdr_bh, fdr_bonferroni, Status, Brain_Region, DEComparison)
}

# Read all files once and bind in a single step instead of growing the table
# inside a loop. Files are taken in reverse order so that, within each
# GWAS_set, later files come first (arrange() keeps the order of ties); this
# keeps the row order of the written table stable.
df <- rev(seq_along(file_list)) %>%
    map(read_gsa_result) %>%
    bind_rows() %>%
    arrange(GWAS_set) %>%
    # Sets from the 'all' comparison carry no direction: label them 'Both'
    # first, then rename the comparison itself.
    mutate(Status = ifelse(DEComparison == 'all', 'Both', Status),
           DEComparison = ifelse(DEComparison == 'all', 'region_by_sex', DEComparison))


head(df)
fwrite(df, 'magma_enrichment_results.tsv', quote = FALSE, sep = '\t', row.names = FALSE)


VARIABLE,TYPE,NGENES,BETA,BETA_STD,SE,P,GWAS_set,fdr_bh,fdr_bonferroni,Status,Brain_Region,DEComparison
<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>
CvH__all,SET,41,-0.063564,-0.0026278,0.14889,0.66528,ADHD,0.66528,0.66528,Both,CAUDATExHIPPOCAMPUS,region_by_sex
CvD__all,SET,439,0.044098,0.00594,0.044584,0.16132,ADHD,0.16132,0.16132,Both,CAUDATExDLPFC,region_by_sex
hippocampus__down,SET,30,-0.12791,-0.0044185,0.18894,0.75078,ADHD,0.89514,1.0,Downregulated,HIPPOCAMPUS,maleVfemale
hippocampus__up,SET,30,-0.22235,-0.0076808,0.17725,0.89514,ADHD,0.89514,1.0,Upregulated,HIPPOCAMPUS,maleVfemale
dlpfc__down,SET,75,0.063748,0.0034851,0.11852,0.29534,ADHD,0.59068,0.59068,Downregulated,DLPFC,maleVfemale
dlpfc__up,SET,102,-0.0401,-0.0025552,0.099966,0.65584,ADHD,0.65584,1.0,Upregulated,DLPFC,maleVfemale


In [5]:
# Show the rows whose unadjusted P is below 0.05 (rows with a missing P are dropped).
nominal_hit <- !is.na(df$P) & df$P < 0.05
df[nominal_hit, ]

VARIABLE,TYPE,NGENES,BETA,BETA_STD,SE,P,GWAS_set,fdr_bh,fdr_bonferroni,Status,Brain_Region,DEComparison
<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Male__up__Hippocampus,SET,66,0.20254,0.010313,0.11082,0.033811,BASO,0.067622,0.067622,Upregulated,HIPPOCAMPUS,sex_by_sz
Male__down__DLPFC,SET,4,1.0977,0.013801,0.47235,0.010068,BASO,0.010068,0.010068,Downregulated,DLPFC,sex_by_sz
Male__up__Hippocampus,SET,65,0.3114,0.015935,0.18339,0.044755,BMI,0.08951,0.08951,Upregulated,HIPPOCAMPUS,sex_by_sz
Male__down__DLPFC,SET,4,1.302,0.016369,0.56729,0.01087,EO,0.01087,0.01087,Downregulated,DLPFC,sex_by_sz
Male__up__Hippocampus,SET,66,0.3042,0.015489,0.12965,0.0094876,NEUT,0.0189752,0.0189752,Upregulated,HIPPOCAMPUS,sex_by_sz


In [6]:
# Show the 20 rows with the largest absolute BETA, largest first.
head(arrange(df, desc(abs(BETA))), n = 20)

VARIABLE,TYPE,NGENES,BETA,BETA_STD,SE,P,GWAS_set,fdr_bh,fdr_bonferroni,Status,Brain_Region,DEComparison
<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>
Male__down__DLPFC,SET,4,1.302,0.016369,0.56729,0.01087,EO,0.01087,0.01087,Downregulated,DLPFC,sex_by_sz
Male__down__DLPFC,SET,4,1.2273,0.015531,0.97708,0.10456,HEIGHT,0.10456,0.10456,Downregulated,DLPFC,sex_by_sz
Male__down__DLPFC,SET,4,1.0977,0.013801,0.47235,0.010068,BASO,0.010068,0.010068,Downregulated,DLPFC,sex_by_sz
Male__down__DLPFC,SET,4,0.79598,0.010007,0.59805,0.091611,MONO,0.091611,0.091611,Downregulated,DLPFC,sex_by_sz
Male__down__DLPFC,SET,4,-0.66036,-0.0083493,0.47647,0.91711,ADHD,0.91711,0.91711,Downregulated,DLPFC,sex_by_sz
hippocampus__down,SET,30,-0.49956,-0.017272,0.34626,0.92544,HEIGHT,0.92544,1.0,Downregulated,HIPPOCAMPUS,maleVfemale
CvH__all,SET,41,0.42939,0.017767,0.27638,0.060147,HEIGHT,0.060147,0.060147,Both,CAUDATExHIPPOCAMPUS,region_by_sex
hippocampus__up,SET,31,-0.37529,-0.013106,0.17701,0.983,BIPOLAR,0.983,1.0,Upregulated,HIPPOCAMPUS,maleVfemale
CvH__all,SET,42,-0.35296,-0.014687,0.14504,0.99252,BASO,0.99252,0.99252,Both,CAUDATExHIPPOCAMPUS,region_by_sex
hippocampus__down,SET,28,-0.3525,-0.011847,0.29751,0.88195,BMI,0.8937,1.0,Downregulated,HIPPOCAMPUS,maleVfemale


In [7]:
# Record the R version, platform and attached package versions for reproducibility.
sessionInfo()

R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Arch Linux

Matrix products: default
BLAS:   /usr/lib/libopenblasp-r0.3.18.so
LAPACK: /usr/lib/liblapack.so.3.10.0

locale:
[1] C

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] data.table_1.14.8 lubridate_1.9.2   forcats_1.0.0     stringr_1.5.0    
 [5] dplyr_1.1.2       purrr_1.0.2       readr_2.1.4       tidyr_1.3.0      
 [9] tibble_3.2.1      ggplot2_3.4.2     tidyverse_2.0.0  

loaded via a namespace (and not attached):
 [1] pillar_1.9.0     compiler_4.1.2   base64enc_0.1-3  tools_4.1.2     
 [5] digest_0.6.33    uuid_1.1-0       timechange_0.2.0 jsonlite_1.8.7  
 [9] evaluate_0.21    lifecycle_1.0.3  gtable_0.3.3     pkgconfig_2.0.3 
[13] rlang_1.1.1      IRdisplay_1.1    cli_3.6.1        IRkernel_1.3.2  
[17] fastmap_1.1.1    repr_1.1.6       withr_2.5.0      hms_1.1.3       
[21] generics_0.1.3   vctrs_0.6.3      g