# Exploring module eigengenes and their correlation with phenotypes

In [1]:
suppressPackageStartupMessages({
    library(ComplexHeatmap)
    library(tidyverse)
    library(ggpubr)
})

## Prepare data

### Load phenotypes data

In [2]:
# Read the merged phenotype table and preview its first two rows.
pheno <- data.table::fread("../../../../input/phenotypes/merged/_m/merged_phenotypes.csv")
head(pheno, 2)

V1,BrNum,RNum,Region,RIN,Age,Sex,Race,Dx,mitoRate,rRNA_rate,overallMapRate
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
R12864,Br1303,R12864,Caudate,9.6,42.98,F,AA,Schizo,0.03265387,8.675159e-05,0.9093498
R12865,Br1320,R12865,Caudate,9.5,53.12,M,AA,Schizo,0.0197874,6.976684e-05,0.873484


In [3]:
# Read the per-individual ancestry proportions and preview the first two rows.
ancestry <- data.table::fread("../../../../input/ancestry_structure/structure.out_ancestry_proportion_raceDemo_compare")
head(ancestry, 2)

id,Afr,Eur,group
<chr>,<dbl>,<dbl>,<chr>
Br2374,0.007,0.993,CAUC
Br1857,0.001,0.999,CAUC


### Load eigengenes

In [4]:
# Read the eigengene table; every column except the ID column `V1` is a module.
eigen <- data.table::fread("../../_m/eigengenes.csv")
modules <- colnames(select(eigen, -V1))
dim(eigen)
eigen[1:2, 1:5]

V1,MEblack,MEblue,MEbrown,MEcyan
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
R12258,0.06699524,0.0866778,-0.13382156,0.05218133
R12259,-0.11375148,0.1743282,-0.00232706,0.09789208


### Merge data

In [5]:
# Attach phenotypes to each eigengene row by `V1`, then keep only the rows
# whose `BrNum` has a matching `id` in the ancestry table.
dt <- inner_join(
    left_join(eigen, pheno, by = "V1"),
    ancestry,
    by = c("BrNum" = "id")
)
dim(dt)
dt[1:2, 1:5]

V1,MEblack,MEblue,MEbrown,MEcyan
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
R12258,0.06699524,0.0866778,-0.13382156,0.05218133
R12259,-0.11375148,0.1743282,-0.00232706,0.09789208


## Examine correlation of modules with ancestry

### Linear model

In [6]:
# For every module, regress `Eur` on the module eigengene and keep the ANOVA
# F-test p-value of that single term. USE.NAMES = FALSE keeps the vector
# unnamed so the resulting data frame keeps default row names.
pvals <- vapply(modules, function(me) {
    fit <- lm(as.formula(paste0("Eur ~ ", me)), data = dt)
    anova(fit)[me, "Pr(>F)"]
}, numeric(1), USE.NAMES = FALSE)
# Benjamini-Hochberg adjustment across all modules.
fdr <- p.adjust(pvals, method = "fdr")
df1 <- data.frame("Modules" = modules, "Pvalue" = pvals, "FDR" = fdr)
dplyr::filter(df1, FDR < 0.05)

Modules,Pvalue,FDR
<chr>,<dbl>,<dbl>
MEblack,0.0012916836,0.01420852
MEcyan,0.0047683585,0.03933896
MEdarkred,0.0009788253,0.01420852
MEdarkturquoise,0.000612816,0.01420852


In [7]:
# Add the tissue label to the linear-model results and save them as a
# tab-separated file.
data.table::fwrite(
    mutate(df1, Tissue = "DLPFC"),
    "eigen_correlation_ancestry.tsv",
    sep = "\t"
)

### Pearson correlation

In [8]:
# Pearson correlation between `Eur` and each module eigengene. Each column of
# `cor_stats` holds (estimate, p-value) for one module.
cor_stats <- vapply(modules, function(me) {
    ct <- cor.test(dt[["Eur"]], dt[[me]], method = "pearson")
    c(ct$estimate[[1]], ct$p.value)
}, numeric(2), USE.NAMES = FALSE)
est <- cor_stats[1, ]
pvals <- cor_stats[2, ]
# Benjamini-Hochberg adjustment across all modules.
fdr <- p.adjust(pvals, method = "fdr")
df2 <- data.frame("Modules" = modules, "Rho" = est, "Pvalue" = pvals, "FDR" = fdr)
dplyr::filter(df2, FDR < 0.05)

Modules,Rho,Pvalue,FDR
<chr>,<dbl>,<dbl>,<dbl>
MEblack,0.2232639,0.0012916836,0.01420852
MEcyan,-0.1963941,0.0047683585,0.03933896
MEdarkred,-0.2285824,0.0009788253,0.01420852
MEdarkturquoise,-0.2372978,0.000612816,0.01420852


## Clustering modules

### Eigengene clustering

In [9]:
# Heatmap of all module eigengenes, written to eigengene_heatmap.pdf.
# Columns are split into four k-means groups (column_km = 4), so the seed is
# fixed first to keep the split reproducible.
set.seed(13)
mat <- eigen %>% column_to_rownames("V1") %>% as.matrix
pdf("eigengene_heatmap.pdf", width = 10, height = 8)
# Draw explicitly: a bare Heatmap() call is only rendered through top-level
# auto-printing, so the PDF would be empty if this code were sourced or
# wrapped in a function/loop. This also matches plot_module_heatmap().
draw(Heatmap(mat, name = "Eigengene", column_dend_height = unit(3, "cm"),
             show_row_names = FALSE, show_row_dend = FALSE, column_km = 4))
dev.off()

### Clustering within significant modules

In [10]:
# Restore the saved objects (verbose = TRUE prints their names) and read the
# biomart gene annotation table.
load("../../_m/01.RData", verbose = TRUE)
biomart_file <- "../../../../input/biomart/biomart.csv"
biomart <- data.table::fread(biomart_file)

Loading objects:
  datExpr
  sample_table
  datTraits


In [11]:
#' Expression matrix for the genes of one module, labelled by gene name.
#'
#' Reads the module assignments from "../../_m/modules.csv", keeps the columns
#' of the global `datExpr` whose names appear in that module, and renames the
#' columns using the global `biomart` table (falling back to the
#' version-stripped Ensembl ID when no gene name is available).
#'
#' @param mod Module name, compared against the `module` column of modules.csv.
#' @return A matrix with the rows of `datExpr` and one column per module gene.
get_module_mat <- function(mod){
    MEmodule <- data.table::fread("../../_m/modules.csv", header=TRUE) %>%
        filter(module == mod)
    mat0 <- datExpr %>% as.data.frame %>%
        select(any_of(MEmodule$V1)) %>% as.matrix
    if (ncol(mat0) == 0) {
        stop("No genes from module '", mod, "' were found in datExpr",
             call. = FALSE)
    }
    # De-duplicate on the original column name (Geneid), not on the stripped
    # Ensembl ID: the join can return several biomart rows per gene, and two
    # versioned IDs can share one stripped ID. Collapsing on the stripped ID
    # would leave fewer annotation rows than matrix columns.
    annot <- data.frame("Geneid"=colnames(mat0),
                        "ensembl_gene_id"=gsub("\\..*", "", colnames(mat0))) %>%
        left_join(biomart, by="ensembl_gene_id") %>%
        distinct(Geneid, .keep_all=TRUE) %>%
        mutate(external_gene_name=coalesce(external_gene_name,ensembl_gene_id))
    # left_join() and distinct() keep the row order of `annot`, so it must
    # still line up one-to-one with the matrix columns.
    stopifnot(identical(annot$Geneid, colnames(mat0)))
    colnames(mat0) <- annot$external_gene_name
    mat0
}

#' Save a clustered expression heatmap for one module as a PDF.
#'
#' Writes "<mod>_module_heatmap.pdf" to the working directory. Rows are split
#' into two k-means groups (row_km = 2) and annotated with the `Eur` and `Afr`
#' columns of the global `dt`.
#'
#' @param mod Module name; passed to get_module_mat() and used as file prefix.
#' @param w,h Width and height of the PDF, passed to pdf().
#' @return The value of dev.off(), as before.
plot_module_heatmap <- function(mod, w, h){
    # Build the matrix before opening the device so that a failure here does
    # not leave an empty PDF behind.
    mat <- get_module_mat(mod)

    # Align the ancestry annotation with the matrix rows by sample ID rather
    # than relying on `dt` and the matrix sharing the same row order.
    # NOTE(review): assumes rownames(mat) use the same IDs as dt$V1 -- confirm.
    idx <- match(rownames(mat), dt$V1)
    if (is.null(rownames(mat)) || anyNA(idx)) {
        if (nrow(mat) != nrow(dt)) {
            stop("Cannot align ancestry annotation: matrix has ", nrow(mat),
                 " rows but dt has ", nrow(dt), call. = FALSE)
        }
        warning("Matrix row names do not all match dt$V1; ",
                "aligning ancestry annotation by position", call. = FALSE)
        idx <- seq_len(nrow(mat))
    }

    pdf(paste0(mod,"_module_heatmap.pdf"), width=w, height=h)
    # Close the device even if plotting fails; the flag avoids closing twice.
    device_open <- TRUE
    on.exit(if (device_open) dev.off(), add = TRUE)

    # Seed is set before the annotation and heatmap are built, as before.
    set.seed(13)
    row_ha <- rowAnnotation(EA = dt$Eur[idx], AA = dt$Afr[idx])
    ht <- Heatmap(mat,
            name="Residualized\nExpression",
            column_dend_height=unit(1, "cm"), row_km=2,
            show_row_names=FALSE, show_row_dend=TRUE,
            row_dend_width=unit(2, "cm"),
            right_annotation = row_ha)
    draw(ht)
    device_open <- FALSE
    dev.off()
}

In [12]:
# Modules whose linear-model association with `Eur` passes FDR < 0.05.
dplyr::filter(df1, FDR < 0.05)

Modules,Pvalue,FDR
<chr>,<dbl>,<dbl>
MEblack,0.0012916836,0.01420852
MEcyan,0.0047683585,0.03933896
MEdarkred,0.0009788253,0.01420852
MEdarkturquoise,0.000612816,0.01420852


In [13]:
# Write one heatmap PDF per module listed in the FDR < 0.05 table.
# Arguments: module name, PDF width, PDF height.
plot_module_heatmap("black", 24, 10)
plot_module_heatmap("cyan", 20, 10)
plot_module_heatmap("darkred", 20, 10)
plot_module_heatmap("darkturquoise", 24, 10)

The automatically generated colors map from the minus and plus 99^th of
the absolute values in the matrix. There are outliers in the matrix
whose patterns might be hidden by this color mapping. You can manually
set the color to `col` argument.

Use `suppressMessages()` to turn off this message.



## Reproducibility Information

In [14]:
# Print the current time and the CPU/elapsed time used by this R session.
Sys.time()
proc.time()
# Set printed output width to 120 columns, then list the R version, platform
# and package versions.
options(width = 120)
sessioninfo::session_info()

[1] "2021-10-01 13:40:17 EDT"

   user  system elapsed 
 23.807   1.847  26.173 

─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.0.3 (2020-10-10)
 os       Arch Linux                  
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2021-10-01                  

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package        * version  date       lib source        
 abind            1.4-5    2016-07-21 [1] CRAN (R 4.0.2)
 assertthat       0.2.1    2019-03-21 [1] CRAN (R 4.0.2)
 backports        1.2.1    2020-12-09 [1] CRAN (R 4.0.2)
 base64enc        0.1-3    2015-07-28 [1] CRAN (R 4.0.2)
 BiocGenerics     0.36.1   2021-04-16 [1] Bioconductor  
 broom            0.7.9    20