# Exploring module eigengenes and their correlation with phenotypes

In [1]:
suppressPackageStartupMessages({
    library(ComplexHeatmap)
    library(tidyverse)
    library(ggpubr)
})

## Prepare data

### Load phenotypes data

In [2]:
# Phenotype table; its `V1` column is the key used to join it to the
# eigengene table further down.
pheno <- data.table::fread("../../../../input/phenotypes/merged/_m/merged_phenotypes.csv")
head(pheno, 2)

V1,BrNum,RNum,Region,RIN,Age,Sex,Race,Dx,mitoRate,rRNA_rate,overallMapRate
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
R12864,Br1303,R12864,Caudate,9.6,42.98,F,AA,Schizo,0.03265387,8.675159e-05,0.9093498
R12865,Br1320,R12865,Caudate,9.5,53.12,M,AA,Schizo,0.0197874,6.976684e-05,0.873484


In [3]:
# Ancestry proportions (columns `Afr` and `Eur`), keyed by `id`, which is
# matched against `BrNum` when the tables are merged.
ancestry <- data.table::fread("../../../../input/ancestry_structure/structure.out_ancestry_proportion_raceDemo_compare")
head(ancestry, 2)

id,Afr,Eur,group
<chr>,<dbl>,<dbl>,<chr>
Br2374,0.007,0.993,CAUC
Br1857,0.001,0.999,CAUC


### Load eigengenes

In [4]:
# Module eigengene table: sample ID in `V1`, one `ME*` column per module.
eigen <- data.table::fread("../../_m/eigengenes.csv")
# Every column other than the sample ID is treated as a module.
modules <- colnames(select(eigen, -V1))
dim(eigen)
eigen[1:2, 1:5]

V1,MEblack,MEblue,MEbrown,MEcyan
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
R10700,-0.02635168,-0.03415886,-0.02078395,0.05489625
R10706,-0.06053349,-0.07077148,-0.03261367,0.0779503


### Merge data

In [5]:
# Attach phenotypes to each eigengene row by sample ID (`V1`), then keep only
# the rows whose `BrNum` has a matching `id` in the ancestry table.
dt <- inner_join(
    left_join(eigen, pheno, by="V1"),
    ancestry,
    by=c("BrNum"="id")
)
dim(dt)
dt[1:2, 1:5]

V1,MEblack,MEblue,MEbrown,MEcyan
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
R10700,-0.02635168,-0.03415886,-0.02078395,0.05489625
R10706,-0.06053349,-0.07077148,-0.03261367,0.0779503


## Examine correlation of modules with ancestry

### Linear model

In [6]:
# Fit `Eur ~ <module eigengene>` separately for every module and keep the
# ANOVA F-test p-value of the module term.
# vapply() is used instead of appending to `pvals` inside a loop: the result
# is allocated once and must contain exactly one numeric value per module.
# USE.NAMES=FALSE keeps the vector unnamed so data.frame() does not pick the
# module names up as row names.
pvals <- vapply(modules, function(mod) {
    fit <- lm(as.formula(paste0("Eur ~ ", mod)), data=dt)
    anova(fit)[mod, "Pr(>F)"]
}, numeric(1), USE.NAMES=FALSE)
# Multiple-testing correction across all modules.
fdr <- p.adjust(pvals, method="fdr")
df1 <- data.frame("Modules"=modules, "Pvalue"=pvals, "FDR"=fdr)
# Show the modules that pass the 5% FDR threshold.
df1 %>% filter(FDR < 0.05)

Modules,Pvalue,FDR
<chr>,<dbl>,<dbl>
MEblack,0.007602563,0.04966458
MEdarkgrey,0.0007190295,0.009347383
MEdarkolivegreen,0.001664925,0.01623302
MEgrey,9.032128e-06,0.0001761265
MEgrey60,4.711608e-08,1.837527e-06
MElightcyan,0.009443591,0.04966458
MEmagenta,0.007749418,0.04966458
MEskyblue,0.01018761,0.04966458


In [7]:
# Save the linear-model results (`df1`) for every module, tagged with a
# tissue label, as a tab-separated file.
# NOTE(review): the tissue label is hard-coded; confirm it matches the samples
# in the eigengene table.
data.table::fwrite(
    mutate(df1, Tissue="Dentate Gyrus"),
    "eigen_correlation_ancestry.tsv",
    sep='\t'
)

### Pearson correlation

In [8]:
# Pearson correlation between the European ancestry proportion and each
# module eigengene. Each test is run once; the p-value and the coefficient
# are then extracted with vapply(), which avoids growing vectors with c()
# inside a loop and enforces one numeric value per module.
cor_tests <- lapply(modules, function(mod) {
    cor.test(dt[["Eur"]], dt[[mod]], method="pearson")
})
pvals <- vapply(cor_tests, function(res) res$p.value, numeric(1))
est <- vapply(cor_tests, function(res) res$estimate[[1]], numeric(1))
# Multiple-testing correction across all modules.
fdr <- p.adjust(pvals, method="fdr")
# The coefficient column keeps its existing name "Rho"; it holds the Pearson
# correlation estimate returned by cor.test().
df2 <- data.frame("Modules"=modules, "Rho"=est, "Pvalue"=pvals, "FDR"=fdr)
# Show the modules that pass the 5% FDR threshold.
df2 %>% filter(FDR < 0.05)

Modules,Rho,Pvalue,FDR
<chr>,<dbl>,<dbl>,<dbl>
MEblack,0.2843511,0.007602563,0.04966458
MEdarkgrey,-0.3558145,0.0007190295,0.009347383
MEdarkolivegreen,0.332276,0.001664925,0.01623302
MEgrey,-0.456054,9.032128e-06,0.0001761265
MEgrey60,0.5453477,4.711608e-08,1.837527e-06
MElightcyan,0.276817,0.009443591,0.04966458
MEmagenta,-0.2836943,0.007749418,0.04966458
MEskyblue,0.2741344,0.01018761,0.04966458


## Clustering modules

### Eigengene clustering

In [9]:
# Heatmap of the eigengene matrix (rows = samples, columns = modules) with
# the columns split into 4 k-means groups. The seed is set first so the
# k-means split is reproducible.
set.seed(13)
mat <- eigen %>% column_to_rownames("V1") %>% as.matrix
pdf("eigengene_heatmap.pdf", width=10, height=8)
ht <- Heatmap(mat, name="Eigengene", column_dend_height=unit(3, "cm"),
              show_row_names=FALSE, show_row_dend=FALSE, column_km=4)
# Draw explicitly instead of relying on top-level auto-printing: a bare
# Heatmap() call is not rendered when the code is sourced or wrapped in a
# function or loop, which would leave an empty PDF. This also matches how
# plot_module_heatmap() renders its heatmaps.
draw(ht)
dev.off()

### Clustering within significant modules

In [10]:
# Restore the saved objects from the RData file (`verbose` echoes their
# names), then read the biomart table used to annotate gene IDs.
load("../../_m/01.RData", verbose=TRUE)
biomart_file <- "../../../../input/biomart/biomart.csv"
biomart <- data.table::fread(biomart_file)

Loading objects:
  datExpr
  sample_table
  datTraits


In [11]:
# Build the expression matrix for a single module.
#
# Args:
#   mod: module label as stored in the `module` column of
#        "../../_m/modules.csv" (e.g. "black").
# Returns:
#   The columns of `datExpr` whose names are listed for that module, as a
#   matrix. Columns are renamed to `external_gene_name` from `biomart` where
#   available, otherwise to the Ensembl ID with its version suffix removed.
# Depends on the global objects `datExpr` and `biomart`.
get_module_mat <- function(mod){
    MEmodule = data.table::fread("../../_m/modules.csv", header=TRUE) %>%
        filter(module == mod)
    mat0 = datExpr %>% as.data.frame %>%
        select(any_of(MEmodule$V1)) %>% as.matrix
    # as.character() keeps this a zero-length character vector (not NULL)
    # when no gene of the module is present in `datExpr`.
    gene_ids = as.character(colnames(mat0))
    annot = data.frame("Geneid"=gene_ids,
                       "ensembl_gene_id"=gsub("\\..*", "", gene_ids)) %>%
        left_join(biomart, by="ensembl_gene_id") %>%
        # Keep exactly one annotation row per matrix column. Deduplicating on
        # the stripped Ensembl ID would drop a row whenever two versioned IDs
        # share the same base ID, and the rename below would then fail with a
        # length mismatch.
        distinct(Geneid, .keep_all=TRUE) %>%
        mutate(external_gene_name=coalesce(external_gene_name,ensembl_gene_id))
    # The annotation must line up one-to-one with the matrix columns.
    stopifnot(identical(annot$Geneid, gene_ids))
    colnames(mat0) <- annot$external_gene_name
    return(mat0)
}

# Write a clustered heatmap of one module's expression matrix to
# "<mod>_module_heatmap.pdf", with per-sample ancestry proportions shown as
# row annotations.
#
# Args:
#   mod: module label passed to get_module_mat() and used in the file name.
#   w, h: PDF width and height, passed to pdf().
# Returns:
#   The value of dev.off() after the PDF has been closed.
# Depends on the global table `dt` (columns `V1`, `Eur`, `Afr`).
plot_module_heatmap <- function(mod, w, h){
    # Build the matrix before opening the device so that a failure here does
    # not leave an empty PDF behind.
    mat0 = get_module_mat(mod)
    # Line the ancestry values up with the matrix rows by sample ID rather
    # than assuming `dt` and the matrix share the same row order.
    idx = match(rownames(mat0), dt$V1)
    if(is.null(rownames(mat0)) || anyNA(idx)){
        # Row names are missing or do not all occur in `dt$V1`: fall back to
        # the previous positional pairing, which requires equal row counts.
        stopifnot(nrow(dt) == nrow(mat0))
        warning("Matrix row names could not be matched to dt$V1; ",
                "ancestry annotation is paired by position.", call.=FALSE)
        idx = seq_len(nrow(mat0))
    }
    pdf(paste0(mod,"_module_heatmap.pdf"), width=w, height=h)
    # Make sure the device is closed even if drawing fails part-way.
    device_open = TRUE
    on.exit(if(device_open) dev.off(), add=TRUE)
    # Fixed seed: rows are split with k-means (row_km=2).
    set.seed(13)
    row_ha = rowAnnotation(EA = dt$Eur[idx], AA = dt$Afr[idx])
    ht = Heatmap(mat0,
            name="Residualized\nExpression",
            column_dend_height=unit(1, "cm"), row_km=2,
            show_row_names=FALSE, show_row_dend=TRUE,
            row_dend_width=unit(2, "cm"),
            right_annotation = row_ha)
    draw(ht)
    device_open = FALSE
    dev.off()
}

In [12]:
# Recap of the modules passing the 5% FDR threshold in the linear model.
filter(df1, FDR < 0.05)

Modules,Pvalue,FDR
<chr>,<dbl>,<dbl>
MEblack,0.007602563,0.04966458
MEdarkgrey,0.0007190295,0.009347383
MEdarkolivegreen,0.001664925,0.01623302
MEgrey,9.032128e-06,0.0001761265
MEgrey60,4.711608e-08,1.837527e-06
MElightcyan,0.009443591,0.04966458
MEmagenta,0.007749418,0.04966458
MEskyblue,0.01018761,0.04966458


In [13]:
# One PDF per module; the width is set individually per module, the height is
# the same for all.
plot_module_heatmap("black", w=24, h=8)
plot_module_heatmap("darkgrey", w=16, h=8)
plot_module_heatmap("darkolivegreen", w=12, h=8)
plot_module_heatmap("grey60", w=20, h=8)
plot_module_heatmap("lightcyan", w=20, h=8)
plot_module_heatmap("magenta", w=20, h=8)
plot_module_heatmap("skyblue", w=14, h=8)

In [14]:
# The "grey" module is plotted in its own cell.
plot_module_heatmap("grey", w=20, h=8)

The automatically generated colors map from the minus and plus 99^th of
the absolute values in the matrix. There are outliers in the matrix
whose patterns might be hidden by this color mapping. You can manually
set the color to `col` argument.

Use `suppressMessages()` to turn off this message.

`use_raster` is automatically set to TRUE for a matrix with more than
2000 columns You can control `use_raster` argument by explicitly
setting TRUE/FALSE to it.

Set `ht_opt$message = FALSE` to turn off this message.

'magick' package is suggested to install to give better rasterization.

Set `ht_opt$message = FALSE` to turn off this message.



## Reproducibility Information

In [15]:
# Record the current time and the CPU/elapsed time used by this R session.
Sys.time()
proc.time()
# Widen the output so the session table below is not wrapped.
options(width = 120)
# Print the R version, platform settings and loaded package versions.
sessioninfo::session_info()

[1] "2021-10-01 13:35:41 EDT"

   user  system elapsed 
 60.478   2.514  63.595 

─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.0.3 (2020-10-10)
 os       Arch Linux                  
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2021-10-01                  

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package        * version  date       lib source        
 abind            1.4-5    2016-07-21 [1] CRAN (R 4.0.2)
 assertthat       0.2.1    2019-03-21 [1] CRAN (R 4.0.2)
 backports        1.2.1    2020-12-09 [1] CRAN (R 4.0.2)
 base64enc        0.1-3    2015-07-28 [1] CRAN (R 4.0.2)
 BiocGenerics     0.36.1   2021-04-16 [1] Bioconductor  
 broom            0.7.9    20