# Exploring module eigengenes and their correlation with phenotypes

In [None]:
suppressPackageStartupMessages({
    library(ComplexHeatmap)
    library(tidyverse)
    library(ggpubr)
})

## Prepare data

### Load phenotype data

In [None]:
# Read the merged phenotype table and preview its first two rows.
pheno <- data.table::fread(
    "../../../../input/phenotypes/merged/_m/merged_phenotypes.csv"
)
head(pheno, 2)

In [None]:
# Read the ancestry-proportion table and preview its first two rows.
ancestry <- data.table::fread(
    "../../../../input/ancestry_structure/structure.out_ancestry_proportion_raceDemo_compare"
)
head(ancestry, 2)

### Load module eigengenes

In [None]:
# Read the eigengene table. Column `V1` carries the row identifiers (it is
# used as row names further down); every other column is treated as a module.
eigen <- data.table::fread("../../_m/eigengenes.csv")
modules <- colnames(select(eigen, -V1))
# Quick look: table dimensions and the top-left corner.
dim(eigen)
eigen[1:2, 1:5]

### Merge data

In [None]:
# Attach phenotypes to every eigengene row (left join on `V1`), then keep only
# the rows whose `BrNum` has a matching `id` in the ancestry table (inner join).
dt <- inner_join(
    left_join(eigen, pheno, by = "V1"),
    ancestry,
    by = c("BrNum" = "id")
)
# Quick look: merged dimensions and the top-left corner.
dim(dt)
dt[1:2, 1:5]

## Examine correlation of modules with ancestry

### Linear model

In [None]:
# For each module, fit `Eur ~ <module>` on the merged table and keep the
# ANOVA F-test p-value of the module term.
# (`Eur` is presumably the European ancestry proportion -- confirm against
# the ancestry table.)
#
# vapply() replaces the original grow-with-c() loop: it always returns a
# numeric vector (numeric(0) for an empty module list, where the loop left
# `pvals` as NULL and data.frame() silently dropped the column).
# USE.NAMES = FALSE keeps the vector unnamed so the data frame retains its
# default integer row names.
pvals <- vapply(
    modules,
    function(mod) {
        fit <- lm(as.formula(paste0("Eur ~ ", mod)), data = dt)
        anova(fit)[mod, "Pr(>F)"]
    },
    numeric(1),
    USE.NAMES = FALSE
)
# Benjamini-Hochberg adjustment across all modules.
fdr <- p.adjust(pvals, method = "fdr")
df1 <- data.frame("Modules" = modules, "Pvalue" = pvals, "FDR" = fdr)
# Show the nominally significant modules.
df1 %>% filter(Pvalue < 0.05)

In [None]:
# Tag the per-module linear-model results with the tissue label and write
# them to a tab-separated file in the working directory.
data.table::fwrite(
    mutate(df1, Tissue = "Caudate"),
    file = "eigen_correlation_ancestry.tsv",
    sep = "\t"
)

### Pearson correlation

In [None]:
# Pearson correlation between `Eur` and each module eigengene.
#
# One cor.test() per module; vapply() collects (estimate, p-value) pairs into
# a 2 x n_modules matrix. This replaces the original loop that grew two
# vectors with c() and left them NULL (dropping the data-frame columns) when
# there were no modules. USE.NAMES = FALSE keeps the results unnamed so the
# data frame retains its default integer row names.
cor_stats <- vapply(
    modules,
    function(mod) {
        res <- cor.test(dt[["Eur"]], dt[[mod]], method = "pearson")
        c(res$estimate[[1]], res$p.value)
    },
    numeric(2),
    USE.NAMES = FALSE
)
est <- cor_stats[1, ]
pvals <- cor_stats[2, ]
# Benjamini-Hochberg adjustment across all modules.
fdr <- p.adjust(pvals, method = "fdr")
# NOTE: the column is called "Rho" but holds the Pearson estimate from above.
df2 <- data.frame("Modules" = modules, "Rho" = est, "Pvalue" = pvals, "FDR" = fdr)
# Show the nominally significant modules.
df2 %>% filter(Pvalue < 0.05)

## Clustering modules

### Eigengene clustering

In [None]:
# Heatmap of the eigengene matrix (rows named by `V1`, one column per
# module), with columns split into 4 k-means groups. The seed is fixed first
# so the k-means split is reproducible.
set.seed(13)
mat <- as.matrix(column_to_rownames(eigen, "V1"))
pdf("eigengene_heatmap.pdf", width = 10, height = 10)
draw(
    Heatmap(
        mat,
        name = "Eigengene",
        column_km = 4,
        column_dend_height = unit(3, "cm"),
        show_row_names = FALSE,
        show_row_dend = FALSE
    )
)
dev.off()

### Clustering within the significant violet module

In [None]:
# Restore the saved workspace objects; verbose = TRUE lists what was loaded.
# `datExpr` used below is expected to come from this file (it is not created
# anywhere else in this notebook) -- TODO confirm.
load("../../_m/01.RData", verbose = TRUE)
# Rows of the module-assignment table that belong to the violet module.
MEviolet <- filter(
    data.table::fread("../../_m/modules.csv", header = TRUE),
    module == "violet"
)
# Keep only the expression columns listed in MEviolet$V1; any_of() silently
# skips identifiers that are absent from datExpr.
violet.mat <- as.matrix(
    select(as.data.frame(datExpr), any_of(MEviolet$V1))
)

In [None]:
# Build a gene annotation table for the violet-module columns: strip the
# version suffix (everything from the first ".") from each column name and
# look the resulting Ensembl ID up in the biomart table.
biomart_file <- "../../../../input/biomart/biomart.csv"
biomart <- data.table::fread(biomart_file)
annot <- data.frame(
    "Geneid" = colnames(violet.mat),
    "ensembl_gene_id" = gsub("\\..*", "", colnames(violet.mat))
) %>%
    left_join(biomart, by = "ensembl_gene_id") %>%
    # left_join() emits one row per match, so an Ensembl ID that appears more
    # than once in biomart would duplicate its gene. Keep the first match so
    # `annot` stays one row per column of violet.mat, in the same order.
    distinct(Geneid, .keep_all = TRUE)
# Fail early if the annotation no longer lines up with the matrix columns.
stopifnot(nrow(annot) == ncol(violet.mat))
# Number of genes without a gene symbol.
sum(is.na(annot$external_gene_name))

In [None]:
# Relabel the matrix columns with gene symbols.
# The lookup goes through `Geneid` rather than by position, so it still works
# if `annot` has extra rows. Genes with a missing or empty symbol keep their
# original identifier instead of becoming an NA column name.
gene_ids <- colnames(violet.mat)
gene_symbols <- annot$external_gene_name[match(gene_ids, annot$Geneid)]
unmapped <- is.na(gene_symbols) | gene_symbols == ""
gene_symbols[unmapped] <- gene_ids[unmapped]
colnames(violet.mat) <- gene_symbols

In [None]:
# Heatmap of the violet-module expression matrix, rows split into 2 k-means
# groups and annotated with the `Eur` / `Afr` columns of the merged table.
#
# `dt` was built with an inner join on the ancestry table, so it may have
# fewer rows than violet.mat, or rows in a different order. Align the
# annotation to the heatmap rows by sample ID instead of relying on position.
# Assumes the row names of violet.mat are the same identifiers as dt$V1 --
# TODO confirm. Samples without an ancestry record get NA.
sample_ids <- rownames(violet.mat)
row_idx <- match(sample_ids, dt$V1)
if (is.null(sample_ids) || all(is.na(row_idx))) {
    # No usable IDs: fall back to the original positional pairing, which is
    # only valid when both objects have the same number of rows.
    stopifnot(nrow(dt) == nrow(violet.mat))
    row_idx <- seq_len(nrow(violet.mat))
}

pdf("violet_module_heatmap.pdf", width = 10, height = 10)
# Seed is set before the annotation and the heatmap are built, in that order,
# so the result is reproducible.
set.seed(13)
row_ha <- rowAnnotation(EA = dt$Eur[row_idx], AA = dt$Afr[row_idx])
Heatmap(violet.mat, name = "Residualized\nExpression",
        column_dend_height = unit(1, "cm"), row_km = 2,
        show_row_names = FALSE, show_row_dend = TRUE,
        row_dend_width = unit(2, "cm"),
        right_annotation = row_ha)
dev.off()

## Reproducibility Information

In [None]:
# Widen the console first so the session report below is not wrapped.
options(width = 120)
# Wall-clock timestamp and the CPU / elapsed time used by this R session.
Sys.time()
proc.time()
# Platform, R version and loaded package versions.
sessioninfo::session_info()