# hdWGCNA: Run WGCNA on igvf_b01_LeftCortex Microglia cells
**Authorship:** Adam Klie (last updated: 08/07/2023)<br>
***
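**Description:** This notebook loads the pre-processed hdWGCNA Seurat object for the igvf_b01_LeftCortex Microglia cells, constructs the co-expression network, computes module eigengenes and eigengene-based connectivity (kME), and extracts a ranked gene-gene link list from the signed adjacency matrix.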
***

In [13]:
# Conversion libraries and Seurat
library(SeuratDisk)
library(SeuratData)
library(Seurat)
library(Signac)

# plotting and data science packages
library(tidyverse)
library(cowplot)
library(patchwork)

# co-expression network analysis packages:
library(WGCNA)
library(hdWGCNA)

# using the cowplot theme for ggplot
# (theme_set makes it the default theme for every ggplot drawn in this session)
theme_set(theme_cowplot())

# set random seed for reproducibility
set.seed(12345)

In [14]:
# TODO: change to relative path based on your dataset
# Absolute, user-specific paths. results_dir, dataset_name and analysis are
# combined into the output directory OUT in the parameter cell.
# NOTE(review): seurat_dir is not referenced by any cell visible in this
# notebook -- confirm it is still needed.
seurat_dir <- "/cellar/users/aklie/data/igvf/topic_grn_links/seurat"
dataset_name <- "igvf_b01_LeftCortex"
results_dir <- "/cellar/users/aklie/projects/igvf/topic_grn_links/grn_inference/hdwgcna/results"
analysis <- "network_construction"

In [15]:
# Parameters
# These values are concatenated into NAME below, which identifies the
# experiment on disk and inside the Seurat object.
# NOTE(review): NN presumably is the number of nearest neighbors used when
# building metacells, and NORMALIZATION a label for the normalization
# applied upstream -- confirm against the pre-processing notebook.
CELL.TYPES <- "all"
ASSAY <- "RNA"
NORMALIZATION <- "ND"
NN <- 25
GENES <- "all"

# Output file path
# NAME prefixes the "_hdWGCNA.rds" file read and written by this notebook and
# is also passed to ConstructNetwork as tom_name.
NAME <- paste0(CELL.TYPES, "cells_", GENES, "genes_", NORMALIZATION, "_", NN, "neighbors")
# NOTE(review): "Microglia" and "filtered" are hard-coded path components and
# are independent of CELL.TYPES above -- update them when changing cell type.
OUT <- file.path(results_dir, dataset_name, "Microglia", "filtered", analysis)
OUT

In [16]:
# Read the pre-processed Seurat object for this experiment from OUT;
# the file name is NAME followed by "_hdWGCNA.rds".
seurat_obj <- readRDS(
  file = file.path(OUT, paste0(NAME, "_hdWGCNA.rds"))
)

In [17]:
# Soft-thresholding power handed to ConstructNetwork as soft_power.
power <- 3

In [41]:
# Switch the working directory to OUT as a workaround related to where the
# topological overlap matrix (TOM) files are written.
# NOTE(review): ConstructNetwork is also given tom_out_dir=OUT; confirm whether
# this setwd() is still required, since it changes the working directory for
# the remainder of the session.
setwd(OUT)

In [19]:
# Build the co-expression network with the soft power chosen above.
# TOM files are written into OUT under the experiment name.
# NOTE(review): setDatExpr = FALSE presumably reuses the expression matrix
# already stored in the object by the pre-processing notebook -- confirm.
seurat_obj <- ConstructNetwork(
  seurat_obj,
  soft_power = power,
  use_metacells = TRUE,
  setDatExpr = FALSE,
  tom_out_dir = OUT,
  tom_name = NAME # name of the topological overlap matrix written to disk
)

 Calculating consensus modules and module eigengenes block-wise from all genes
 Calculating topological overlaps block-wise from all genes
   Flagging genes and samples with too many missing values...
    ..step 1


    TOM calculation: adjacency..
    ..will not use multithreading.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
 ..Working on block 1 .
 ..Working on block 1 .
 ..merging consensus modules that are too close..


In [20]:
# compute all MEs in the full single-cell dataset
# Use the shared ASSAY parameter rather than a hard-coded "RNA" so this step
# stays in sync with the ModuleConnectivity call, which already reads ASSAY.
seurat_obj <- ModuleEigengenes(
  seurat_obj,
  assay = ASSAY,
  verbose = FALSE
)

[1] "turquoise"


Centering and scaling data matrix

“Keys should be one or more alphanumeric characters followed by an underscore, setting key from pcaturquoise to pcaturquoise_”
“All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to pcaturquoise_”


[1] "grey"


Centering and scaling data matrix

“Keys should be one or more alphanumeric characters followed by an underscore, setting key from pcagrey to pcagrey_”
“All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to pcagrey_”


[1] "blue"


Centering and scaling data matrix

“Keys should be one or more alphanumeric characters followed by an underscore, setting key from pcablue to pcablue_”
“All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to pcablue_”


In [21]:
# Eigengene-based connectivity (kME) for each gene, computed on the "data"
# slot of the configured assay with harmonized set to FALSE.
seurat_obj <- ModuleConnectivity(
  seurat_obj,
  assay = ASSAY,
  slot = "data",
  harmonized = FALSE
)

“bicor: zero MAD in variable 'x'. Pearson correlation was used for individual columns with zero (or missing) MAD.”


In [25]:
# Print the object to inspect the result of the steps above.
seurat_obj

$active_wgcna
[1] "allcells_allgenes_ND_25neighbors"

$allcells_allgenes_ND_25neighbors
$allcells_allgenes_ND_25neighbors$wgcna_group
[1] "all"

$allcells_allgenes_ND_25neighbors$wgcna_genes
   [1] "Xkr4"                  "Atp6v1h"               "Rb1cc1"               
   [4] "Pcmtd1"                "Sntg1"                 "Vcpip1"               
   [7] "Sgk3"                  "Cops5"                 "Cspp1"                
  [10] "Arfgef1"               "Ncoa2"                 "Tram1"                
  [13] "Lactb2"                "Kcnb2"                 "Terf1"                
  [16] "Stau2"                 "Ube2w"                 "Kcnq5"                
  [19] "Rims1"                 "Ogfrl1"                "Smap1"                
  [22] "Fam135a"               "Lmbrd1"                "Adgrb3"               
  [25] "Phf3"                  "Zfp451"                "Bend6"                
  [28] "Dst"                   "Fam168b"               "Hs6st1"               
  [31] "Uggt1"     

In [28]:
# Preview the expression matrix stored for this experiment: first five rows,
# three example genes. Index the misc list directly with [[NAME]] instead of
# get(), which would first coerce the whole list to an environment.
head(seurat_obj@misc[[NAME]]$datExpr)[1:5, c("Dgki", "Nav3", "Nhs")]

Unnamed: 0_level_0,Dgki,Nav3,Nhs
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
B6J#13A_1,0.0,3.771756,1.4052497
B6J#13A_2,0.4131431,3.893589,0.9300689
B6J#13A_3,0.3213089,3.670514,1.1858868
B6J#13A_4,0.7099086,3.695969,1.4113324
B6J#13A_5,0.8877516,3.978202,0.0


In [30]:
# Expression matrix stored under this experiment's entry in the misc slot.
# Index the list directly with [[NAME]] instead of get(), which would first
# coerce the whole list to an environment.
dat <- seurat_obj@misc[[NAME]]$datExpr
# `[[` returns NULL for a missing entry, so fail loudly here rather than
# later inside adjacency().
stopifnot(!is.null(dat))

In [32]:
# Signed gene-gene adjacency matrix computed from the expression matrix.
# Reuse `power` (the soft power given to ConstructNetwork) instead of a second
# hard-coded 3, so the two values cannot drift apart.
adj <- WGCNA::adjacency(
  datExpr = dat,
  type = "signed",
  power = power
)

In [36]:
#' Convert a weight matrix into a ranked list of links.
#'
#' Rows of `weightMatrix` are reported as `regulatoryGene`, columns as
#' `targetGene`. Self-links (a name present in both the row names and the
#' column names) are excluded.
#' NOTE(review): this appears to mirror GENIE3's getLinkList (the notebook
#' fails to load GENIE3) -- confirm the provenance.
#'
#' @param weightMatrix Numeric matrix of link weights with row and column names.
#' @param reportMax Optional maximum number of links to return; `NULL`
#'   (default) returns every link that passes `threshold`.
#' @param threshold Links with `weight >= threshold` are kept (default 0).
#' @return A data frame with columns `regulatoryGene`, `targetGene` and
#'   `weight`, sorted by decreasing weight, with default row names.
getLinkList <- function(weightMatrix, reportMax = NULL, threshold = 0) {
  if (!is.numeric(threshold)) {
    stop("threshold must be a number.")
  }

  # Only process weights off-diagonal: mask entries whose row and column name
  # are the same so that melt(na.rm = TRUE) drops them.
  regulatorsInTargets <- rownames(weightMatrix)[rownames(weightMatrix) %in% colnames(weightMatrix)]
  if (length(regulatorsInTargets) == 1) {
    weightMatrix[regulatorsInTargets, regulatorsInTargets] <- NA
  }
  if (length(regulatorsInTargets) > 1) {
    diag(weightMatrix[regulatorsInTargets, regulatorsInTargets]) <- NA
  }

  # Long format: one row per (row name, column name, value) triple.
  linkList <- reshape2::melt(weightMatrix, na.rm = TRUE)
  colnames(linkList) <- c("regulatoryGene", "targetGene", "weight")
  linkList <- linkList[linkList$weight >= threshold, ]
  linkList <- linkList[order(linkList$weight, decreasing = TRUE), ]

  if (!is.null(reportMax)) {
    # seq_len() instead of 1:n: when no link passes the threshold (or
    # reportMax is 0), 1:0 expands to c(1, 0) and returns a spurious row
    # instead of an empty result.
    linkList <- linkList[seq_len(min(nrow(linkList), reportMax)), ]
  }
  rownames(linkList) <- NULL

  uniquePairs <- nrow(unique(linkList[, c("regulatoryGene", "targetGene")]))
  if (uniquePairs < nrow(linkList)) {
    warning("There might be duplicated regulator-target (gene id/name) pairs.")
  }

  linkList
}

In [39]:
# Flatten the adjacency matrix into a ranked link list using the default
# arguments (no reportMax cap, threshold 0). Each gene pair is listed in both
# directions, as the output below shows.
link.list <- getLinkList(adj)

In [40]:
# Display the ranked link list.
link.list

regulatoryGene,targetGene,weight
<fct>,<fct>,<dbl>
Dgki,Nav3,0.9342583
Nav3,Dgki,0.9342583
Abi3bp,Nav3,0.9130226
Nav3,Abi3bp,0.9130226
Dgki,Abi3bp,0.8899119
Abi3bp,Dgki,0.8899119
Nhs,Nav3,0.8826017
Nav3,Nhs,0.8826017
Rapgef5,Nav3,0.8762839
Nav3,Rapgef5,0.8762839


In [35]:
library(GENIE3)

ERROR: Error in library(GENIE3): there is no package called ‘GENIE3’


In [58]:
# Persist the fully processed Seurat object for use in all the other notebooks.
# This writes to the same path the object was loaded from at the top of this
# notebook, so the input file is replaced.
saveRDS(
  seurat_obj,
  file = file.path(OUT, paste0(NAME, "_hdWGCNA.rds"))
)

# DONE!

---

# Scratch