In [1]:
# Conversion libraries and Seurat
library(SeuratDisk)
library(SeuratData)
library(Seurat)
library(Signac)

# plotting and data science packages
library(tidyverse)
library(cowplot)
library(patchwork)

# co-expression network analysis packages:
library(WGCNA)
library(hdWGCNA)

# Make the cowplot theme the default theme for every ggplot2 plot in this session
theme_set(theme_cowplot())

# Fix the random seed so that stochastic steps give the same result on re-runs
set.seed(12345)

The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, will retire in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
The sp package is now running under evolution status 2
     (status 2 uses the sf package in place of rgdal)

Registered S3 method overwritten by 'SeuratDisk':
  method            from  
  as.sparse.H5Group Seurat

Attaching SeuratObject

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    

In [23]:
# Both inputs live under the same results directory; only the normalization
# sub-folder ("raw" vs "log1p_cp10k") differs.
results_root <- "/cellar/users/aklie/projects/igvf/topic_grn_links/grn_inference/hdwgcna/results/Bridge_Satpathy/balanced_genotype_microglia/0.05"
raw_path <- file.path(results_root, "raw", "both")
normalized_path <- file.path(results_root, "log1p_cp10k", "both")

In [30]:
# Load the raw and the pre-normalized expression matrices from their
# respective directories (raw_path / normalized_path).
cnts_mtx <- Read10X(data.dir = raw_path)
normalized_mtx <- Read10X(data.dir = normalized_path)
# Per-cell metadata; the first CSV column is used as row names.
# NOTE(review): `dir_path` is not defined anywhere in the visible notebook
# (only `raw_path` and `normalized_path` are). On a fresh kernel this line
# fails with "object 'dir_path' not found" -- define it or point this at the
# intended directory. TODO confirm which directory holds metadata.csv.gz.
metadata <- read.csv(file.path(dir_path, "metadata.csv.gz"), row.names = 1)

In [31]:
# Build the Seurat object from the raw count matrix and attach the
# per-cell metadata table to it.
seurat_obj <- CreateSeuratObject(
  counts = cnts_mtx,
  meta.data = metadata
)

# Print a summary of the new object
seurat_obj

An object of class Seurat 
6277 features across 2996 samples within 1 assay 
Active assay: RNA (6277 features, 0 variable features)

In [32]:
# Add the normalized counts to the "data" slot of the "RNA" assay
# NOTE(review): this writes the slot directly, so nothing here checks that
# normalized_mtx has the same genes and cells (in the same order) as the raw
# counts used to create the object -- confirm the two matrices are aligned.
seurat_obj[["RNA"]]@data <- normalized_mtx

In [33]:
# Remaining preprocessing: variable-feature selection, scaling, PCA.

# Pick the 2000 most variable features using the "vst" method
seurat_obj <- FindVariableFeatures(
  seurat_obj,
  selection.method = "vst",
  nfeatures = 2000
)

# Scale every gene in the object, not only the variable ones
all.genes <- rownames(seurat_obj)
seurat_obj <- ScaleData(seurat_obj, features = all.genes)

# PCA restricted to the variable features selected above
seurat_obj <- RunPCA(
  seurat_obj,
  features = VariableFeatures(object = seurat_obj)
)

Centering and scaling data matrix

PC_ 1 
Positive:  Gm3005, Gm3252, Gm3667, Gm3164, Rpl15, Ugt1a7c, Gm3468, Gm3373, Gm5796, Pecam1 
	   Gm3194, Gm3500, Gm14044, Gm8108, Gm3020, Gm10409, Stab1, Gm26650, Hpgd, ENSMUSG00000120942 
	   Gm35154, 9930111J21Rik2, Trim30a, 2610042L04Rik, Gal3st4, Gm26893, Gm3558, Gm3752, Apobec1, Gm26552 
Negative:  Meg3, Fgf14, Csmd1, Adgrb3, Dlgap2, Lrrc7, Snhg11, Nrxn3, Erc2, Nrg3 
	   Nlgn1, Kcnd2, Nrg1, Opcml, Tenm2, Fgf12, Asic2, Cadm2, Grm7, Macrod2 
	   Adgrl3, Rims2, Negr1, Mdga2, Anks1b, Rims1, Plcb1, Grin2a, Ctnna2, Gabrb3 
PC_ 2 
Positive:  Rarb, Rgs9, Dach1, Adcy5, Gm10754, Unc13c, Kcnab1, Ryr3, Spock3, Dgkb 
	   Gng7, Gnal, Srgap1, Pard3, Meis2, Pde1c, Adamts3, Pde7b, Slc4a4, Stox2 
	   Rbms3, A330015K06Rik, Dcc, Bcl11b, Igsf11, Kirrel3, Kctd8, Reln, Inf2, Sv2c 
Negative:  Car10, Kcnt2, Pdzrn3, Satb2, Adcy2, Kcnj3, Fhod3, Cdh12, Cadps2, Rorb 
	   Kcnj6, Clstn2, Dpp10, Zmat4, Mlip, A830082K12Rik, Miat, Trhde, Unc5d, Mical2 
	   Ralyl, Dscam, Rgs6

In [34]:
# Prepare the Seurat object for hdWGCNA under the experiment name "test".
# Genes are selected by the fraction of cells expressing them; the cutoff is
# 0 here (presumably so that no gene is filtered out -- the soft-power run
# further down reports all 6277 genes).
seurat_obj <- SetupForWGCNA(
  seurat_obj,
  gene_select = "fraction",
  fraction = 0,
  wgcna_name = "test"
)

In [39]:
# Construct metacells in each group
seurat_obj <- MetacellsByGroups(
  seurat_obj = seurat_obj,
  # columns of seurat_obj@meta.data that define the groups
  group.by = c("Genotype", "sample"),
  reduction = "pca",
  # nearest-neighbors parameter
  k = 25,
  # maximum number of shared cells between two metacells
  max_shared = 15,
  # column used to set the Idents of the metacell Seurat object
  ident.group = "sample",
  assay = "RNA",
  slot = "counts",
  # groups that do not meet this cell count are removed (see warning output)
  min_cells = 50
)

“Removing the following groups that did not meet min_cells: B6J#114”


In [40]:
# Normalize the metacell expression matrix (the metacells above were
# aggregated from the raw "counts" slot)
seurat_obj <- NormalizeMetacells(seurat_obj)

In [42]:
# Set the expression matrix (datExpr) for the "test" hdWGCNA experiment,
# taken from the normalized "data" slot of the RNA assay of the metacell
# object rather than from the single cells.
seurat_obj <- SetDatExpr(
  seurat_obj,
  assay = "RNA",
  use_metacells = TRUE,
  wgcna_name = "test",
  slot = "data"
)

In [43]:
# Scan a range of soft-thresholding powers on the metacell expression matrix
seurat_obj <- TestSoftPowers(
  seurat_obj,
  # stated explicitly, although the original author notes it is the default
  use_metacells = TRUE,
  # datExpr was already set by the SetDatExpr() call, so do not set it again
  setDatExpr = FALSE
)

pickSoftThreshold: will use block size 6277.
 pickSoftThreshold: calculating connectivity for given powers...
   ..working on genes 1 through 6277 of 6277


“executing %dopar% sequentially: no parallel backend registered”
“bicor: zero MAD in variable 'x'. Pearson correlation was used for individual columns with zero (or missing) MAD.”
“bicor: zero MAD in variable 'y'. Pearson correlation was used for individual columns with zero (or missing) MAD.”


   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1   0.0421  13.90          0.975 3150.000  3.15e+03 3250.0
2      2   0.4270 -19.50          0.941 1620.000  1.61e+03 1780.0
3      3   0.8880 -11.60          0.936  855.000  8.38e+02 1080.0
4      4   0.9590  -7.05          0.954  464.000  4.42e+02  736.0
5      5   0.9670  -4.74          0.957  259.000  2.36e+02  537.0
6      6   0.9640  -3.56          0.954  149.000  1.28e+02  412.0
7      7   0.9650  -2.83          0.957   88.400  7.01e+01  329.0
8      8   0.9660  -2.37          0.960   54.500  3.89e+01  271.0
9      9   0.9650  -2.09          0.956   34.900  2.19e+01  228.0
10    10   0.9590  -1.89          0.948   23.200  1.24e+01  195.0
11    12   0.9550  -1.63          0.948   11.500  4.13e+00  146.0
12    14   0.9670  -1.45          0.969    6.450  1.44e+00  113.0
13    16   0.9570  -1.36          0.968    4.000  5.21e-01   89.4
14    18   0.9320  -1.32          0.948    2.650  1.93e-01   71.6
15    20  

In [47]:
# Get the soft-power table; it can also be accessed with
# head(get(NAME, seurat_obj@misc)$wgcna_powerTable)
power_table <- GetPowerTable(seurat_obj)

# Pick the first tested power whose scale-free topology fit exceeds 0.90.
# which() drops NA fits; indexing with [1] yields NA when nothing passes.
power <- power_table$Power[which(power_table$SFT.R.sq > 0.90)[1]]

# Fail here with a clear message instead of passing an NA power on to
# WGCNA::adjacency() further down.
if (is.na(power)) {
  stop("No tested soft power reached SFT.R.sq > 0.90; cannot select a power.")
}
power

In [48]:
# Pull the expression matrix (datExpr) stored under the "test" hdWGCNA
# experiment in the object's misc slot
dat <- get("test", seurat_obj@misc)$datExpr

In [49]:
# Gene-by-gene adjacency matrix for a signed network, raised to the soft
# power selected from the power table.
# NOTE(review): no correlation function is passed, so adjacency() uses its
# own default, while the TestSoftPowers output above reports bicor -- confirm
# that the power was chosen under the same correlation measure used here.
adj <- WGCNA::adjacency(datExpr = dat, type = "signed", power = power)

In [50]:
# Convert a weight matrix into a ranked list of links.
#
# Arguments:
#   weightMatrix  Matrix of link weights. Row names are reported as
#                 "regulatoryGene", column names as "targetGene".
#   reportMax     Optional maximum number of links to return (the highest
#                 weights are kept). NULL (default) returns every link.
#   threshold     Numeric; only links with weight >= threshold are kept.
#
# Returns:
#   A data.frame with columns regulatoryGene, targetGene and weight, sorted
#   by decreasing weight, with row names reset. Self-links (same name as row
#   and column) are excluded. The result has zero rows when no link passes
#   the threshold.
#
# Raises:
#   An error if `threshold` is not numeric. A warning if the same
#   regulator-target pair appears more than once in the result.
getLinkList <- function(weightMatrix, reportMax=NULL, threshold=0) {
  if (!is.numeric(threshold)) {
    stop("threshold must be a number.")
  }

  # Only process weights off-diagonal: blank out entries where a gene would
  # be linked to itself so that melt(na.rm = TRUE) drops them.
  regulatorsInTargets <- rownames(weightMatrix)[rownames(weightMatrix) %in% colnames(weightMatrix)]
  if (length(regulatorsInTargets) == 1) {
    weightMatrix[regulatorsInTargets, regulatorsInTargets] <- NA
  }
  if (length(regulatorsInTargets) > 1) {
    diag(weightMatrix[regulatorsInTargets, regulatorsInTargets]) <- NA
  }

  # Long format: one row per (row name, column name, value), NAs removed
  linkList <- reshape2::melt(weightMatrix, na.rm = TRUE)
  colnames(linkList) <- c("regulatoryGene", "targetGene", "weight")
  linkList <- linkList[linkList$weight >= threshold, ]
  linkList <- linkList[order(linkList$weight, decreasing = TRUE), ]

  if (!is.null(reportMax)) {
    # seq_len() rather than 1:n: when no link survives the threshold, 1:0
    # would index rows c(1, 0) and return a spurious all-NA row.
    linkList <- linkList[seq_len(min(nrow(linkList), reportMax)), ]
  }
  rownames(linkList) <- NULL

  uniquePairs <- nrow(unique(linkList[, c("regulatoryGene", "targetGene")]))
  if (uniquePairs < nrow(linkList)) {
    warning("There might be duplicated regulator-target (gene id/name) pairs.")
  }

  return(linkList)
}

In [51]:
# Flatten the adjacency matrix into a ranked link list (all links, no cap).
# Each gene pair shows up in both directions, as the preview below shows
# (e.g. Nnt.1/Nnt and Nnt/Nnt.1).
link.list <- getLinkList(adj)

In [52]:
# Preview the highest-weight links
head(link.list)

Unnamed: 0_level_0,regulatoryGene,targetGene,weight
Unnamed: 0_level_1,<fct>,<fct>,<dbl>
1,mt-Co3,Gm28437,1
2,mt-Co2,Gm28661,1
3,Nnt.1,Nnt,1
4,Nnt,Nnt.1,1
5,Entpd4b,Entpd4,1
6,Entpd4,Entpd4b,1


In [None]:
# Save the adjacency (link) list as a tab-separated file with a header row,
# no row names and no quoting.
# NOTE(review): the original comment said "left cortex", but the input paths
# above point at a microglia dataset -- confirm which dataset this describes.
# NOTE(review): `OUT` is not defined in the visible notebook; set the output
# directory before running this cell.
write.table(link.list, file = file.path(OUT, "adj.tsv"), sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)