In [1]:
import rpy2
import scipy
import logging
import warnings
import anndata2ri
import pandas as pd
import scanpy as sc
import numpy as np
import seaborn as sb
import decoupler as dc
import scrublet as scr
import decoupler as dc
from scipy import sparse
from anndata import AnnData
from tabnanny import verbose
import matplotlib.pyplot as plt
from gsva_prep import prep_gsva
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from typing import Optional, Union
from matplotlib.pyplot import rcParams
from functions import pathway_analyses
from statsmodels.stats.multitest import multipletests
from sklearn.model_selection import train_test_split
from pytorch_lightning.loggers import TensorBoardLogger
from rpy2.robjects.conversion import localconverter

In [2]:
def get_sys_dpi(width, height, diag):
    '''
    Estimate the pixel density (DPI) of a display.

    The DPI is the length of the screen diagonal in pixels divided by the
    length of the same diagonal in inches.

    width: horizontal resolution in pixels (if unsure, go visit `whatismyscreenresolution.net`)
    height: vertical resolution in pixels
    diag: diagonal screen size in inches

    returns: the DPI rounded to the nearest integer
    '''
    # Pythagoras on the pixel grid; unlike the width-in-inches formulation this
    # never divides by `width`, so a zero width no longer raises ZeroDivisionError.
    diag_pixels = (width**2 + height**2)**0.5
    return round(diag_pixels / diag)

In [3]:
# Ignore R warning messages
# Note: this line can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Silence Python-side deprecation, user and future warnings for the whole session
warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Automatically convert rpy2 outputs to pandas dataframes
# and load the IPython extension that provides the %%R cell magic used below
pandas2ri.activate()
anndata2ri.activate()
%load_ext rpy2.ipython

# Figure DPI derived from a 1512 x 982 px display with a 14.125 in diagonal
rcParams['figure.dpi'] = get_sys_dpi(1512, 982, 14.125)
#rcParams['figure.figsize']=(4,4) #rescale figures

sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
sc.logging.print_versions()



-----
anndata     0.8.0
scanpy      1.9.1
-----
OpenSSL                     22.0.0
PIL                         9.2.0
absl                        NA
anndata2ri                  1.1
appnope                     0.1.2
asttokens                   NA
astunparse                  1.6.3
attr                        21.4.0
backcall                    0.2.0
beta_ufunc                  NA
binom_ufunc                 NA
boto3                       1.26.32
botocore                    1.29.32
bottleneck                  1.3.5
brotli                      NA
certifi                     2022.09.24
cffi                        1.15.1
cloudpickle                 2.2.0
colorama                    0.4.4
cryptography                38.0.1
cycler                      0.10.0
cython_runtime              NA
dask                        2022.11.0
dateutil                    2.8.2
debugpy                     1.5.1
decorator                   5.1.1
decoupler                   1.4.0
defusedxml                  0.7.1
de

In [6]:
# Run configuration: several of these values are passed to the R cells below via `%%R -i`.
save_prefix = 'leng_etc'
map_meta = True
deg_method = 'DESeq2-Wald'
test_names = ['late_vs_early', 'early_vs_no', 'late_vs_no', 'ad_vs_no']
filter_genes = "TRUE"                   # NOTE(review): a string, not a bool; presumably forwarded to R as text, confirm
subject_id = 'PatientID'                # for leng this is `PatientID` for mathys is 'Subject', and 'individualID' for allen
gene_celltype_threshold = 0.05          # determines number of cells the gene must be expressed in
covariates = ['None']                   # list of covariates to be accounted for in regression.
celltypes = ["Excitatory", "Inhibitory", "Astrocyte", "Microglia", "Oligodendrocyte", "OPC"]

# Gene selection method used when setting up the Seurat object for WGCNA.
# Possible values are "custom", "fraction", "variable".
# If custom, a list of genes must be passed.
gene_selection = 'custom'

# Sample-level metadata table for this dataset
metadata = f'../data/raw/{save_prefix}/{save_prefix}_metadata.csv'
meta = pd.read_csv(metadata, encoding_errors='ignore')

## **Load Seurat object for Enrichment and Differential Module Eigen Gene Analysis**

In [8]:
%%R -i subject_id -i gene_celltype_threshold -i celltypes -i test_names -i save_prefix -i gene_selection -o seurat_obj

# Read the saved hdWGCNA object of every cell type into one named list,
# keyed by the entries of `celltypes`.
# NOTE(review): no library() calls are visible in this notebook; the hdWGCNA
# functions and `%>%` used in later cells must already be attached. Confirm.
seurat_obj <- list()

for (cell_type in celltypes) {

    rds_path <- paste0("../results/hdWGCNA/SeuratObject/", save_prefix, '/', cell_type, '_hdWGCNA_object.rds')

    # Assign into `seurat_obj`, the list created above and exported with `-o`.
    # The previous code wrote to a misspelled, never-created variable.
    seurat_obj[[cell_type]] <- readRDS(rds_path)

    # Report only after the object has actually been read
    print(paste0('Loaded data for hdWGCNA Experiment in ', toupper(cell_type)))
    print(seurat_obj[[cell_type]])
}

print('loaded data')

class: SingleCellExperiment 
dim: 16585 41432 
metadata(0):
assays(1): X
rownames(16585): SAMD11 NOC2L ... S100B PRMT2
rowData names(12): mt n_cells_by_counts ... mean std
colnames(41432): EC2_AAACCTGAGGATGCGT EC2_AAACCTGAGTCAATAG ...
  EC10_TTTGTCATCTATCGCC EC10_TTTGTCATCTCTGCTG
colData names(24): SampleID PatientID ... predictions pathology.group
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
[1] "loaded data into memory for recursive use"


# **Enrichment analysis**

Next, we perform enrichment tests on the hdWGCNA modules. We leverage the R package enrichR to perform enrichment tests on a wide range of curated gene lists. This analysis should point us towards biological processes that our hdWGCNA modules are involved in. Additionally, we perform a gene set overlap analysis to compare the genes in hdWGCNA modules with the marker genes identified using Seurat’s FindAllMarkers function.


## **EnrichR**

hdWGCNA includes the function RunEnrichr to compare the set of genes in each module with any of the gene lists hosted by Enrichr.

The results of the enrichment tests are stored in the hdWGCNA experiment, so they can be easily retrieved for downstream analysis or exported to external applications like Excel. In the following example, we perform the enrichment test with three Gene Ontology databases:

- `GO_Biological_Process_2021`
- `GO_Cellular_Component_2021`
- `GO_Molecular_Function_2021`

In [74]:
%%R -i seurat_obj -o seurat_obj -o enrich_df

# Enrichr databases to test for every module
# dbs <- c('GO_Biological_Process_2021')
dbs <- c(
  'GO_Biological_Process_2021',
  'GO_Cellular_Component_2021',
  'GO_Molecular_Function_2021'
)

# filtered enrichment tables, one entry per cell type
enrich_df <- list()

for (cell_type in names(seurat_obj)) {

  print(paste0('Estimating Module Enrichments in hdWGCNA Experiment for ', toupper(cell_type)))

  # run the enrichment tests; the updated object replaces the list entry
  seurat_obj[[cell_type]] <- RunEnrichr(
    seurat_obj[[cell_type]],
    dbs = dbs,        # character vector of enrichr databases to test
    max_genes = 100   # number of genes per module to test
  )

  # fetch the output table and keep only rows with P.value < 0.05
  enrich_df[[cell_type]] <- subset(
    GetEnrichrTable(seurat_obj[[cell_type]]),
    P.value < 0.05
  )
}

[1] "Estimating Module Enrichments in hdWGCNA Experiment for EXCITATORY"
Selecting by kME_EXC-M1
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
Parsing results... Done.
Selecting by kME_EXC-M2
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
Parsing results... Done.
Selecting by kME_EXC-M3
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
Parsing results... Done.
Selecting by kME_EXC-M4
Uploading data to Enrichr... Done.
  Querying GO_Biological_Process_2021... Done.
  Querying GO_Cellular_Component_2021... Done.
  Querying GO_Molecular_Function_2021... Done.
Parsing results... Done.
Selecting by kME_EXC-M5
Upl

In [75]:
# Re-wrap the per-cell-type objects returned by the R cell as an rpy2 ListVector
seurat_obj = robjects.ListVector(
    {name: seurat_obj[name] for name in seurat_obj.keys()}
)

## **Visualize enrichments**

Now that we have done the enrichment tests, we can now visualize the results.

### **EnrichrBarPlot**

hdWGCNA includes the function EnrichrBarPlot to summarize the results of every Enrichr database and every module. This function outputs a .pdf figure for each module, containing a barplot showing the top N enriched terms. The following example will plot the top 10 terms in each module and will output the results to a folder called enrichr_plots.

In [76]:
%%R -i seurat_obj -o seurat_obj

# root folder for the Enrichr figures of this dataset (created on first use)
fig_dir <- paste0("../results/hdWGCNA/Enrichr/", save_prefix, '/')
if (!dir.exists(fig_dir)) dir.create(fig_dir, recursive=TRUE)

for (cell_type in names(seurat_obj)) {

  print(paste0('Enrichr Plots for Modules in hdWGCNA Experiment for ', toupper(cell_type)))

  # GO term bar plots, written to a per-cell-type sub-folder
  EnrichrBarPlot(
    seurat_obj[[cell_type]],
    outdir = paste0(fig_dir, cell_type, "_enrichr_plots"),  # name of output directory
    n_terms = 10,          # number of enriched terms to show (sometimes more show if there are ties!!!)
    plot_size = c(5, 7),   # width, height of the output .pdfs
    logscale = TRUE        # show the enrichment on a log scale
  )
}

[1] "Enrichr Plots for Modules in hdWGCNA Experiment for EXCITATORY"
[1] "EXC-M1"
[1] "EXC-M2"
[1] "EXC-M3"
[1] "EXC-M4"
[1] "EXC-M5"
[1] "Enrichr Plots for Modules in hdWGCNA Experiment for INHIBITORY"
[1] "INH-M1"
[1] "INH-M2"
[1] "INH-M3"
[1] "INH-M4"
[1] "INH-M5"
[1] "Enrichr Plots for Modules in hdWGCNA Experiment for ASTROCYTE"
[1] "AST-M1"
[1] "AST-M2"
[1] "AST-M3"
[1] "AST-M4"
[1] "Enrichr Plots for Modules in hdWGCNA Experiment for MICROGLIA"
[1] "MIC-M1"
[1] "MIC-M2"
[1] "Enrichr Plots for Modules in hdWGCNA Experiment for OLIGODENDROCYTE"
[1] "OLI-M1"
[1] "OLI-M2"
[1] "OLI-M3"


In [77]:
# Re-wrap the per-cell-type objects returned by the R cell as an rpy2 ListVector
seurat_obj = robjects.ListVector(
    {name: seurat_obj[name] for name in seurat_obj.keys()}
)

### **EnrichrDotPlot**

hdWGCNA includes an additional visualization function for enrichment results, EnrichrDotPlot, which shows the top results for one Enrichr database in each module. In the following example, we plot the top term in the GO_Biological_Process_2021 database.

In [78]:
# %%R -i seurat_obj -o seurat_obj

# fig_dir = '../results/'
# # enrichr dotplot
# p <- EnrichrDotPlot(
#   seurat_obj,
#   mods='all',
#   database = "GO_Biological_Process_2021",
#   n_terms=3,
#   break_ties=TRUE
# )
# pdf(paste0(fig_dir, 'EX_GO_dotplot.pdf'), width=10, height=10, useDingbats=FALSE)
# p
# dev.off()

In [79]:
# # convert nested list of Seurat object into Rpy2 object 
# seurat_obj =  robjects.ListVector(
#                         {
#                             cell_type: seurat_obj[cell_type]
                     
#                             for cell_type in seurat_obj.keys()
#                         }
#                     )

# **Differential module eigengene (DME) analysis**

Here, we perform differential module eigengene (DME) analysis, revealing modules that are up- or down-regulated in given groups of cells.

## **DME analysis comparing two groups**

Here we perform DME testing between two different pathology groups (AD vs control). We use the hdWGCNA function FindDMEs, which is a special case of the Seurat function FindMarkers. We use the Mann-Whitney U test, also known as the Wilcoxon test, to compare two groups, but other tests can be used with the test.use parameter.

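FindDMEs requires a list of barcodes for group1 and for group2. Here group1 contains the cells whose `pathology.group` is `early` or `late`, and group2 contains the cells labelled `no`. The comparison is run separately for every cell type, each using that cell type's own hdWGCNA experiment.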



In [80]:
%%R -i seurat_obj -o seurat_obj -o DMEs

# NOTE(review): `group_col` and `groups` are not read anywhere in this cell;
# they are kept in case another cell relies on them. Confirm and remove if unused.
group_col <- 'cell_type'
groups <- celltypes

# DME result tables, one entry per cell type
DMEs <- list()

dat_dir <- paste0("../results/hdWGCNA/DMEs/", save_prefix, '/')

if (!dir.exists(dat_dir)) {
  dir.create(dat_dir, recursive=TRUE)
}


for (cell_type in names(seurat_obj)){

  print(paste0('Differential Module Eigengene Analysis in hdWGCNA Experiment for ', toupper(cell_type)))

  # cell barcodes of the two groups: AD (early + late pathology) vs. control (no pathology)
  pathology <- seurat_obj[[cell_type]]$pathology.group
  barcodes <- rownames(seurat_obj[[cell_type]]@meta.data)

  # `%in%` is FALSE for missing labels, whereas `==` returns NA and would
  # add spurious "NA" barcodes to the control group.
  g1 <- barcodes[pathology %in% c('early', 'late')]
  g2 <- barcodes[pathology %in% 'no']

  DMEs[[cell_type]] <- FindDMEs(
          seurat_obj[[cell_type]],
          barcodes1 = g1,
          barcodes2 = g2,
          test.use='wilcox',
          wgcna_name=toupper(substr(cell_type, 1, 3)), # the name of the hdWGCNA experiment
          harmonized=TRUE
      )

  # tag every row with the cell type it was computed for
  DMEs[[cell_type]]$group <- cell_type

  # fix infs: replace +/-Inf fold changes with 0
  DMEs[[cell_type]]$avg_log2FC[is.infinite(DMEs[[cell_type]]$avg_log2FC)] <- 0

  write.csv(DMEs[[cell_type]], row.names=FALSE, quote=FALSE, file=paste0(dat_dir, cell_type, '_AD_DMEs.csv'))
}


[1] "Differential Module Eigengene Analysis in hdWGCNA Experiment for EXCITATORY"
[1] 10449     5
[1] "EXC-M3" "EXC-M1" "EXC-M5" "EXC-M2" "EXC-M4"
[1] "Differential Module Eigengene Analysis in hdWGCNA Experiment for INHIBITORY"
[1] 5207    5
[1] "INH-M4" "INH-M5" "INH-M2" "INH-M3" "INH-M1"
[1] "Differential Module Eigengene Analysis in hdWGCNA Experiment for ASTROCYTE"
[1] 5851    4
[1] "AST-M4" "AST-M3" "AST-M2" "AST-M1"
[1] "Differential Module Eigengene Analysis in hdWGCNA Experiment for MICROGLIA"
[1] 5892    2
[1] "MIC-M1" "MIC-M2"
[1] "Differential Module Eigengene Analysis in hdWGCNA Experiment for OLIGODENDROCYTE"
[1] 9606    3
[1] "OLI-M2" "OLI-M3" "OLI-M1"


In [81]:
# Re-wrap the per-cell-type Seurat objects returned by the R cell as an rpy2 ListVector
seurat_obj = robjects.ListVector(
    {name: seurat_obj[name] for name in seurat_obj.keys()}
)

# Same re-wrapping for the per-cell-type DME results
DMEs = robjects.ListVector(
    {name: DMEs[name] for name in DMEs.keys()}
)

We can now visualize the results using the hdWGCNA functions `PlotDMEsLollipop` or `PlotDMEsVolcano`. First we make a lollipop plot to visualize the DME results.


In [82]:
%%R -i seurat_obj -o seurat_obj -i DMEs

# Lollipop plot of the DME results, one .pdf per cell type
fig_dir <- paste0("../results/hdWGCNA/DMEs/", save_prefix, '/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive=TRUE)
}


for (cell_type in names(seurat_obj)){

  print(paste0('Lollipop for DMEs in hdWGCNA Experiment for ', toupper(cell_type)))

  pdf(paste0(fig_dir, cell_type, '_dmes_lollipop.pdf'), width=10, height=10)

  # Close the pdf device even if plotting fails; otherwise the device stays
  # open and later plots would be written to the wrong file.
  tryCatch({
    p <- PlotDMEsLollipop(
      seurat_obj[[cell_type]],
      DMEs[[cell_type]],
      wgcna_name = toupper(substr(cell_type, 1, 3)), # the name of the hdWGCNA experiment
      pvalue = "p_val_adj"
    )
    print(p)
  }, finally = dev.off())

}

[1] "Lollipop for DMEs in hdWGCNA Experiment for EXCITATORY"
[1] "Please be aware comparison group/groups are not provided, which may casue an ERROR. PlotDMEsLollipop function will automatically assume all values are within the same group."
[1] "Lollipop for DMEs in hdWGCNA Experiment for INHIBITORY"
[1] "Please be aware comparison group/groups are not provided, which may casue an ERROR. PlotDMEsLollipop function will automatically assume all values are within the same group."
[1] "Lollipop for DMEs in hdWGCNA Experiment for ASTROCYTE"
[1] "Please be aware comparison group/groups are not provided, which may casue an ERROR. PlotDMEsLollipop function will automatically assume all values are within the same group."
[1] "Lollipop for DMEs in hdWGCNA Experiment for MICROGLIA"
[1] "Please be aware comparison group/groups are not provided, which may casue an ERROR. PlotDMEsLollipop function will automatically assume all values are within the same group."
[1] "Lollipop for DMEs in hdWGCNA Expe

In [83]:
# Re-wrap the per-cell-type objects returned by the R cell as an rpy2 ListVector
seurat_obj = robjects.ListVector(
    {name: seurat_obj[name] for name in seurat_obj.keys()}
)

# **Save Seurat Object**

In [None]:
%%R -i seurat_obj -o seurat_obj

# destination folder for the serialized objects (created if missing)
dat_dir <- paste0("../results/hdWGCNA/SeuratObject/", save_prefix, '/')
if (!dir.exists(dat_dir)) dir.create(dat_dir, recursive=TRUE)

for (cell_type in names(seurat_obj)) {

    print(paste0('Saving hdWGCNA object in hdWGCNA Experiment for ', toupper(cell_type)))

    # one .rds file per cell type
    saveRDS(
        seurat_obj[[cell_type]],
        file = paste0(dat_dir, cell_type, '_hdWGCNA_object.rds')
    )
}