In [None]:
import rpy2
import logging
import warnings
import anndata2ri
import pandas as pd
import scanpy as sc
from tabnanny import verbose
import matplotlib.pyplot as plt
from gsva_prep import prep_gsva
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from matplotlib.pyplot import rcParams

In [None]:
def get_sys_dpi(width, height, diag):
    '''
    Estimate the pixel density (dots per inch) of a display.

    width:  horizontal resolution in pixels (if unsure, go visit `whatismyscreenresolution.net`)
    height: vertical resolution in pixels
    diag:   diagonal screen size in inches

    Returns the horizontal pixel count divided by the physical screen
    width in inches, rounded with `round`.
    '''
    # The physical width follows from
    #   width_in**2 + height_in**2 == diag**2   and
    #   height_in / width_in == height / width
    aspect_sq = height**2 / width**2
    physical_width = (diag**2 / (1 + aspect_sq)) ** 0.5
    dpi = width / physical_width
    return round(dpi)

In [None]:
# Session-wide setup: logging/warning filters, rpy2 conversion, figure DPI and
# scanpy verbosity. Everything here mutates global state, so it must run once
# before the cells below.

# Ignore R warning messages: only records at ERROR level or above pass
# through rpy2's callback logger.
# Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Hide these three Python warning categories for the rest of the session.
warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Automatically convert rpy2 outputs to pandas dataframes
# NOTE(review): anndata2ri.activate() presumably registers AnnData <->
# SingleCellExperiment conversion as well -- confirm against anndata2ri docs.
pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension so the `%%R` cells below can run.
%load_ext rpy2.ipython

# Figure DPI is derived from one specific display (1512 x 982 px, 14.125 in
# diagonal); adjust these numbers when running on a different screen.
rcParams['figure.dpi'] = get_sys_dpi(1512, 982, 14.125)
#rcParams['figure.figsize']=(4,4) #rescale figures

# scanpy logging verbosity level
sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
# Record package versions in the cell output.
sc.logging.print_versions()



# **hdWGCNA Enrichment Analysis**

In [None]:
%%R
# Attach every R package used by the R cells of this notebook.
# Start-up banners are suppressed to keep the cell output readable.
# The attach order is kept exactly as before, because it determines which
# package masks which when two packages export the same name.
suppressPackageStartupMessages({
    # network analysis, statistics and plotting helpers
    library(WGCNA)
    library(Matrix)
    library(viridis)
    library(harmony)
    library(ggpubr)
    library(tictoc)
    library(RColorBrewer)
    library(Hmisc)
    library(corrplot)
    library(grid)
    library(gridExtra)
    library(igraph)
    library(ggrepel)
    library(conflicted)   # used below through conflicts_prefer()
    library(readxl)

    # single-cell analysis package
    library(Seurat)

    # plotting and data science packages
    library(tidyverse)
    library(cowplot)
    library(patchwork)

    # co-expression network analysis packages
    # (WGCNA is already attached at the top of this list; the second
    #  library(WGCNA) call that used to sit here was a no-op and was removed)
    library(hdWGCNA)

    # gene enrichment packages
    library(enrichR)
    library(GeneOverlap)

    # gene set packages
    library(GSEABase)
    library(GSVA)
})

# using the cowplot theme for ggplot
theme_set(theme_cowplot())

# set random seed for reproducibility
set.seed(12345)

# optionally enable multithreading
# enableWGCNAThreads(nThreads = 4)

In [None]:
# Experiment configuration. Several of these values are pushed into R by the
# `%%R -i ...` line of the loading cell below.
save_prefix = 'leng_sfg'                # study key; used to build input and output paths
map_meta = True
deg_method =  'DESeq2-Wald'
test_names = ['late_vs_early', 'early_vs_no', 'late_vs_no', 'ad_vs_no']
filter_genes = "TRUE"                   # note: a string, not a Python bool
subject_ids_for_study = {'leng_sfg': 'PatientID',
                        'leng_etc': 'PatientID',
                        'seaad_mtg': 'individualID'}

subject_id = subject_ids_for_study[save_prefix]     # for leng this is `PatientID` for mathys is 'Subject', and allen is 'individualID'
gene_celltype_threshold = 0.10          # determines number of cells the gene must be expressed in
covariates = ['None']                   # list of covariates to be accounted for in regression.

# Gene selection method used when setting up the Seurat object for WGCNA.
# Possible values are "custom", "fraction", "variable".
# If "custom", a list of genes must be passed.
# (This used to be assigned twice in this cell with the same value; the
#  duplicate assignment has been removed.)
gene_selection = 'custom'

# Only used when gene_selection == 'custom': how to obtain the list of genes
# passed into `SetupForWGCNA`. Possible values are "diff_exp", "overlap", "all".
gene_set_select = 'diff_exp'

celltypes = ["Excitatory", "Inhibitory", "Astrocyte", "Microglia", "Oligodendrocyte", "OPC"]

# Study metadata table; undecodable bytes in the CSV are skipped
# (encoding_errors='ignore') rather than raising.
metadata = f'../data/raw/{save_prefix}/{save_prefix}_metadata.csv' 
meta = pd.read_csv(metadata, encoding_errors='ignore')

## **Load Seurat object for Enrichment and Differential Module Eigen Gene Analysis**

In [None]:
%%R -i subject_id -i gene_celltype_threshold -i celltypes -i test_names -i save_prefix -i gene_selection -i gene_set_select

# Report a failed load and make sure `cell_type` is absent from the object list.
#
# cell_type:  name of the cell type whose .rds file could not be loaded
# seurat_obj: named list of the objects loaded so far
# reason:     optional text describing why the load failed (default NULL,
#             in which case no reason line is printed)
#
# The pruned list is written back to `seurat_obj` in the global environment,
# because modifying the argument only changes the function's local copy.
handle_loading_failure <- function(cell_type, seurat_obj, reason = NULL) {
    cat(paste0('Could not load data for ', toupper(cell_type), '\n'))
    if (!is.null(reason)) {
        cat(paste0('Reason: ', reason, '\n'))
    }
    cat(paste0(toupper(cell_type), ' dropped from experiment\n'))

    # Remove the failed cell_type from the list
    seurat_obj <- seurat_obj[names(seurat_obj) != cell_type]

    # Update the modified seurat_obj in the global environment
    assign("seurat_obj", seurat_obj, envir = .GlobalEnv)
}

seurat_obj <- list()

for (cell_type in celltypes) {

    cat(paste0('Loading data for hdWGCNA Experiment in ', toupper(cell_type), '\n'))

    tryCatch({
        # Informational messages are echoed and muffled in place. A `message`
        # handler on tryCatch would instead abandon the load at the first
        # message and drop a cell type whose file was read successfully.
        withCallingHandlers({
            # Attempt to read the RDS file into the list
            seurat_obj[[cell_type]] <- readRDS(paste0("../results/hdWGCNA/SeuratObject/", save_prefix, '/', cell_type, '_hdWGCNA_object.rds'))
            print(seurat_obj[[cell_type]])
            print('............')
        }, message = function(m) {
            cat(paste0('Note: ', conditionMessage(m)))
            invokeRestart("muffleMessage")
        })

    }, error = function(e) {
        # Genuine failure: drop the cell type and say why
        handle_loading_failure(cell_type, seurat_obj, conditionMessage(e))
    })
}

cat("Loaded available data\n")

# **Enrichment analysis**

To gain insights into the biological processes associated with hdWGCNA modules, we perform enrichment tests using the R package enrichR, which accesses a broad spectrum of curated gene lists. Additionally, a gene set overlap analysis is conducted to compare the genes in hdWGCNA modules with marker genes identified using Seurat’s FindAllMarkers function. This dual approach helps in understanding the functional relevance of the identified modules in the broader context of gene expression and biological pathways.


## **EnrichR**

hdWGCNA includes the function RunEnrichr to compare the set of genes in each module with any of the gene lists hosted by Enrichr.

The results of the enrichment tests are stored in the hdWGCNA experiment, so they can be easily retrieved for downstream analysis or exported to external applications like Excel. In the following example, we perform the enrichment test with three Gene Ontology databases:

- `GO_Biological_Process_2021`
- `GO_Cellular_Component_2021`
- `GO_Molecular_Function_2021`

We specifically focus on differentially expressed pathways, consistent with the construction of co-expression networks based on gene programs from these pathways.


In [None]:
%%R

# Enrichr libraries queried for every module.
# To test Biological Process only, use instead:
# dbs <- c('GO_Biological_Process_2021')
dbs <- c('GO_Biological_Process_2021', 'GO_Cellular_Component_2021','GO_Molecular_Function_2021')

# table of differentially expressed pathways from the ad_vs_no results
deps <- read.csv(paste0('../results/ad_vs_no/', save_prefix, '/Data/differentially_expressed_pathways.csv'))

enrich_df <- list()

for (cell_type in names(seurat_obj)){

  print(paste0('Estimating Module Enrichments in hdWGCNA Experiment for ', toupper(cell_type)))

  # run the enrichment tests: `dbs` lists the Enrichr databases and
  # `max_genes` is the number of genes per module to test
  seurat_obj[[cell_type]] <- RunEnrichr(seurat_obj[[cell_type]], dbs = dbs, max_genes = 500)

  # keep the rows of the output table with P.value below 0.05
  enrich_df[[cell_type]] <- subset(GetEnrichrTable(seurat_obj[[cell_type]]), P.value < 0.05)

  # pathways reported as differentially expressed (P.Value < 0.05) for this cell type
  pathways <- subset(deps, P.Value < 0.05 & celltype == cell_type)$pathway

  # enrichment rows whose term is one of those pathways
  bio_proc <- subset(enrich_df[[cell_type]], Term %in% pathways)

  # every Cellular Component / Molecular Function row, regardless of pathway
  cc_mf_terms <- subset(enrich_df[[cell_type]], db %in% c('GO_Cellular_Component_2021',
                        'GO_Molecular_Function_2021'))

  combined_output <- rbind(cc_mf_terms, bio_proc)

  # store the filtered table in the object, then read it back so that
  # enrich_df mirrors what the object now holds
  seurat_obj[[cell_type]] <- SetEnrichrTable(seurat_obj[[cell_type]], combined_output)
  enrich_df[[cell_type]] <- GetEnrichrTable(seurat_obj[[cell_type]])
}

## **Visualize enrichments**

### **EnrichrBarPlot**

The enrichment results can be visualized using the `EnrichrBarPlot` function. This function provides a comprehensive summary of the enrichment analysis for each Enrichr database and module, generating a .pdf figure for each module. These figures contain bar plots that display the top N enriched terms. In the example below, the function is set to plot the top 10 terms for each module, with the output for each cell type directed to a folder named `<cell_type>_enrichr_plots`.

In [None]:
%%R 

# root folder for the Enrichr figures of this study (created when missing)
fig_dir <- paste0("../results/hdWGCNA/Enrichr/", save_prefix, '/')
if (!dir.exists(fig_dir)) dir.create(fig_dir, recursive=TRUE)

for (cell_type in names(seurat_obj)){

  print(paste0('Enrichr Plots for Modules in hdWGCNA Experiment for ', toupper(cell_type)))

  # each cell type gets its own output directory under fig_dir
  barplot_dir <- paste0(fig_dir, cell_type, "_enrichr_plots")

  # make GO term plots:
  #   n_terms   - number of enriched terms to show (more may show if there are ties)
  #   plot_size - width, height of the output .pdfs
  #   logscale  - show the enrichment on a log scale
  EnrichrBarPlot(
    seurat_obj[[cell_type]],
    outdir = barplot_dir,
    n_terms = 10,
    plot_size = c(5,7),
    logscale = TRUE
  )
}

### **EnrichrDotPlot**

hdWGCNA includes an additional visualization function for enrichment results, `EnrichrDotPlot`, which shows the top results for one Enrichr database in each module. In the following example, we plot the top three terms per module from the `GO_Biological_Process_2021` database.

In [None]:
%%R

# With `conflicted` attached, an ambiguous `select` must be resolved
# explicitly; prefer the dplyr version.
conflicts_prefer(dplyr::select)

fig_dir = paste0("../results/hdWGCNA/Enrichr/", save_prefix, '/DotPlot/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive=TRUE)
}

for (cell_type in names(seurat_obj)){

    print(paste0('Enrichr Dot-Plots for Modules in hdWGCNA Experiment for ', toupper(cell_type)))

    # enrichr dotplot: top 3 Biological Process terms for all modules
    # (the stray empty trailing argument after break_ties was removed)
    p <- EnrichrDotPlot(
        seurat_obj[[cell_type]],
        mods='all',
        database = c('GO_Biological_Process_2021'),
        n_terms=3,
        break_ties=TRUE
    )

    pdf(paste0(fig_dir, cell_type, "_enrichr_dotplots.pdf"), width=10, height=10, useDingbats=FALSE)
    # close the device even when rendering fails, so a failed plot does not
    # leave an open PDF device behind for the following cells
    tryCatch(print(p), finally = dev.off())

}

## **Plot Only Select GO Terms**

Barplot

In [None]:
%%R

# Bar plots of a hand-picked set of GO terms, one PDF per cell type.
# The `pathway` column of this sheet lists the terms to keep.
pathways_to_annotate = read_excel('../data/pathway_databases/pathways_to_annotate.xlsx', sheet = 1)

# helper function to wrap text: each element of `x` is re-flowed with
# strwrap(width = len) and its lines are joined with "\n"
wrapText <- function(x, len) {
    sapply(x, function(y) paste(strwrap(y, len), collapse = "\n"), USE.NAMES = FALSE)
}

for (cell_type in names(seurat_obj)){

  fig_dir = paste0("../results/hdWGCNA/Enrichr/", save_prefix, '/', cell_type, '_enrichr_plots/')

  if (!dir.exists(fig_dir)) {
    dir.create(fig_dir, recursive=TRUE)
  }

  # named vector module -> colour, with the 'grey' module left out
  modules <- GetModules(seurat_obj[[cell_type]])
  mods <- levels(modules$module)
  mods <- mods[mods!='grey']
  module_colors <- modules %>% dplyr::select(c(module, color)) %>% distinct()
  rownames(module_colors) <- module_colors$module
  mod_colors <- module_colors[mods, 'color']
  names(mod_colors) <- mods

  selected_terms <- subset(enrich_df[[cell_type]], Term %in% pathways_to_annotate$pathway)

  # Nothing to draw for this cell type: a faceted plot built on an empty
  # table fails, which would abort the loop for the remaining cell types.
  if (nrow(selected_terms) == 0) {
    cat(paste0('No selected GO terms found for ', toupper(cell_type), '; skipping plot\n'))
    next
  }

  # remove GO Term ID
  selected_terms$Term <- str_replace(selected_terms$Term, " \\s*\\([^\\)]+\\)", "")

  # keep the 10 highest-scoring terms per module (ties are kept)
  selected_terms <- selected_terms %>%
    group_by(module) %>%
    arrange(Combined.Score) %>%
    slice_max(order_by=Combined.Score, n=10)

  selected_terms$wrap <- wrapText(selected_terms$Term, 35)

  selected_terms <- selected_terms %>% arrange(Combined.Score) 

  # factor levels ordered by module, then by score, so that the bars inside
  # each facet are drawn in order of increasing score
  selected_terms$wrap <- factor(selected_terms$wrap, 
                          levels = unique(selected_terms$wrap[order(selected_terms$module, 
                                                                    selected_terms$Combined.Score)]))

  # the term label is drawn inside the bar (geom_text), so the y axis text,
  # ticks and line are all switched off
  p <- selected_terms  %>%
    ggplot(aes(x=log(Combined.Score), y=wrap, fill=module))+
    geom_bar(stat='identity', position='identity', color='white') +
    geom_text(aes(label=wrap), x=.1, color='black', size=3.5, hjust='left') +
    scale_fill_manual(values=mod_colors) +
    ylab('Term') + xlab('log(Enrichment)') +
    scale_x_continuous(expand = c(0, 0), limits = c(0, NA)) +
    theme(
      panel.grid.major=element_blank(),
      panel.grid.minor=element_blank(),
      legend.title = element_blank(),
      axis.ticks.y=element_blank(),
      axis.text.y=element_blank(),
      axis.line.y=element_blank(),
      plot.title = element_text(hjust = 0.5)
    )

  pdf(paste0(fig_dir, cell_type, '_selected_GO_terms.pdf'), width= 6, height=30, useDingbats=FALSE)
  # close the device even if rendering fails
  tryCatch(
    print(p + facet_wrap(~module, ncol=2, scales='free') + NoLegend()),
    finally = dev.off()
  )

}

Dotplot

In [None]:
%%R

# Dot plots of a hand-picked set of GO terms (top 3 per module) with a module
# colour bar underneath, one PDF per cell type.

conflicts_prefer(dplyr::rename)

pathways_to_annotate = read_excel('../data/pathway_databases/pathways_to_annotate.xlsx', sheet = 1)

fig_dir = paste0("../results/hdWGCNA/Enrichr/", save_prefix, '/DotPlot/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive=TRUE)
}

# helper function to wrap text: each element of `x` is re-flowed with
# strwrap(width = len) and its lines are joined with "\n"
wrapText <- function(x, len) {
  sapply(x, function(y) paste(strwrap(y, len), collapse = "\n"), USE.NAMES = FALSE)
}

for (cell_type in names(seurat_obj)){

  # module / colour table without the 'grey' module.
  # dplyr::select and dplyr::rename are namespaced explicitly so this cell
  # does not depend on a conflicts_prefer() call made in another cell.
  modules <- GetModules(seurat_obj[[cell_type]])
  color_df <- modules %>% subset(module!='grey') %>%
    dplyr::select(c(module, color)) %>% distinct() %>%
    mutate(module=droplevels(module)) %>%
    dplyr::rename(c(group=module, colour=color))
  mods <- levels(modules$module); mods <- mods[mods != 'grey']

  color_df$group <- factor(as.character(color_df$group), levels=mods)

  # subset selected terms
  selected_terms <- subset(enrich_df[[cell_type]], Term %in% pathways_to_annotate$pathway)

  selected_terms$group <- factor(
    as.character(selected_terms$module),
    levels = mods
  )

  # -log(p) (natural log) is clamped at max_p for the colour scale.
  # (A quantile() call whose result was discarded used to sit here; it had
  #  no effect and was removed.)
  max_p <- 10

  selected_terms$logp <- -log(selected_terms$P.value)
  selected_terms$logp <- ifelse(selected_terms$logp > max_p, max_p, selected_terms$logp)

  # remove GO Term ID
  selected_terms$Term <- str_replace(selected_terms$Term, " \\s*\\([^\\)]+\\)", "")

  # top 3 terms per module by Combined.Score (ties are kept)
  selected_terms <- selected_terms %>%
                    group_by(group) %>%
                    arrange(group) %>%
                    slice_max(n = 3, order_by = Combined.Score) %>%
                    ungroup()

  selected_terms$wrap <- wrapText(selected_terms$Term, 35)

  # reversed level order, so the first term in the table ends up at the top
  # of the y axis
  selected_terms$Term <- factor(
    as.character(selected_terms$Term),
    levels = rev(unique(as.character(selected_terms$Term)))
  )

  # GO Term dot plot
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` in
  # element_rect()/element_line() -- left unchanged here; confirm against the
  # installed ggplot2 version before switching.
  p <- selected_terms %>%
    ggplot(aes(x = group, y = Term, color =logp, size=log(Combined.Score))) +
    geom_point() +
    scale_color_stepsn(colors=rev(magma(256))) +
    RotatedAxis() + xlab('') + ylab('') +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      panel.border = element_rect(size=1, color='black', fill=NA),
      axis.line.x = element_blank(),
      axis.line.y = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      plot.margin = margin(0,0,0,0),
      panel.grid = element_line(size=0.25, color='lightgrey')
    )

  # make the colorbar as its own heatmap: one tile per module, filled with
  # the module colour
  color_df$var <- 1
  cp <- color_df$colour; names(cp) <- color_df$group
  colorbar <- color_df %>%
    ggplot(aes(x=group, y=var, fill=group)) +
    geom_tile() +
    scale_fill_manual(values=cp) +
    coord_equal() +
    NoLegend() + RotatedAxis() +
    theme(
      plot.title=element_blank(),
      axis.line=element_blank(),
      axis.ticks.y =element_blank(),
      axis.text.y = element_blank(),
      axis.title = element_blank(),
      plot.margin=margin(0,0,0,0)
    )

  pdf(paste0(fig_dir, cell_type, '_selected_GO_terms.pdf'), width=15, height=8)
  # dot plot stacked on top of the colour bar (patchwork `/`); the device is
  # closed even if rendering fails
  tryCatch(print(p / colorbar), finally = dev.off())

}

# **Differential module eigengene (DME) analysis**

We next conduct differential module eigengene (DME) analysis to identify modules that are up- or down-regulated in AD versus control.

We employ the FindDMEs function, a specialized adaptation of Seurat's FindMarkers function, typically employing the Mann-Whitney U test to compare the two groups, although other tests can be specified using the test.use parameter. FindDMEs requires a list of barcodes for each group. In this notebook the comparison is run separately for each cell type: cells whose `pathology.group` is `early` or `late` are compared against cells whose `pathology.group` is `no`.

In [None]:
%%R

# Differential module eigengene test per cell type; the result tables are
# kept in `DMEs` and written to one CSV per cell type.

group_col <- 'cell_type'
groups <- celltypes
DMEs <- list()

dat_dir = paste0("../results/hdWGCNA/DMEs/", save_prefix, '/')

if (!dir.exists(dat_dir)) {
  dir.create(dat_dir, recursive=TRUE)
}


for (cell_type in names(seurat_obj)){

  print(paste0('Differential Module Eigengene Analysis in hdWGCNA Experiment for ', toupper(cell_type)))

  # Cell barcodes (meta.data row names) of the two groups:
  #   g1 - pathology.group is 'early' or 'late'
  #   g2 - pathology.group is 'no'
  # Both use %in%, which is never NA. Selecting rows with `== 'no'` would
  # turn a missing pathology.group into an all-NA row, i.e. a bogus "NA"
  # barcode in g2.
  pathology_group <- seurat_obj[[cell_type]]$pathology.group
  g1 <- seurat_obj[[cell_type]]@meta.data[pathology_group %in% c('early', 'late'),] %>% rownames
  g2 <- seurat_obj[[cell_type]]@meta.data[pathology_group %in% 'no',] %>% rownames

  print(length(g1))

  DMEs[[cell_type]] <- FindDMEs(
          seurat_obj[[cell_type]],
          barcodes1 = g1,
          barcodes2 = g2,
          test.use='wilcox',
          # the name of the hdWGCNA experiment: first three letters of the
          # cell type, upper-cased
          wgcna_name=toupper(substr(cell_type, 1, 3)),
          harmonized=TRUE
      )
  
  DMEs[[cell_type]]$group <- cell_type

  # fix infs: infinite fold changes are replaced by 0
  DMEs[[cell_type]]$avg_log2FC <- ifelse(abs(DMEs[[cell_type]]$avg_log2FC) == Inf, 0, DMEs[[cell_type]]$avg_log2FC)

  write.csv(DMEs[[cell_type]], row.names=FALSE, quote=FALSE, file=paste0(dat_dir, cell_type, '_AD_DMEs.csv'))
}


We can now visualize the results using the hdWGCNA functions `PlotDMEsLollipop` or `PlotDMEsVolcano`. First we make a lollipop plot to visualize the DME results.


In [None]:
%%R

# output folder for the DME figures (created when missing)
fig_dir <- paste0("../results/hdWGCNA/DMEs/", save_prefix, '/')
if (!dir.exists(fig_dir)) dir.create(fig_dir, recursive=TRUE)

for (cell_type in names(seurat_obj)){

  print(paste0('Lollipop for DMEs in hdWGCNA Experiment for ', toupper(cell_type)))

  # the PDF device is opened before the plot is built, as in the original
  # ordering, and closed after printing
  lollipop_pdf <- paste0(fig_dir, cell_type, '_dmes_lollipop.pdf')
  pdf(lollipop_pdf, width=10, height=10)

  # wgcna_name is the name of the hdWGCNA experiment (first three letters of
  # the cell type, upper-cased); `pvalue` names the DME column to use
  p <- PlotDMEsLollipop(
    seurat_obj[[cell_type]],
    DMEs[[cell_type]],
    wgcna_name = toupper(substr(cell_type, 1, 3)),
    pvalue = "p_val_adj"
  )
  print(p)

  dev.off()
}

# **Save Seurat Object**

In [None]:
%%R -o seurat_obj

# Write every hdWGCNA object back to the .rds file it was loaded from.
#
# `seurat_obj` lives in the R session (it is built by the loading cell), so
# it must not be imported with `-i seurat_obj`: no Python cell in this
# notebook defines a variable of that name, so the import fails on a fresh
# kernel, and a stale Python copy would overwrite the R list.
# `-o seurat_obj` is kept so the list is still exported to Python afterwards.

dat_dir = paste0("../results/hdWGCNA/SeuratObject/", save_prefix, '/')

if (!dir.exists(dat_dir)) {
  dir.create(dat_dir, recursive=TRUE)
}

for (cell_type in names(seurat_obj)){
    
    print(paste0('Saving hdWGCNA object in hdWGCNA Experiment for ', toupper(cell_type)))

    saveRDS(seurat_obj[[cell_type]], file=paste0(dat_dir, cell_type, '_hdWGCNA_object.rds'))
}