In [None]:
import rpy2
import logging
import warnings
import anndata2ri
import pandas as pd
import scanpy as sc
from gsva_prep import prep_gsva
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from matplotlib.pyplot import rcParams
from functions import pathway_analyses

In [None]:
def get_sys_dpi(width, height, diag):
    '''
    Estimate the pixel density (dots per inch) of a display.

    width: horizontal resolution in pixels (if unsure, visit `whatismyscreenresolution.net`)
    height: vertical resolution in pixels
    diag: diagonal size of the screen in inches

    Returns the number of horizontal pixels per inch, rounded to the nearest integer.
    '''
    # Squared height-to-width ratio of the screen.
    aspect_sq = height**2 / width**2
    # From diag^2 = w_in^2 + h_in^2 with h_in = w_in * height / width.
    physical_width = (diag**2 / (1 + aspect_sq))**0.5
    return round(width / physical_width)

In [None]:
# Session-wide setup: quiet R/Python warnings, enable the R bridge and set plotting defaults.

# Ignore R warning messages by raising the level of rpy2's callback logger.
# Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Suppress these Python warning categories for the rest of the session.
warnings.filterwarnings("ignore", category=PendingDeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Automatically convert rpy2 outputs to pandas dataframes
# (anndata2ri presumably does the same for AnnData objects -- TODO confirm)
pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension; the `%%R` cells below depend on it.
%load_ext rpy2.ipython

# Figure DPI computed for a 1512 x 982 px display with a 14.125 inch diagonal.
rcParams['figure.dpi'] = get_sys_dpi(1512, 982, 14.125)
#rcParams['figure.figsize']=(4,4) #rescale figures

sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
sc.logging.print_versions()



# **hdWGCNA Network Visualization**

In [None]:
%%R
# Attach every R package used by the R cells of this notebook.
# Each package is attached exactly once; the relative order of first
# appearance is kept so that name masking is unchanged.
suppressPackageStartupMessages({
    # statistics, plotting helpers and general utilities
    library(WGCNA)
    library(Matrix)
    library(viridis)
    library(harmony)
    library(ggpubr)
    library(tictoc)
    library(RColorBrewer)
    library(Hmisc)
    library(corrplot)
    library(grid)
    library(gridExtra)
    library(igraph)
    library(ggrepel)
    library(conflicted)
    library(readxl)

    # single-cell analysis package
    library(Seurat)

    # plotting and data science packages
    # (must be attached in every new R session before using %>%)
    library(tidyverse)
    library(cowplot)
    library(patchwork)

    # co-expression network analysis packages (WGCNA is already attached above)
    library(hdWGCNA)

    # gene enrichment packages
    library(enrichR)
    library(GeneOverlap)
    library(GSEABase)
    library(GSVA)
})

# using the cowplot theme for ggplot
theme_set(theme_cowplot())

# set random seed for reproducibility
set.seed(12345)

# optionally enable multithreading
# enableWGCNAThreads(nThreads = 4)

## **Prepare data**

Now, we load the preprocessed and annotated data for downstream analysis.

In [None]:
# Experiment configuration shared by the Python and R cells below.

save_prefix = 'leng_sfg'    # study key: used in data/result paths and to look up the subject-ID column
map_meta = True
deg_method =  'DESeq2-Wald' # forms part of the DEG results file name read further down
test_names = ['late_vs_early', 'early_vs_no', 'late_vs_no', 'ad_vs_no']
filter_genes = "TRUE"       # NOTE(review): stored as a string rather than a bool -- presumably consumed by R; confirm

# Column holding the subject identifier in each study's metadata.
subject_ids_for_study = {'leng_sfg': 'PatientID',
                        'leng_etc': 'PatientID',
                        'seaad_mtg': 'individualID'}

subject_id = subject_ids_for_study[save_prefix]     # for leng this is `PatientID` for mathys is 'Subject', and allen is 'individualID'
gene_celltype_threshold = 0.10          # determines number of cells the gene must be expressed in
covariates = ['None']                   # list of covariates to be accounted for in regression.
celltypes = ["Excitatory", "Inhibitory", "Astrocyte", "Microglia", "Oligodendrocyte", "OPC"]

# Study metadata table.
metadata = f'../data/raw/{save_prefix}/{save_prefix}_metadata.csv'
meta = pd.read_csv(metadata, encoding_errors='ignore')

gene_selection = 'custom'   # specifies the gene selection method when setting up seurat object for WGCNA.
                            # Possible values are "custom", "fraction", "variable"
                            # If custom, a list of genes must be passed.

gene_set_select = 'diff_exp'      # If gene_selection = 'custom'. This specifies how to obtain the list of
                                  # genes to pass into `SetupForWGCNA`. The possible values are "diff_exp", "overlap", "all"

## **Load Seurat object for Network Visualizations**

In [None]:
%%R -i subject_id -i gene_celltype_threshold -i celltypes -i test_names -i save_prefix -i gene_selection -i gene_set_select

# Report a failed load and make sure `cell_type` is absent from the list of
# loaded objects.
#
# cell_type:  name of the cell type whose object could not be loaded
# seurat_obj: named list of the objects loaded so far
# reason:     optional text describing the failure (e.g. the R error message)
#
# The filtered list is written back to `seurat_obj` in the global environment.
handle_loading_failure <- function(cell_type, seurat_obj, reason = NULL) {
    cat(paste0('Could not load data for ', toupper(cell_type), '\n'))
    if (!is.null(reason)) {
        cat(paste0('Reason: ', reason, '\n'))
    }
    cat(paste0(toupper(cell_type), ' dropped from experiment\n'))

    # Remove the failed cell_type from the list
    seurat_obj <- seurat_obj[names(seurat_obj) != cell_type]

    # Update the modified seurat_obj in the global environment
    assign("seurat_obj", seurat_obj, envir = .GlobalEnv)
}

seurat_obj <- list()

for (cell_type in celltypes) {

    cat(paste0('Loading data for hdWGCNA Experiment in ', toupper(cell_type), '\n'))

    # Only genuine errors drop a cell type. Informational messages raised while
    # reading or printing an object are no longer treated as a load failure:
    # a `message` handler in tryCatch() would abort the expression and discard
    # an object that may have loaded correctly.
    tryCatch({
        # Attempt to read the RDS file into the list
        seurat_obj[[cell_type]] <- readRDS(paste0("../results/hdWGCNA/SeuratObject/", save_prefix, '/', cell_type, '_hdWGCNA_object.rds'))
        print(seurat_obj[[cell_type]])
        print('............')

    }, error = function(e){
        # Drop the cell type and report why the load failed
        handle_loading_failure(cell_type, seurat_obj, conditionMessage(e))
    })
}

cat("Loaded available data\n")

# **Network Visualization**


Here, we visualize the co-expression networks made with hdWGCNA using the following functions:

- `ModuleNetworkPlot`, visualizes a separate network plot for each module, showing the top N genes by kME.
- `HubGeneNetworkPlot`, visualizes the network comprising all modules with a given number of hub genes per module.
- `ModuleUMAPPlot`, visualizes all of the genes in the co-expression network simultaneously using the UMAP dimensionality reduction algorithm.

## **Individual module network plots**


Here we demonstrate using the `ModuleNetworkPlot` function to visualize the network underlying the top 25 hub genes for each module. By default, this function creates a new folder called “ModuleNetworks”, and generates a .pdf figure for each module.



In [None]:
%%R

# Output folder for the per-module network figures of this study
fig_dir <- paste0('../results/hdWGCNA/NetworkPlot/', save_prefix, '/')
if (!dir.exists(fig_dir)) dir.create(fig_dir, recursive = TRUE)

for (cell_type in names(seurat_obj)) {
  print(paste0('Module network plots in hdWGCNA Experiment for ', toupper(cell_type)))

  # each cell type gets its own sub-folder of module figures
  module_outdir <- paste0(fig_dir, cell_type, '_hubNetworks/')
  ModuleNetworkPlot(seurat_obj[[cell_type]], mods = "all", outdir = module_outdir)
}

In this network, each node represents a gene, and each edge represents the co-expression relationship between two genes in the network. The module network plots are colored based on the color column in the hdWGCNA module assignment table GetModules(seurat_obj). The top 10 hub genes by kME are placed in the center of the plot, while the remaining 15 genes are placed in the outer circle.

Optionally, certain visualization parameters can be changed in this plot:

- `edge.alpha`: determines the opacity of the network edges
- `vertex.size`: determines the size of the nodes
- `vertex.label.cex`: determines the font size of the gene label


## **Combined hub gene network plots**

The `HubGeneNetworkPlot` function in hdWGCNA is used to create a network plot that combines all modules. It selects the top n hub genes, along with other randomly chosen genes, to form a joint network using the force-directed graph drawing algorithm. For enhanced visual clarity, the network's number of edges can be reduced using the `edge_prop` parameter. An example of this would be visualizing the top 3 hub genes and 6 additional genes per module; the cell below uses 6 hub genes and 8 additional genes per module.


In [None]:
%%R

# Output folder for the combined hub-gene network figures
fig_dir <- paste0("../results/hdWGCNA/HubGeneNetworkPlot/", save_prefix, '/')
if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive = TRUE)
}

for (cell_type in names(seurat_obj)) {

  print(paste0('Hubs Gene Network Plot in hdWGCNA Experiment for ', toupper(cell_type)))

  # Save plot to PDF. `finally` closes the device even when plotting fails, so
  # a failure cannot leave a PDF device open for later plots to land in.
  pdf(file = paste0(fig_dir,  cell_type, "_allHubGeneNetwork.pdf"), width = 4, height = 4, useDingbats = FALSE)
  tryCatch(
    HubGeneNetworkPlot(
      seurat_obj[[cell_type]],
      n_hubs = 6,
      n_other = 8,
      edge_prop = 0.75,
      mods = 'all',
      sample_edges = TRUE,
      return_graph = FALSE,
      edge.alpha = 0.25,
      vertex.label.cex = 0.25,
      hub.vertex.size = 6,
      other.vertex.size = 2,
      wgcna_name = NULL   # no trailing comma: it would pass an extra empty argument
    ),
    finally = dev.off()
  )

}

As in the previous network plot, each node represents a gene and each edge represents a co-expression relationship. In this network, we color intramodular edges with the module’s color, and intermodular edges gray. The opacity of edges in this network is scaled by the strength of the co-expression relationship.

## **Applying UMAP to co-expression networks**

The `RunModuleUMAP` function is utilized to apply UMAP for visualizing the entire co-expression network. This function embeds the hdWGCNA topological overlap matrix (TOM) in a low-dimensional manifold, targeting specifically the top n hub genes by kME for each module. The placement of each gene in the UMAP space reflects its connectivity with the network's hub genes. `RunModuleUMAP` leverages the UMAP implementation from the `uwot` R package, allowing for the inclusion of additional UMAP parameters like `min_dist` or `spread` from the `uwot::umap` function.

In [None]:
%%R

# Compute the module UMAP embedding for every loaded cell type and store it
# back on the corresponding object.
for (cell_type in names(seurat_obj)) {

  print(paste0('Obtaining Module UMAP in hdWGCNA Experiment for ', toupper(cell_type)))

  seurat_obj[[cell_type]] <- RunModuleUMAP(
    seurat_obj[[cell_type]],
    n_hubs = 5,        # number of hub genes to include for the UMAP embedding
    n_neighbors = 15,  # neighbors parameter for UMAP
    min_dist = 0.3,    # min distance between points in UMAP space
    spread = 2         # no trailing comma: it would pass an extra empty argument
  )

}

Next we will make a simple visualization of the UMAP for Excitatory cells using ggplot2:

In [None]:
%%R

# Collect the hub gene UMAP table of every loaded cell type
umap_df <- list()
for (cell_type in names(seurat_obj)) {
  print(paste0('Store Module UMAP in hdWGCNA Experiment for ', toupper(cell_type)))
  umap_df[[cell_type]] <- GetModuleUMAP(seurat_obj[[cell_type]])
}

# Sample ggplot scatter for the first entry of `celltypes`
sample_umap <- umap_df[[celltypes[1]]]
ggplot(sample_umap, aes(x = UMAP1, y = UMAP2)) +
  geom_point(
    color = sample_umap$color,   # color each point by WGCNA module
    size = sample_umap$kME * 2   # size of each point based on intramodular connectivity
  ) +
  umap_theme()

In this plot, each point represents a single gene. The size of each dot is scaled by the gene’s kME for its assigned module. We can use the function `ModuleUMAPPlot` to plot the genes and their co-expression relationships.

In [None]:
%%R

# Output folder for the module UMAP network figures
fig_dir <- paste0("../results/hdWGCNA/HubGeneUMAP/", save_prefix, '/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive = TRUE)
}

for (cell_type in names(seurat_obj)) {

  print(paste0('UMAP Plot of Connectivities in hdWGCNA Experiment for ', toupper(cell_type)))

  # One PDF per cell type. `finally` closes the device even when plotting
  # fails, so a failure cannot leave a PDF device open for later plots.
  pdf(paste0(fig_dir, cell_type, '_hubgene_umap_igraph.pdf'), width=10, height=10)
  tryCatch(
    ModuleUMAPPlot(
      seurat_obj[[cell_type]],
      edge.alpha = 0.5,
      sample_edges = TRUE,
      keep_grey_edges = FALSE,
      edge_prop = 0.1,  # proportion of edges to sample (10% here)
      #label_genes = label_genes,
      label_hubs = 10   # how many hub genes to plot per module?
    ),
    finally = dev.off()
  )
}

### **Visualizing Hub-DEGs**

The hub genes that are also differentially expressed genes (DEGs) can be visualized. You can find the DEG results saved at `../results/{test_name}/{save_prefix}/DEG/{deg_method}_degs.xlsx` from the previous analysis.

Pass the test names and the DEG method to R.

In [None]:
%%R -i test_names -i deg_method

conflicts_prefer(base::intersect)

fig_dir <- paste0("../results/hdWGCNA/HubGeneUMAP/", save_prefix, '/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive = TRUE)
}

# Label the hub genes that are also DEGs on the module UMAP.
# NOTE(review): only 'ad_vs_no' is plotted although `test_names` is imported -- confirm this is intended.
for (test in c('ad_vs_no')) {

  print(toupper(test))
  for (cell_type in names(seurat_obj)) {

    print(paste0('UMAP Plot of Hub-DEG Connectivities in hdWGCNA Experiment for ', toupper(cell_type)))

    # DEGs of this cell type that pass the thresholds and are among the WGCNA genes
    wgcna_genes <- GetWGCNAGenes(seurat_obj[[cell_type]])
    degs <- read_excel(paste0('../results/', test, '/', save_prefix, '/DEG/', deg_method, '_degs.xlsx'), sheet = cell_type)
    degs <- intersect(wgcna_genes, subset(degs, p_val_adj<0.01 & abs_logFC>0.25)$gene)

    hub_genes <- GetHubGenes(seurat_obj[[cell_type]], 25)
    label_genes <- intersect(hub_genes$gene_name, unique(degs))

    # An empty selection is passed on as NULL
    if (length(label_genes) == 0) {
      print(paste0('No genes to label for ', toupper(cell_type)))
      print('setting label_genes to NULL')
      label_genes <- NULL
    }

    # `finally` closes the PDF device even when plotting fails
    pdf(paste0(fig_dir, test, "_", cell_type, '_', gene_set_select, '_hubDEGs_umap_igraph.pdf'), width=10, height=10)
    tryCatch(
      ModuleUMAPPlot(
        seurat_obj[[cell_type]],
        edge.alpha = 0.5,
        sample_edges = TRUE,
        keep_grey_edges = FALSE,
        edge_prop = 0.1,  # proportion of edges to sample (10% here)
        label_genes = label_genes,
        label_hubs = 0    # how many hub genes to plot per module?
      ),
      finally = dev.off()
    )
  }

}



### **Visualizing Hub-AD Genes**

Next, we visualize the hub genes, which are also part of the list of AD genes.

We load the AD genes from the following AD databases: [**Open Targets Platform**](https://platform.opentargets.org/disease/MONDO_0004975/associations), [**KEGG Alzheimer's pathway**](https://www.genome.jp/pathway/hsa05010) and [**Harmonizome (Ma'ayan Lab)**](https://maayanlab.cloud/Harmonizome/gene_set/Alzheimer+Disease/dbGAP+Gene-Trait+Associations).

In [None]:
# KEGG: genes on the first pathway row whose name starts with 'Alzheimer disease'
KEGG_paths = pathway_analyses.read_pathways('../data/pathway_databases/KEGG_2019_Human.txt')
kegg_ad_rows = KEGG_paths[KEGG_paths[0].str.startswith('Alzheimer disease')]
if kegg_ad_rows.empty:
    # fail with a clear message instead of an IndexError on `.values[0]`
    raise ValueError("No KEGG pathway starting with 'Alzheimer disease' was found")
KEGG_genes = [gene for gene in kegg_ad_rows.iloc[:, 1:].values[0] if str(gene) != "nan"]

# Harmonizome gene set: `Symbol` column of the CSV
mayaanlab_genes = pd.read_csv('../data/pathway_databases/AD_genes.csv').Symbol.to_list()

# Open Targets: keep rows whose `textMining` value is not 'No data'
otp_table = pd.read_csv('../data/pathway_databases/MONDO_0004975-associated-diseases.tsv', sep='\t')
otp_genes = otp_table[otp_table.textMining != 'No data'].symbol.to_list()

# Union of the three sources. Non-string entries (e.g. missing values) are
# dropped and the result is sorted so the list is identical across sessions.
AD_genes = sorted(
    gene
    for gene in set(KEGG_genes) | set(mayaanlab_genes) | set(otp_genes)
    if isinstance(gene, str)
)

In [None]:
%%R -i test_names -i deg_method -i AD_genes

conflicts_prefer(base::intersect)

fig_dir <- paste0("../results/hdWGCNA/HubGeneUMAP/", save_prefix, '/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive = TRUE)
}

# Label the hub genes that are also in the AD gene list on the module UMAP.
for (cell_type in names(seurat_obj)) {

  # message corrected: this cell labels AD genes, not DEGs
  print(paste0('UMAP Plot of Hub-AD Gene Connectivities in hdWGCNA Experiment for ', toupper(cell_type)))

  hub_genes <- GetHubGenes(seurat_obj[[cell_type]], 25)
  label_genes <- intersect(hub_genes$gene_name, unique(as.vector(AD_genes)))

  # An empty selection is passed on as NULL
  if (length(label_genes) == 0) {
    print(paste0('No genes to label for ', toupper(cell_type)))
    print('setting label_genes to NULL')
    label_genes <- NULL
  }

  # `finally` closes the PDF device even when plotting fails
  pdf(paste0(fig_dir, cell_type, '_AD_genes_hubDEGs_umap_igraph.pdf'), width=10, height=10)
  tryCatch(
    ModuleUMAPPlot(
      seurat_obj[[cell_type]],
      edge.alpha = 0.25,
      sample_edges = TRUE,
      keep_grey_edges = FALSE,
      edge_prop = 0.1,  # proportion of edges to sample (10% here)
      label_genes = label_genes,
      label_hubs = 0    # how many hub genes to plot per module?
    ),
    finally = dev.off()
  )
}

### **Visualizing Hub-DEGs&AD Genes**

Next, we visualize the hub DEGs and the hub AD genes together on one UMAP.

In [None]:
%%R -i test_names -i deg_method -i AD_genes

# `AD_genes` is imported here as well, so this cell no longer depends on an
# earlier R cell having pushed it into the R session.

conflicts_prefer(base::intersect)

fig_dir <- paste0("../results/hdWGCNA/HubGeneUMAP/", save_prefix, '/')

if (!dir.exists(fig_dir)) {
  dir.create(fig_dir, recursive = TRUE)
}

# Label the hub genes that are DEGs or AD genes on the module UMAP.
# NOTE(review): only 'ad_vs_no' is plotted although `test_names` is imported -- confirm this is intended.
for (test in c('ad_vs_no')) {

  print(toupper(test))
  for (cell_type in names(seurat_obj)) {

    print(paste0('UMAP Plot of Hub-DEG Connectivities in hdWGCNA Experiment for ', toupper(cell_type)))

    # DEGs of this cell type that pass the thresholds and are among the WGCNA genes
    wgcna_genes <- GetWGCNAGenes(seurat_obj[[cell_type]])
    degs <- read_excel(paste0('../results/', test, '/', save_prefix, '/DEG/', deg_method, '_degs.xlsx'), sheet = cell_type)
    degs <- intersect(wgcna_genes, subset(degs, p_val_adj<0.01 & abs_logFC>0.25)$gene)
    degs_and_ad_genes <- c(degs, AD_genes)

    hub_genes <- GetHubGenes(seurat_obj[[cell_type]], 25)
    label_genes <- intersect(hub_genes$gene_name, unique(degs_and_ad_genes))

    # An empty selection is passed on as NULL
    if (length(label_genes) == 0) {
      print(paste0('No genes to label for ', toupper(cell_type)))
      print('setting label_genes to NULL')
      label_genes <- NULL
    }

    # `finally` closes the PDF device even when plotting fails
    pdf(paste0(fig_dir, test, "_", cell_type, '_', gene_set_select, '_AD_n_hubDEGs_umap_igraph.pdf'), width=10, height=10)
    tryCatch(
      ModuleUMAPPlot(
        seurat_obj[[cell_type]],
        edge.alpha = 0.5,
        sample_edges = TRUE,
        keep_grey_edges = FALSE,
        edge_prop = 0.1,  # proportion of edges to sample (10% here)
        label_genes = label_genes,
        label_hubs = 0    # how many hub genes to plot per module?
      ),
      finally = dev.off()
    )
  }

}



# **Save Seurat Object**

In [None]:
%%R

# Folder holding one serialized hdWGCNA object per cell type
dat_dir <- paste0("../results/hdWGCNA/SeuratObject/", save_prefix, '/')
if (!dir.exists(dat_dir)) dir.create(dat_dir, recursive = TRUE)

for (cell_type in names(seurat_obj)) {
    print(paste0('Saving hdWGCNA object in hdWGCNA Experiment for ', toupper(cell_type)))

    # same file name pattern as the load step, so existing files are overwritten
    rds_path <- paste0(dat_dir, cell_type, '_hdWGCNA_object.rds')
    saveRDS(seurat_obj[[cell_type]], file = rds_path)
}