
# 10x Multiome Hippocampus

## Set-up environment and download data 
We will first create a directory to store the data and results

In [None]:
import warnings
import sys
import os
import re
import json
import pickle
import dill
import random
from os import listdir
from typing import Union, Dict, Sequence, Optional, List
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

os.environ["MODIN_ENGINE"] = "ray"
import ray

# Plotting
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import networkx as nx
from matplotlib.colors import to_rgba, to_hex
import seaborn as sns

# SCENIC
import scanpy as sc
import mudata
import anndata
import pycisTopic
import pyranges
from pycisTopic.cistopic_class import create_cistopic_object
from pycisTopic.lda_models import run_cgs_models,evaluate_models
from pycisTopic.topic_binarization import binarize_topics
from pycisTopic.diff_features import find_diff_features,find_highly_variable_features,impute_accessibility,normalize_scores
from pycisTopic.utils import region_names_to_coordinates
from scenicplus.plotting.dotplot import heatmap_dotplot
from scenicplus.scenicplus_class import mudata_to_scenicplus
from scenicplus.networks import create_nx_tables, create_nx_graph, plot_networkx, export_to_cytoscape
from sklearn.preprocessing import MinMaxScaler, StandardScaler


#Setup ray spill to store tmp objects if ray store gets full
os.makedirs(os.path.join(work_dir, 'tmp/spill'), exist_ok=True)

# Check if current working directory is named "python" and change if needed
current_dir = os.getcwd()
if os.path.basename(current_dir) == "python":
    os.chdir("../../../")
    print(f"Changed working directory to: {os.getcwd()}")
else:
    print(f"Current working directory: {os.getcwd()}")

#Set up working directory
from os import listdir
work_dir = 'data/SCENIC/ExNeu/'
listdir(work_dir)
if not os.path.exists(os.path.join(work_dir, 'model')):
    os.makedirs(os.path.join(work_dir, 'model'))

#set some figure parameters for nice display inside jupyternotebooks.
%matplotlib inline

## Load and process ATAC object

### Load ATAC object from R

In [None]:
## 1. Initialize cisTopic object
atac_meta_ad = sc.read_h5ad(os.path.join(work_dir, "objects/ATAC.h5ad"))

# Region names: only the first '-' is replaced by ':' (region name format expected by cistopic).
region_ids = [re.sub('-', ':', region, 1) for region in list(atac_meta_ad.var_names)]

# Transposed count table: one row per region, one column per cell.
matrix = pd.DataFrame(
    atac_meta_ad.X.transpose().toarray(),
    index=region_ids,
    columns=list(atac_meta_ad.obs_names),
)

cisTopic_obj = create_cistopic_object(matrix)

### Fix cell names to match seurat RNA object
cistopic_suffix = "___cisTopic"
cisTopic_obj.cell_names = [
    cell_name.removesuffix(cistopic_suffix) for cell_name in cisTopic_obj.cell_names
]

# Also add the cell meta data, then strip the same suffix from the metadata index
cisTopic_obj.add_cell_data(atac_meta_ad.obs)
cisTopic_obj.cell_data.index = [
    cell_name.removesuffix(cistopic_suffix) for cell_name in cisTopic_obj.cell_data.index
]

### Compute region topics (co-accessible groups of regions)

In [None]:
# Keep every topic-model output under work_dir and create the folders up front,
# so the per-model files written during the run have somewhere to go.
# The trailing separator is kept on the save path in case it is used as a plain prefix.
pycistopic_dir = os.path.join(work_dir, 'pycistopic', '')
os.makedirs(os.path.join(pycistopic_dir, 'model'), exist_ok=True)

# Run models
models=run_cgs_models(
    cisTopic_obj,
    n_topics=list(range(2, 52, 2)),
    n_cpu=9,
    n_iter=500,
    random_state=555,
    alpha=50,
    alpha_by_topic=True,
    eta=0.1,
    eta_by_topic=False,
    save_path=pycistopic_dir
)

# Save models (the context manager closes the file even if pickling fails)
with open(os.path.join(pycistopic_dir, 'model/pycistargetmodel.pkl'), 'wb') as model_file:
    pickle.dump(models, model_file)

In [None]:
## Pick the topic model to keep.
## The selection is hard-coded here (select_model=26) and metric plotting is switched off;
## set plot_metrics=True to look at the listed metrics before settling on a value.
model = evaluate_models(models,
                       select_model=26,
                       return_model=True,
                       metrics=['Arun_2010','Cao_Juan_2009', 'Minmo_2011', 'loglikelihood'],
                       plot_metrics=False)

### Save

In [None]:
# Attach the selected topic model, then persist the whole cisTopic object.
cisTopic_obj.add_LDA_model(model)
os.makedirs(os.path.join(work_dir, 'pycistopic'), exist_ok=True)
# The context manager guarantees the pickle is flushed and the handle closed.
with open(os.path.join(work_dir, 'pycistopic/cistopic_obj.pkl'), 'wb') as cistopic_file:
    pickle.dump(cisTopic_obj, cistopic_file)

### Inferring candidate enhancer regions

Next we will infer candidate enhancer regions by:

1. binarization of region-topic probabilities.
2. calculation of differentially accessible regions (DARs) per metacell.

These regions will be used as input for the next step, [pycistarget](https://pycistarget.readthedocs.io/en/latest/), in which we will look which motifs are enriched in these regions.

First we will binarize the topics using the [otsu](http://ieeexplore.ieee.org/document/4310076/) method and by taking the top 3k regions per topic.

In [None]:
# Reload the saved cisTopic object; the handle is closed as soon as loading finishes.
with open(os.path.join(work_dir, 'pycistopic/cistopic_obj.pkl'), 'rb') as cistopic_file:
    cisTopic_obj = dill.load(cistopic_file)

# Two binarizations of the topics: otsu thresholding, and the top 3000 regions per topic.
region_bin_topics_otsu = binarize_topics(cisTopic_obj, method='otsu')
region_bin_topics_top3k = binarize_topics(cisTopic_obj, method='ntop', ntop = 3000,plot=True, num_columns=5)

Next we will calculate DARs per Substate

In [None]:
# Impute accessibility for all cells and all regions (no subsetting is requested).
imputed_acc_obj = impute_accessibility(cisTopic_obj, selected_cells=None, 
                                       selected_regions=None, scale_factor=10**6)

# Normalise the imputed scores and look for highly variable regions (plotted).
# NOTE(review): variable_regions is not passed to find_diff_features below -
# confirm whether the DAR test was meant to be restricted to these regions.
normalized_imputed_acc_obj = normalize_scores(imputed_acc_obj, scale_factor=10**4)
variable_regions = find_highly_variable_features(normalized_imputed_acc_obj,  
                                                 min_disp = 0.05,
                                                 min_mean = 0.0125,
                                                 max_mean = 3,
                                                 max_disp = np.inf,
                                                 n_bins=20, plot = True)

# DARs are computed on the 'SEACell' metadata column - choose whatever column works for you.
# Can alter thresholds (adjpval_thr, log2fc_thr) to fit your data/question.
markers_dict = find_diff_features(cisTopic_obj, imputed_acc_obj, 
                                  variable='SEACell', 
                                  split_pattern = '-', 
                                  adjpval_thr = 0.05, log2fc_thr = 1, n_cpu=9)

In [None]:
#Filter out sets with no DARs
markers_dict = {k: v for k, v in markers_dict.items() if len(v.index) > 0}
markers_dict

### Save candidate enhancers

In [None]:
# One folder per kind of candidate-enhancer region set, all under region_sets/.
region_sets_dir = os.path.join(work_dir, "region_sets")
os.makedirs(region_sets_dir, exist_ok = True)
for region_set_folder in ("Topics_otsu", "Topics_top_3k", "DARs_cell_type"):
    os.makedirs(os.path.join(region_sets_dir, region_set_folder), exist_ok = True)

In [None]:
def _write_region_sets_as_bed(region_sets, out_dir):
    """Write one BED file per entry of ``region_sets`` into ``out_dir``.

    Parameters
    ----------
    region_sets: dict
        Maps a set name to a table whose index holds the region names.
    out_dir: str
        Folder receiving ``<set name>.bed``; created if it does not exist.

    Each file is tab separated, has no header and no index column, and is
    sorted by Chromosome, Start, End.
    """
    os.makedirs(out_dir, exist_ok=True)
    for set_name, regions in region_sets.items():
        coordinates = region_names_to_coordinates(regions.index)
        coordinates.sort_values(["Chromosome", "Start", "End"]).to_csv(
            os.path.join(out_dir, f"{set_name}.bed"),
            sep = "\t",
            header = False, index = False
        )

_write_region_sets_as_bed(
    region_bin_topics_otsu, os.path.join(work_dir, "region_sets", "Topics_otsu"))
# The top-3k binarization is stored in region_bin_topics_top3k.
_write_region_sets_as_bed(
    region_bin_topics_top3k, os.path.join(work_dir, "region_sets", "Topics_top_3k"))
_write_region_sets_as_bed(
    markers_dict, os.path.join(work_dir, "region_sets", "DARs_cell_type"))

In [None]:
# Persist the three candidate-enhancer collections as pickles.
candidate_enhancers_dir = os.path.join(work_dir, 'pycistopic/candidate_enhancers')
os.makedirs(candidate_enhancers_dir, exist_ok=True)

candidate_enhancer_sets = (
    ('region_bin_topics_otsu.pkl', region_bin_topics_otsu),
    ('region_bin_topics_top3k.pkl', region_bin_topics_top3k),
    ('markers_dict.pkl', markers_dict),
)
for file_name, region_sets in candidate_enhancer_sets:
    # Each file is closed before the next one is opened.
    with open(os.path.join(candidate_enhancers_dir, file_name), 'wb') as out_file:
        pickle.dump(region_sets, out_file)

Between R and Python we have now completed all the minimal scATAC-seq preprocessing steps.

In particular we:

1. generated a set of consensus peaks (R)
2. performed quality control steps, only keeping cell barcodes which passed QC metrics in both the scRNA-seq and scATAC-seq assay (R)
3. performed topic modeling (Python)
4. inferred candidate enhancer regions by binarizing the region-topic probabilities and DARs per cell type (Py)

In the next step we will perform motif enrichment analysis on these candidate enhancer regions using the python package, [pycistarget](https://pycistarget.readthedocs.io/en/latest/). For this a precomputed motif-score database is needed. A sample specific database can be generated by scoring the consensus peaks with motifs or a general pre-scored database can be used as well.

## Generate a custom cisTarget database

Pad genome fasta

In [None]:
%%bash
# Calls create_fasta_with_padded_bg_from_bed.sh with, in order: genome FASTA, chromosome
# sizes, consensus-peak BED, output FASTA, 1000 and "yes".
# NOTE(review): the paths here use a "../" root and the FASTA is written under
# ../../results/SCENIC/ExNeu/objects/, while the "Generate custom database" cell reads its
# FASTA and BED from ../../data/SCENIC/ExNeu/objects/ - confirm which layout is intended.
REGION_BED="../ExNeu/objects/ConsensusPeaks.bed"
GENOME_FASTA="../resources/genome.fa"
CHROMSIZES="../resources/mm10.chrom.sizes"
DATABASE_PREFIX="multiome_ExNeu"
SCRIPT_DIR="../create_cisTarget_databases"

${SCRIPT_DIR}/create_fasta_with_padded_bg_from_bed.sh \
        ${GENOME_FASTA} \
        ${CHROMSIZES} \
        ${REGION_BED} \
        "../../results/SCENIC/ExNeu/objects/mm10.${DATABASE_PREFIX}.with_1kb_bg_padding.fa" \
        1000 \
        yes

Download motifs

In [None]:
%%bash
# Download the v10nr_clust public motif collection, unpack it, and list the singleton
# motif files into motifs.txt.
# The archive is unpacked with `unzip -d` rather than by cd-ing into the folder, so every
# relative path in this cell is resolved from the same starting directory.
set -e
MOTIF_DIR="../resources/aertslab_motif_colleciton"

mkdir -p "${MOTIF_DIR}"
wget -O "${MOTIF_DIR}/v10nr_clust_public.zip" https://resources.aertslab.org/cistarget/motif_collections/v10nr_clust_public/v10nr_clust_public.zip
unzip -q "${MOTIF_DIR}/v10nr_clust_public.zip" -d "${MOTIF_DIR}"
ls "${MOTIF_DIR}/v10nr_clust_public/singletons" > ../resources/motifs.txt

Generate custom database

In [None]:
%%bash
# Score the padded consensus-peak FASTA with the motif collection to build a custom
# cisTarget database.
source ~/.bashrc
source activate /opt/conda/envs/scenicplus
conda install -y flatbuffers

# DATABASE_PREFIX is assigned first because FASTA_FILE is built from it.
DATABASE_PREFIX="multiome_ExNeu"
CBDIR="../../resources/aertslab_motif_colleciton/v10nr_clust_public/singletons"
FASTA_FILE="../../data/SCENIC/ExNeu/objects/mm10.${DATABASE_PREFIX}.with_1kb_bg_padding.fa"
MOTIF_LIST="../../resources/motifs.txt"
OUT_DIR="../../data/SCENIC/ExNeu/objects/"
SCRIPT_DIR="../create_cisTarget_databases"

"${SCRIPT_DIR}/create_cistarget_motif_databases.py" \
    -f "${FASTA_FILE}" \
    -M "${CBDIR}" \
    -m "${MOTIF_LIST}" \
    -o "${OUT_DIR}/${DATABASE_PREFIX}" \
    --bgpadding 1000 \
    -t 9

## SCENIC+ Pipeline with Snakemake

### Cistarget databases

In order to run pycistarget one needs a precomputed database containing motif scores for genomic regions.

You can choose to compute this database yourself by scoring the consensus peaks generated in the scATAC-seq analysis using a set of motifs. The advantage of creating a sample specific database is that you can potentially pick up more target regions, given that only regions included in or overlapping with regions in the cistarget database will be used for the SCENIC+ analysis. For more information check out the [create_cisTarget_databases repo on github](https://github.com/aertslab/create_cisTarget_databases).

We also provide several precomputed databases containing regions covering many experimentally defined candidate cis-regulatory elements. These databases are available on: [https://resources.aertslab.org/cistarget/](https://resources.aertslab.org/cistarget/).

For this analysis we will use a precomputed database using [screen regions](https://screen.encodeproject.org/).

Next to a precomputed motif database we also need a motif-to-tf annotation database. This is also available on [https://resources.aertslab.org/cistarget/](https://resources.aertslab.org/cistarget/).

### Load RNA object and save as raw counts

In [None]:
# Keep a reference to the current stderr and open a handle on the null device.
# NOTE(review): sys.stderr is never reassigned in this cell, so nothing is silenced here;
# `null` is also never closed - confirm whether the redirection is still wanted.
_stderr = sys.stderr                                                         
null = open(os.devnull,'wb')

# Read the RNA object, store its current state in .raw, and write it back to the same file.
adata = sc.read_h5ad(os.path.join(work_dir, "objects/RNA.h5ad"))
adata.raw = adata
adata.write_h5ad(os.path.join(work_dir, "objects/RNA.h5ad"))

In [None]:
!mkdir -p "../../results/SCENIC/scplus_pipeline"
!scenicplus init_snakemake --out_dir "../../results/SCENIC/scplus_pipeline"

In [None]:
!mkdir -p ../../results/SCENIC/ExNeu/outs
!mkdir -p ../../data/SCENIC/ExNeu/tmp

Uncomment this cell to load resources if not previously downloaded

In [None]:
#!wget -P ../../resources/ https://resources.aertslab.org/cistarget/motif2tf/motifs-v10nr_clust-nr.mgi-m0.001-o0.0.tbl

### Run SCENIC+ pipeline

In [None]:
%%bash
# `scenicplus init_snakemake` puts the workflow (config/ and workflow/Snakefile) in a
# `Snakemake` sub-folder of --out_dir, so snakemake is launched from there.
# `&&` keeps snakemake from running in the wrong directory if the cd fails.
cd "../../results/SCENIC/scplus_pipeline/Snakemake" && snakemake --cores 9

## Load Results and Analyse

In [None]:
scplus_mdata = mudata.read("../../results/SCENIC/ExNeu/scplus_pipeline/Snakemake/scplusmdata.h5mu")

In [None]:
# Copy the Engram annotation of the RNA object onto the MuData cell table.
# NOTE(review): this assignment is aligned on the index, so it presumably relies on the cell
# names of adata and scplus_mdata matching - confirm, unmatched cells would end up as NaN.
scplus_mdata.obs["scRNA_counts:Engram"] = adata.obs["Engram"]

Direct and extended predicted TF-to-region-to-gene links. This dataframe also contains a ranking of each TF-region-gene triplet based on its importance (`triplet_rank`).

In [None]:
scplus_mdata.uns["direct_e_regulon_metadata"]

Count the number of regions in each TF's GRN

In [None]:
scplus_mdata.uns["direct_e_regulon_metadata"].TF.value_counts()

View Fosb GRN

In [None]:
scplus_mdata.uns["direct_e_regulon_metadata"][scplus_mdata.uns["direct_e_regulon_metadata"].TF == "Fosb"]

eRegulon dimensionality reduction

In [None]:
eRegulon_gene_AUC = anndata.concat(
    [scplus_mdata["direct_gene_based_AUC"], scplus_mdata["extended_gene_based_AUC"]],
    axis = 1,
)

In [None]:
eRegulon_gene_AUC.obs = scplus_mdata.obs.loc[eRegulon_gene_AUC.obs_names]
sc.pp.neighbors(eRegulon_gene_AUC, use_rep = "X")
sc.tl.umap(eRegulon_gene_AUC)
sc.pl.umap(eRegulon_gene_AUC, color = "scRNA_counts:clusters")

View engram cells and IDs on this GRN UMAP

In [None]:
sc.pl.umap(eRegulon_gene_AUC, color = "scRNA_counts:Engram")

In [None]:
sc.pl.umap(eRegulon_gene_AUC, color = "scRNA_counts:ID")

#### Heatmap dotplot of GRN activity in engram and non-engram cells

In [None]:
heatmap_dotplot(
    scplus_mudata = scplus_mdata,
    color_modality = "direct_gene_based_AUC",
    size_modality = "direct_region_based_AUC",
    group_variable = "scRNA_counts:Engram",
    eRegulon_metadata_key = "direct_e_regulon_metadata",
    color_feature_key = "Gene_signature_name",
    size_feature_key = "Region_signature_name",
    feature_name_key = "eRegulon_name",
    sort_data_by = "direct_gene_based_AUC",
    orientation = "horizontal",
    figsize = (16, 5),
    scale_size_matrix = False, scale_color_matrix = False
)

### Plot GRNs

Convert to scplus object to enable graph plotting 

In [None]:
# The cistarget and DEM result files are read from the same pipeline folder that
# scplusmdata.h5mu is loaded from, so all three paths share one root.
scplus_pipeline_dir = "../../results/SCENIC/ExNeu/scplus_pipeline/Snakemake"

scplus_obj = mudata_to_scenicplus(
    mdata = scplus_mdata,
    path_to_cistarget_h5 = os.path.join(scplus_pipeline_dir, "ctx_results.hdf5"),
    path_to_dem_h5 = os.path.join(scplus_pipeline_dir, "dem_results.hdf5")
)

In [None]:
#### Find variable features as we'll plot these for GRN visibility

In [None]:
# Carry the Engram label over to the SCENIC+ object's cell metadata.
scplus_obj.metadata_cell["Engram"] = scplus_mdata.obs["scRNA_counts:Engram"]

# Unique regions and genes that appear in the eRegulon metadata.
eregulon_metadata = scplus_obj.uns['eRegulon_metadata']
eregulon_regions = list(set(eregulon_metadata['Region']))
eregulon_genes = list(set(eregulon_metadata['Gene']))

# Highly variable regions (rows of the ACC table) and genes (columns of the EXP table, transposed).
hvr = find_highly_variable_features(
    scplus_obj.to_df('ACC').loc[eregulon_regions], n_top_features=6000, plot = True)
hvg = find_highly_variable_features(
    scplus_obj.to_df('EXP')[eregulon_genes].T, n_top_features=6000, plot = True)

#### GRN Plots

In [None]:
##Custom function to allow adding extended regulons to the plots

def create_extended_nx_tables(scplus_obj: 'SCENICPLUS',
                     eRegulon_metadata_key: str ='eRegulon_metadata_filtered',
                     subset_eRegulons: list = None,
                     subset_regions: list = None,
                     subset_genes: list = None,
                     add_differential_gene_expression: bool = False,
                     add_differential_region_accessibility: bool = False,
                     differential_variable: list =[]):
    """
    A function to format eRegulon data into tables for plotting eGRNs. Adapted from SCENIC+ to enable plotting of networks from extended motifs.
    
    Parameters
    ---------
    scplus_obj: SCENICPLUS
        A SCENICPLUS object with eRegulons
    eRegulon_metadata_key: str, optional
        Key where the eRegulon metadata dataframe is stored
    subset_eRegulons: list, optional
        List of eRegulons to subset
    subset_regions: list, optional
        List of regions to subset
    subset_genes: list, optional
        List of genes to subset
    add_differential_gene_expression: bool, optional
        Whether to calculate differential gene expression logFC for a given variable
    add_differential_region_accessibility: bool, optional
        Whether to calculate differential region accessibility logFC for a given variable
    differential_variable: list, optional
        Variable to calculate differential gene expression or region accessibility.
        
    Return
    ---------
    A dictionary with edge feature tables ('TF2G', 'TF2R', 'R2G') and node feature tables ('TF', 'Gene', 'Region')
    """
    er_metadata = scplus_obj.uns[eRegulon_metadata_key].copy()
    if subset_eRegulons is not None:
        er_metadata = er_metadata[er_metadata['TF'].isin(subset_eRegulons)]
    if subset_regions is not None:
        er_metadata = er_metadata[er_metadata['Region'].isin(subset_regions)]
    if subset_genes is not None:
        er_metadata = er_metadata[er_metadata['Gene'].isin(subset_genes)]
    nx_tables = {}
    nx_tables['Edge'] = {}
    nx_tables['Node'] = {}

    if subset_regions is not None:
        er_metadata = er_metadata[er_metadata['Region'].isin(subset_regions)]
    if subset_genes is not None:
        er_metadata = er_metadata[er_metadata['Gene'].isin(subset_genes)]
    nx_tables = {}
    nx_tables['Edge'] = {}
    nx_tables['Node'] = {}
    # Generate edge tables
    r2g_columns = [x for x in er_metadata.columns if 'R2G' in x]
    tf2g_columns = [x for x in er_metadata.columns if 'TF2G' in x]
    nx_tables['Edge']['TF2R'] = er_metadata[er_metadata.columns.difference(
        r2g_columns + tf2g_columns)].drop('Gene', axis=1).drop_duplicates()
    nx_tables['Edge']['TF2R'] = nx_tables['Edge']['TF2R'][['TF', 'Region'] +
    nx_tables['Edge']['TF2R'].columns.difference(['TF', 'Region']).tolist()]
    nx_tables['Edge']['R2G'] = er_metadata[er_metadata.columns.difference(
        tf2g_columns)].drop('TF', axis=1).drop_duplicates()
    nx_tables['Edge']['R2G'] = nx_tables['Edge']['R2G'][['Region', 'Gene'] +
                                                        nx_tables['Edge']['R2G'].columns.difference(['Region', 'Gene']).tolist()]
    nx_tables['Edge']['TF2G'] = er_metadata[er_metadata.columns.difference(
        r2g_columns)].drop('Region', axis=1).drop_duplicates()
    nx_tables['Edge']['TF2G'] = nx_tables['Edge']['TF2G'][['TF', 'Gene'] +
                                                            nx_tables['Edge']['TF2G'].columns.difference(['TF', 'Gene']).tolist()]
    # Generate node tables
    tfs = list(set(er_metadata['TF']))
    nx_tables['Node']['TF'] = pd.DataFrame(
        'TF', index=tfs, columns=['Node_type'])
    nx_tables['Node']['TF']['TF'] = tfs
    genes = list(set(er_metadata['Gene']))
    genes = [x for x in genes if x not in tfs]
    nx_tables['Node']['Gene'] = pd.DataFrame(
        'Gene', index=genes, columns=['Node_type'])
    nx_tables['Node']['Gene']['Gene'] = genes
    regions = list(set(er_metadata['Region']))
    nx_tables['Node']['Region'] = pd.DataFrame(
        'Region', index=regions, columns=['Node_type'])
    nx_tables['Node']['Region']['Region'] = regions
    # Add gene logFC
    if add_differential_gene_expression is True:
        for var in differential_variable:
            nx_tables['Node']['TF'] = pd.concat([nx_tables['Node']['TF'], _get_log2fc_nx(
                scplus_obj, var, nx_tables['Node']['TF'].index.tolist(), contrast='gene')], axis=1)
            nx_tables['Node']['Gene'] = pd.concat([nx_tables['Node']['Gene'], _get_log2fc_nx(
                scplus_obj, var, nx_tables['Node']['Gene'].index.tolist(), contrast='gene')], axis=1)
    if add_differential_region_accessibility is True:
        for var in differential_variable:
            nx_tables['Node']['Region'] = pd.concat([nx_tables['Node']['Region'], _get_log2fc_nx(
                scplus_obj, var, nx_tables['Node']['Region'].index.tolist(), contrast='region')], axis=1)
    return nx_tables
            
def _get_log2fc_nx(scplus_obj: 'SCENICPLUS',
                  variable,
                  features,
                  contrast: str = 'gene'
                  ):
    """
    A helper function to derive log2fc changes

    Parameters
    ---------
    scplus_obj: SCENICPLUS
        Object providing X_EXP / X_ACC, cell_names, gene_names / region_names and metadata_cell.
    variable:
        Column of `scplus_obj.metadata_cell` defining the groups to compare.
    features:
        Genes or regions to keep for the test.
    contrast: str, optional
        'gene' to use X_EXP, 'region' to use the transpose of X_ACC.

    Return
    ---------
    A dataframe with one '<variable>_Log2FC_<group>' column per group, indexed by feature.

    Raises
    ---------
    ValueError
        If `contrast` is neither 'gene' nor 'region'.
    """
    if contrast == 'gene':
        values, feature_names = scplus_obj.X_EXP, scplus_obj.gene_names
    elif contrast == 'region':
        values, feature_names = scplus_obj.X_ACC.T, scplus_obj.region_names
    else:
        # Fail with a clear message instead of an unbound local further down.
        raise ValueError(
            f"contrast must be 'gene' or 'region', got {contrast!r}")
    contrast_adata = anndata.AnnData(
        X=values,
        obs=pd.DataFrame(index=scplus_obj.cell_names),
        var=pd.DataFrame(index=feature_names))
    contrast_adata.obs = pd.DataFrame(scplus_obj.metadata_cell[variable])
    # Normalisation uses all features; the subset is taken afterwards.
    sc.pp.normalize_total(contrast_adata, target_sum=1e4)
    sc.pp.log1p(contrast_adata)
    # Work on an explicit copy of the subset: the test below stores its results in .uns.
    contrast_adata = contrast_adata[:, features].copy()
    sc.tl.rank_genes_groups(
        contrast_adata, variable, method='wilcoxon', corr_method='bonferroni')
    groups = contrast_adata.uns['rank_genes_groups']['names'].dtype.names
    diff_list = [_format_df_nx(sc.get.rank_genes_groups_df(
        contrast_adata, group=group), group, variable) for group in groups]
    return pd.concat(diff_list, axis=1)

def _format_df_nx(df, key, var):
    """
    A helper function to format differential test results
    """
    df.index = df['names']
    df = pd.DataFrame(df['logfoldchanges'])
    df.columns = [var+'_Log2FC_'+key]
    df.index.name = None
    return df

Format the eRegulons into a table which can be used to create a network using the package [networkx](https://networkx.org/)

In [None]:
# Store the cluster labels as strings (a single assignment is enough).
scplus_obj.metadata_cell['clusters'] = scplus_obj.metadata_cell['clusters'].astype(str)

#Whichever TFs you want to plot the regulons of
TF1 = 'Fosb'
TF2 = 'Mef2c'
TF3 = 'Junb'
TF4 = "Nr4a1"
TF5 = "Creb1"

# Edge and node tables for the selected TFs, with log2FC columns computed per
# group of the 'Engram' cell metadata column for genes and regions.
nx_tables = create_extended_nx_tables(
    scplus_obj = scplus_obj,
    eRegulon_metadata_key ='eRegulon_metadata',
    subset_eRegulons = [TF1, TF2, TF3, TF4, TF5],
    #subset_regions = hvr,
    #subset_genes = hvg,
    add_differential_gene_expression = True,
    add_differential_region_accessibility = True,
    differential_variable = ['Engram'])

Next we layout the graph.

In [None]:
#Change 'Engram_Log2FC_Varset' to the comparison you want to plot as the colours (Has to be one calculated above)
log2fc_column = 'Engram_Log2FC_Varset'

# One colour per TF, used for both the TF nodes and their TF-to-region edges.
tf_palette = {TF1: 'cornflowerblue', TF2: 'tan', TF3: 'cyan', TF4: 'Purple', TF5: 'peru'}

G, pos, edge_tables, node_tables = create_nx_graph(
    nx_tables,
    use_edge_tables = ['TF2R','R2G'],
    color_edge_by = {
        'TF2R': {'variable' : 'TF', 'category_color' : dict(tf_palette)},
        'R2G': {'variable' : 'importance_x_rho', 'continuous_color' : 'viridis', 'v_min': -1, 'v_max': 1},
    },
    transparency_edge_by = {
        'R2G': {'variable' : 'importance_x_rho', 'min_alpha': 0.1, 'v_min': 0},
    },
    width_edge_by = {
        'R2G': {'variable' : 'importance_x_rho', 'max_size' :  1.5, 'min_size' : 1},
    },
    color_node_by = {
        'TF': {'variable': 'TF', 'category_color' : dict(tf_palette)},
        'Gene': {'variable': log2fc_column, 'continuous_color' : 'bwr', 'v_max': 1.5, 'v_min': -1.5},
        'Region': {'variable': log2fc_column, 'continuous_color' : 'bwr', 'v_max': 1.5, 'v_min': -1.5},
    },
    transparency_node_by = {
        'Region': {'variable' : log2fc_column, 'min_alpha': 0.5},
        'Gene': {'variable' : log2fc_column, 'min_alpha': 0.5},
    },
    # Fixed size, shape and label size per node type.
    size_node_by = {
        node_type: {'variable': 'fixed_size', 'fixed_size': node_size}
        for node_type, node_size in (('TF', 40), ('Gene', 20), ('Region', 10))
    },
    shape_node_by = {
        node_type: {'variable': 'fixed_shape', 'fixed_shape': node_shape}
        for node_type, node_shape in (('TF', 'ellipse'), ('Gene', 'ellipse'), ('Region', 'diamond'))
    },
    label_size_by = {
        node_type: {'variable': 'fixed_label_size', 'fixed_label_size': label_size}
        for node_type, label_size in (('TF', 40.0), ('Gene', 20.0), ('Region', 0.0))
    },
    layout='kamada_kawai_layout',
    scale_position_by=250)

Finally we can visualize the network.

In this network diamond shapes represent regions; regions and target genes are color coded by their log2 fold change in engram cells. Target genes and TFs are drawn as ellipses and are labeled, and each TF has its own fixed color.

In [None]:
%matplotlib inline
plt.figure(figsize=(30, 30))
plot_networkx(G, pos)

In [None]:
##Plt colourbar to add to cytoscape
# Uses the pyplot and numpy modules imported at the top of the notebook.
os.makedirs(os.path.join(work_dir, "outs"), exist_ok=True)

# A two-value image spanning the colour range, drawn only to obtain a mappable
# for the colourbar; its own axes are hidden.
colorbar_range = np.array([[-1.5, 1.5]])
plt.figure(figsize=(5.5, 0.8))
img = plt.imshow(colorbar_range, cmap="bwr")
plt.gca().set_visible(False)
cax = plt.axes([0.1, 0.2, 0.8, 0.6])

# The mappable is passed explicitly so the colourbar does not depend on the "current image".
cb = plt.colorbar(img, orientation="horizontal", cax=cax)
cb.set_label(label='log2FoldChange in Engram Cells',size=15,weight='bold')
plt.savefig(os.path.join(work_dir, "outs/colorbar.png"), bbox_inches="tight")

Export to cytoscape to make this plot even nicer

This network can be imported using `file -> import -> Network from file ...`

Also make sure to import the SCENIC+ network layout using `file -> import -> Styles from file ...`.

This layout is available under [cytoscape_styles/SCENIC+.xml](https://raw.githubusercontent.com/aertslab/scenicplus/main/cytoscape_styles/SCENIC%2B.xml).

In [None]:
export_to_cytoscape(G, pos, out_file = os.path.join(work_dir, 'outs/network_AP1.cyjs'))

## Save output

In [None]:
# Several files below go to <work_dir>/outs, so make sure it exists.
outs_dir = os.path.join(work_dir, 'outs')
os.makedirs(outs_dir, exist_ok=True)

###Export Eregulons
scplus_obj.uns['eRegulon_metadata'].to_csv(os.path.join(work_dir, 'Eregulons.csv'))

# Export the eRegulon names as a one-column table.
# Assumes the AUC tables hold one column per eRegulon, so the names are their column
# labels - TODO confirm. (Asking the DataFrame constructor for an "eRegulon" column of
# the AUC table itself only yields an empty column.)
for auc_key, file_name in (('Region_based', 'SelectedEregulons.csv'),
                           ('Gene_based', 'SelectedGeneEregulons.csv')):
    eregulon_names = list(scplus_obj.uns['eRegulon_AUC'][auc_key].columns)
    pd.DataFrame({"eRegulon": eregulon_names}).to_csv(os.path.join(work_dir, file_name))

###Export Gene-RegionLinks
scplus_obj.uns['region_to_gene'].to_csv(os.path.join(outs_dir, 'Gene-RegionLinks.csv'))

###Export List of Genes in Analysis
scplus_obj.metadata_genes.to_csv(os.path.join(outs_dir, 'GeneList.csv'))

In [None]:
###----Export AUC Matrices
scplus_obj.uns['eRegulon_AUC']["Gene_based"].to_csv(os.path.join(work_dir, 'outs/Gene-Based_AUC.csv'))
scplus_obj.uns['eRegulon_AUC']["Region_based"].to_csv(os.path.join(work_dir, 'outs/Region-based_AUC.csv'))

In [None]:
###Save scplus object
os.makedirs(os.path.join(work_dir, 'outs'), exist_ok=True)
# protocol=-1 selects the highest pickle protocol; the context manager closes the file.
with open(os.path.join(work_dir, 'outs/scplus_obj.pkl'), 'wb') as scplus_file:
    dill.dump(scplus_obj, scplus_file, protocol=-1)