In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import sys
import os
# Keep a reference to the current stderr stream and open a binary write handle on the null device.
# NOTE(review): presumably intended for temporarily silencing stderr; neither name is used in the
# cells visible here and `null` is never closed -- confirm both are still needed.
_stderr = sys.stderr
null = open(os.devnull,'wb')

import dill

import mudata

In [None]:
# Read the .h5mu file from the pipeline output folder; later cells index it by modality name and read .obs / .uns.
scplus_mdata = mudata.read("./outs/scplus_pipeline/Snakemake/outs/scplusmdata.h5mu")

In [None]:
import anndata as ad
import numpy as np, pandas as pd
from scipy import sparse

# Export every (TF, region) pair that has a non-zero entry in the direct cistrome
# matrix as a two-column, tab-separated edge list.
cistrome_direct = ad.read_h5ad('outs/scplus_pipeline/Snakemake/outs/cistromes_direct.h5ad')

regions = np.asarray(cistrome_direct.obs_names)  # row labels, used as region (RE) IDs
tfs     = np.asarray(cistrome_direct.var_names)  # column labels, used as TF names
X = cistrome_direct.X

if sparse.issparse(X):
    coo = X.tocoo(copy=False)
    # A sparse matrix may hold explicitly stored zeros; filter them out so this
    # branch selects exactly the same entries as the dense `!= 0` test below.
    stored_nonzero = coo.data != 0
    r, c = coo.row[stored_nonzero], coo.col[stored_nonzero]
else:
    arr = np.asarray(X)
    r, c = np.where(arr != 0)

tf_re = pd.DataFrame({"TF": tfs[c], "region": regions[r]})

# drop_duplicates guards against repeated (row, col) entries in the matrix.
tf_re.drop_duplicates().to_csv("outs/scplus_pipeline/Snakemake/outs/cistromes_direct.TF_RE.tsv", sep="\t", index=False)

In [None]:
# Display the entry stored under "extended_e_regulon_metadata" in the MuData `uns` mapping.
scplus_mdata.uns["extended_e_regulon_metadata"]

# eRegulon dimensionality reduction

In [None]:
import scanpy as sc
import anndata
# Join the direct and extended gene-based AUC modalities along axis 1 so that
# every eRegulon from both modalities sits in one AnnData.
eRegulon_gene_AUC = anndata.concat(
    [
        scplus_mdata[modality]
        for modality in ("direct_gene_based_AUC", "extended_gene_based_AUC")
    ],
    axis=1,
)

In [None]:
# Attach the MuData-level obs rows (selected by this object's obs_names) so that
# modality-prefixed columns such as "scRNA_counts:celltype" can be used for colouring below.
eRegulon_gene_AUC.obs = scplus_mdata.obs.loc[eRegulon_gene_AUC.obs_names]

In [None]:
# Build the neighbour graph directly on the AUC matrix (use_rep="X").
sc.pp.neighbors(eRegulon_gene_AUC, use_rep = "X")
# Compute the embedding itself: sc.pl.umap only draws coordinates already stored in
# .obsm["X_umap"], and nothing else in this notebook creates them.
sc.tl.umap(eRegulon_gene_AUC)

In [None]:
# UMAP of the eRegulon AUC object, coloured by the "scRNA_counts:celltype" obs column.
sc.pl.umap(eRegulon_gene_AUC, color = "scRNA_counts:celltype")

In [None]:
# UMAP coloured by "scRNA_counts:celltype", with the group labels drawn on the
# embedding itself rather than in a side legend.
sc.pl.umap(
    eRegulon_gene_AUC,
    color="scRNA_counts:celltype",
    legend_loc="on data",
    legend_fontsize=8,
    legend_fontweight="normal",
)

In [None]:
# UMAP coloured by "scRNA_counts:Celltype2", labels drawn on the embedding.
# NOTE(review): "Celltype2" is not among the columns visible in the (truncated) obs
# preview further down this notebook -- confirm the column exists.
sc.pl.umap(
    eRegulon_gene_AUC,
    color="scRNA_counts:Celltype2",
    legend_loc="on data",
    legend_fontsize=8,
    legend_fontweight="normal",
)

# eRegulon specificity score

In [None]:
from scenicplus.RSS import (regulon_specificity_scores, plot_rss)

In [None]:
# Regulon specificity scores per "scRNA_counts:celltype" group, computed over both
# the direct and the extended gene-based AUC modalities.
rss = regulon_specificity_scores(
    scplus_mudata=scplus_mdata,
    variable="scRNA_counts:celltype",
    modalities=["direct_gene_based_AUC", "extended_gene_based_AUC"],
)

In [None]:
# Plot the specificity scores with top_n=5 and the panels arranged in 5 columns.
plot_rss(data_matrix=rss, top_n=5, num_columns=5)

# Plot eRegulon enrichment scores

In [None]:
# Colour the UMAP by the two highest-scoring eRegulons of every row (group) in `rss`.
# dict.fromkeys de-duplicates while keeping first-seen order, so the panel order is
# the same on every run; a plain set() orders strings by a per-process randomised hash.
top_eregulons = list(dict.fromkeys(
    eregulon
    for group in rss.index
    for eregulon in rss.loc[group].sort_values(ascending=False).head(2).index
))
sc.pl.umap(eRegulon_gene_AUC, color=top_eregulons)

# Heatmap dotplot

In [None]:
from scenicplus.plotting.dotplot import heatmap_dotplot

In [None]:
from plotnine import theme, element_text

# Select the eRegulons to show: rows of the filtered direct-eRegulon table whose
# `triplet_pass` is True and whose `symbols` column equals "+/+".
filtered_df = pd.read_csv('outs/scplus_pipeline/Snakemake/outs/eRegulon_direct_filtered.tsv',sep='\t')
mask = filtered_df["triplet_pass"].eq(True) & filtered_df["symbols"].eq("+/+")
subset_df = filtered_df.loc[mask].copy()
unique_features = subset_df['eRegulon_name'].unique()

# Desired group order; the reversed list is what gets passed to the plot.
group_order = [
    "gCap-C2_BPD", "gCap-C2_Control",
    "gCap-C1_BPD", "gCap-C1_Control",
    "aCap_BPD", "aCap_Control",
    "Venous_BPD", "Venous_Control",
    "SystemicVenous_BPD", "SystemicVenous_Control",
    "Lymphatics_BPD", "Lymphatics_Control",
    "Arterial_BPD", "Arterial_Control",
]
group_order_rev = group_order[::-1]

# Colour comes from the gene-based AUC modality, dot size from the region-based one.
dotplot = heatmap_dotplot(
    scplus_mudata = scplus_mdata,
    color_modality = "direct_gene_based_AUC",
    size_modality = "direct_region_based_AUC",
    group_variable = "scRNA_counts:celltype",
    eRegulon_metadata_key = "direct_e_regulon_metadata",
    color_feature_key = "Gene_signature_name",
    size_feature_key = "Region_signature_name",
    feature_name_key = "eRegulon_name",
    sort_data_by = "direct_gene_based_AUC",
    orientation = "horizontal",
    subset_feature_names = unique_features,
    group_variable_order = group_order_rev,
)

# Single theme call: x tick labels rotated 90 degrees at size 12, y tick labels at size 15.
dotplot += theme(
    axis_text_x=element_text(size=12, angle=90, ha='center'),
    axis_text_y=element_text(size=15),
)

dotplot.save("outs/scplus_pipeline/Snakemake/outs/dotplot_heatmap_positive.png", dpi=600, width=22, height=6.5, units="in", verbose=False)

# Render to a Matplotlib Figure so the tick labels can be read back in plotted order.
fig = dotplot.draw()
ax = fig.axes[0]

# With orientation="horizontal" the x tick labels are taken as the feature names.
x_labels = [t.get_text() for t in ax.get_xticklabels()]

# Save for R: one label per line.
with open("outs/scplus_pipeline/Snakemake/outs/eregulon_labels.txt", "w") as f:
    f.writelines(f"{lab}\n" for lab in x_labels)

In [22]:
import mudata
import os
# Output folder of the pipeline; reused below when writing the pickle.
scplus_outdir = "outs/scplus_pipeline/Snakemake/outs/"

# Re-read the .h5mu file, replacing whatever `scplus_mdata` currently holds in memory.
scplus_mdata_path = os.path.join(scplus_outdir, 'scplusmdata.h5mu')
scplus_mdata = mudata.read(scplus_mdata_path)

In [23]:
# Overwrite both modality-prefixed "celltype" columns with the obs index minus its
# last "_"-separated token.
# NOTE(review): in the stored output below this yields barcode-like strings such as
# "D253_Multiome_GACCTTTGTTGCACGG-1-NL_Control" rather than a cell-type label -- confirm
# this is the intended value before relying on it for grouping.
index_without_last_token = scplus_mdata.obs.index.str.rsplit('_', n=1).str[0]
scplus_mdata.obs['scRNA_counts:celltype'] = index_without_last_token
scplus_mdata.obs['scATAC_counts:celltype'] = index_without_last_token

In [24]:
# Display the MuData-level obs table to inspect the columns assigned above.
scplus_mdata.obs

Unnamed: 0,scRNA_counts:orig.ident,scRNA_counts:nCount_RNA,scRNA_counts:nFeature_RNA,scRNA_counts:nCount_ATAC,scRNA_counts:nFeature_ATAC,scRNA_counts:barcode,scRNA_counts:gex_barcode,scRNA_counts:atac_barcode,scRNA_counts:is_cell,scRNA_counts:excluded_reason,...,scATAC_counts:wsnn_res.10,scATAC_counts:Celltype_4,scATAC_counts:Cluster,scATAC_counts:Celltype,scATAC_counts:Group,scATAC_counts:celltype,scATAC_counts:sample_id,scATAC_counts:pycisTopic_leiden_10_0.6,scATAC_counts:pycisTopic_leiden_10_1.2,scATAC_counts:pycisTopic_leiden_10_3
D253_Multiome_GACCTTTGTTGCACGG-1-NL_Control_CAP,D253_Multiome,3593.0,1761,6379.0,5599,GACCTTTGTTGCACGG-1,GACCTTTGTTGCACGG-1,GGTGCATCAGGTCAAC-1,1,0,...,43,CAP1,1,1,D253_CAP1,D253_Multiome_GACCTTTGTTGCACGG-1-NL_Control,NL_Control_CAP,2,1,26
D253_Multiome_TGAGCACGTGCGCGTA-1-NL_Control_CAP,D253_Multiome,1667.0,971,2676.0,2479,TGAGCACGTGCGCGTA-1,TGAGCACGTGCGCGTA-1,TAAACCGCAAGTGCCG-1,1,0,...,37,CAP2,8,2,D253_CAP2,D253_Multiome_TGAGCACGTGCGCGTA-1-NL_Control,NL_Control_CAP,1,0,5
D253_Multiome_TACCGTTGTGGGTGAA-1-NL_Control_CAP,D253_Multiome,3320.0,1814,6499.0,5639,TACCGTTGTGGGTGAA-1,TACCGTTGTGGGTGAA-1,CGGTGTGCAAGTATCG-1,1,0,...,20,CAP1,3,1,D253_CAP1,D253_Multiome_TACCGTTGTGGGTGAA-1-NL_Control,NL_Control_CAP,0,5,2
D253_Multiome_TTGGGTTAGCTCAAAC-1-NL_Control_CAP,D253_Multiome,2636.0,1554,2650.0,2457,TTGGGTTAGCTCAAAC-1,TTGGGTTAGCTCAAAC-1,GTGTGAGTCGGTAATA-1,1,0,...,12,CAP2,8,2,D253_CAP2,D253_Multiome_TTGGGTTAGCTCAAAC-1-NL_Control,NL_Control_CAP,1,0,4
D253_Multiome_TAAGTAGCAGCCTGCA-1-NL_Control_CAP,D253_Multiome,1997.0,1094,1866.0,1689,TAAGTAGCAGCCTGCA-1,TAAGTAGCAGCCTGCA-1,GTTAACCGTTTCTAAG-1,1,0,...,14,CAP1,4,1,D253_CAP1,D253_Multiome_TAAGTAGCAGCCTGCA-1-NL_Control,NL_Control_CAP,0,8,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
D253_Multiome_CCGATGCAGCTCCTAC-1-NL_Control_CAP,D253_Multiome,1138.0,739,2652.0,2351,CCGATGCAGCTCCTAC-1,CCGATGCAGCTCCTAC-1,GTCGGTTTCTTAGCCA-1,1,0,...,25,CAP2,8,2,D253_CAP2,D253_Multiome_CCGATGCAGCTCCTAC-1-NL_Control,NL_Control_CAP,1,0,4
D253_Multiome_TGGCTAAGTAATCCCT-1-NL_Control_CAP,D253_Multiome,3884.0,1968,2105.0,1997,TGGCTAAGTAATCCCT-1,TGGCTAAGTAATCCCT-1,CTTCCAGCAGCCTGAG-1,1,0,...,37,CAP2,8,2,D253_CAP2,D253_Multiome_TGGCTAAGTAATCCCT-1-NL_Control,NL_Control_CAP,1,0,15
D111_Multiome_TGCATCCTCAGGCTAT-1-NL_Control_CAP,D111_Multiome,5251.0,1874,6145.0,5312,TGCATCCTCAGGCTAT-1,TGCATCCTCAGGCTAT-1,CGCTTTAAGGCACGTA-1,1,0,...,45,CAP2,8,2,D111_CAP2,D111_Multiome_TGCATCCTCAGGCTAT-1-NL_Control,NL_Control_CAP,1,4,6
D111_Multiome_ACGGTACGTGATTTGG-1-NL_Control_CAP,D111_Multiome,2275.0,1398,4891.0,4284,ACGGTACGTGATTTGG-1,ACGGTACGTGATTTGG-1,AGCTAATCACTAAGGC-1,1,0,...,20,CAP1,3,1,D111_CAP1,D111_Multiome_ACGGTACGTGATTTGG-1-NL_Control,NL_Control_CAP,0,3,9


In [25]:
from scenicplus.scenicplus_class import mudata_to_scenicplus
import pickle

# Convert the MuData object with scenicplus' mudata_to_scenicplus helper.
scplus_obj = mudata_to_scenicplus(
    mdata = scplus_mdata
)

# Write the pickle through a context manager so the file is flushed and closed
# deterministically instead of leaving the handle open for the life of the kernel.
with open(os.path.join(scplus_outdir, "scplus_obj_old.pkl"), "wb") as pkl_file:
    pickle.dump(scplus_obj, pkl_file)

In [26]:
# Display the `metadata_cell` attribute of the converted object.
scplus_obj.metadata_cell

Unnamed: 0,orig.ident,nCount_RNA,nFeature_RNA,nCount_ATAC,nFeature_ATAC,barcode,gex_barcode,atac_barcode,is_cell,excluded_reason,...,wsnn_res.7,wsnn_res.8,wsnn_res.9,wsnn_res.10,Celltype_4,Cluster,Celltype,Group,celltype,sample_id
D253_Multiome_GACCTTTGTTGCACGG-1-NL_Control_CAP,D253_Multiome,3593.0,1761,6379.0,5599,GACCTTTGTTGCACGG-1,GACCTTTGTTGCACGG-1,GGTGCATCAGGTCAAC-1,1,0,...,34,1,21,43,CAP1,1,1,D253_CAP1,D253_CAP1,NL_Control_CAP
D253_Multiome_TGAGCACGTGCGCGTA-1-NL_Control_CAP,D253_Multiome,1667.0,971,2676.0,2479,TGAGCACGTGCGCGTA-1,TGAGCACGTGCGCGTA-1,TAAACCGCAAGTGCCG-1,1,0,...,7,29,31,37,CAP2,8,2,D253_CAP2,D253_CAP2,NL_Control_CAP
D253_Multiome_TACCGTTGTGGGTGAA-1-NL_Control_CAP,D253_Multiome,3320.0,1814,6499.0,5639,TACCGTTGTGGGTGAA-1,TACCGTTGTGGGTGAA-1,CGGTGTGCAAGTATCG-1,1,0,...,29,18,18,20,CAP1,3,1,D253_CAP1,D253_CAP1,NL_Control_CAP
D253_Multiome_TTGGGTTAGCTCAAAC-1-NL_Control_CAP,D253_Multiome,2636.0,1554,2650.0,2457,TTGGGTTAGCTCAAAC-1,TTGGGTTAGCTCAAAC-1,GTGTGAGTCGGTAATA-1,1,0,...,7,9,14,12,CAP2,8,2,D253_CAP2,D253_CAP2,NL_Control_CAP
D253_Multiome_TAAGTAGCAGCCTGCA-1-NL_Control_CAP,D253_Multiome,1997.0,1094,1866.0,1689,TAAGTAGCAGCCTGCA-1,TAAGTAGCAGCCTGCA-1,GTTAACCGTTTCTAAG-1,1,0,...,30,14,15,14,CAP1,4,1,D253_CAP1,D253_CAP1,NL_Control_CAP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
D253_Multiome_CCGATGCAGCTCCTAC-1-NL_Control_CAP,D253_Multiome,1138.0,739,2652.0,2351,CCGATGCAGCTCCTAC-1,CCGATGCAGCTCCTAC-1,GTCGGTTTCTTAGCCA-1,1,0,...,11,37,19,25,CAP2,8,2,D253_CAP2,D253_CAP2,NL_Control_CAP
D253_Multiome_TGGCTAAGTAATCCCT-1-NL_Control_CAP,D253_Multiome,3884.0,1968,2105.0,1997,TGGCTAAGTAATCCCT-1,TGGCTAAGTAATCCCT-1,CTTCCAGCAGCCTGAG-1,1,0,...,18,21,31,37,CAP2,8,2,D253_CAP2,D253_CAP2,NL_Control_CAP
D111_Multiome_TGCATCCTCAGGCTAT-1-NL_Control_CAP,D111_Multiome,5251.0,1874,6145.0,5312,TGCATCCTCAGGCTAT-1,TGCATCCTCAGGCTAT-1,CGCTTTAAGGCACGTA-1,1,0,...,11,37,29,45,CAP2,8,2,D111_CAP2,D111_CAP2,NL_Control_CAP
D111_Multiome_ACGGTACGTGATTTGG-1-NL_Control_CAP,D111_Multiome,2275.0,1398,4891.0,4284,ACGGTACGTGATTTGG-1,ACGGTACGTGATTTGG-1,AGCTAATCACTAAGGC-1,1,0,...,29,33,18,20,CAP1,3,1,D111_CAP1,D111_CAP1,NL_Control_CAP
