In [64]:
import os, glob, re, pickle
from functools import partial
from collections import OrderedDict
import operator as op
from cytoolz import compose

import pandas as pd
import seaborn as sns
import numpy as np
import scanpy as sc
import anndata as ad
import matplotlib as mpl
import matplotlib.pyplot as plt

from pyscenic.export import export2loom, add_scenic_metadata
from pyscenic.utils import load_motifs
from pyscenic.transform import df2regulons
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_binarization, plot_rss

from IPython.display import HTML, display

ImportError: cannot import name 'load_signatures'

In [2]:
DATA_FOLDER="/home/lmlu/scRNAseq/SCENIC/NB"
RESOURCES_FOLDER="/home/lmlu/scRNAseq/SCENIC/NB"
DATABASE_FOLDER = "/home/lmlu/scRNAseq/SCENIC/cisTarget_databases/"
SCHEDULER="123.122.8.24:8786"
DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "hg19-*.feather")
MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.hgnc-m0.001-o0.0.tbl")
MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, 'hs_hgnc_tfs.txt')
SC_EXP_FNAME = os.path.join(RESOURCES_FOLDER, "allNB+CTC_extract_tumor_counts.txt")
REGULONS_FNAME = os.path.join(DATA_FOLDER, "regulons.csv")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "motifs.csv")

In [40]:
DATA_FOLDER="/Users/apple/Desktop/work/data/SCENIC/NB"
RESOURCES_FOLDER="/Users/apple/Desktop/work/data/SCENIC/数据"
DATABASE_FOLDER = "/Users/apple/Desktop/work/data/SCENIC/cisTarget_databases/"
SCHEDULER="123.122.8.24:8786"
DATABASES_GLOB = os.path.join(DATABASE_FOLDER, "hg19-*.feather")
MOTIF_ANNOTATIONS_FNAME = os.path.join(RESOURCES_FOLDER, "motifs-v9-nr.hgnc-m0.001-o0.0.tbl")
MM_TFS_FNAME = os.path.join(RESOURCES_FOLDER, 'hs_hgnc_tfs.txt')
SC_EXP_FNAME = os.path.join(RESOURCES_FOLDER, "expMatrix_cover800_3P_CTCs_244.txt")
REGULONS_FNAME = os.path.join(DATA_FOLDER, "regulons.csv")
MOTIFS_FNAME = os.path.join(DATA_FOLDER, "motifs.csv")

In [None]:
# 读入表达矩阵，表达矩阵的格式：横坐标是基因，纵坐标是细胞
ex_matrix = pd.read_csv(SC_EXP_FNAME, sep='\t', header=0, index_col=0).T
ex_matrix.shape

In [None]:
# 导入转录因子
tf_names = load_tf_names(MM_TFS_FNAME)

# 导入数据库
db_fnames = glob.glob(DATABASES_GLOB)
def name(fname):
    return os.path.basename(fname).split(".")[0]   
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs

In [None]:
ex_matrix.head()

## 1. Inference of co-expression modules

In [None]:
# 两条命令解决
adjacencies = grnboost2(ex_matrix, tf_names=tf_names, verbose=True) #耗时

In [None]:
adjacencies.head()

In [None]:
modules = list(modules_from_adjacencies(adjacencies, ex_matrix))

In [None]:
modules[:10]

## 2. Prune modules for targets with cis regulatory footprints (aka RcisTarget)

In [41]:
# Calculate a list of enriched motifs and the corresponding target genes for all modules.
with ProgressBar():
    df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME)

# Create regulons from this table of enriched motifs.
regulons = df2regulons(df)

# Save the enriched motifs and the discovered regulons to disk.
df.to_csv(MOTIFS_FNAME)
with open(REGULONS_FNAME, "wb") as f:
    pickle.dump(regulons, f)

NameError: name 'ProgressBar' is not defined

## 3. Cellular regulon enrichment matrix (aka AUCell)

In [None]:
auc_mtx = aucell(ex_matrix, regulons, num_workers=4)
# 这一步出图
# sns.clustermap(auc_mtx, figsize=(8,8))

In [None]:
auc_mtx.to_csv(AUC_FNAME)

## pySCENIC结果导入R

In [None]:
auc_mtx = pd.read_csv("/home/lmlu/scRNAseq/SCENIC/CTC/auc.tsv", index_col=0)

In [6]:
with open(REGULONS_FNAME,"rb") as f:
    regulons=pickle.load(f)

In [77]:
regulons[1]

Regulon(name='AR(+)', gene2weight=<frozendict {'SGK1': 2.345140543709752, 'ZFP36L1': 2.804323331293004, 'AR': 1.0, 'ZNF805': 2.5315971221112394, 'SULF1': 3.8658290772477955, 'RNF125': 1.8089783963877972, 'RPP30': 3.2753758800241735}>, gene2occurrence=<frozendict {}>, transcription_factor='AR', context=frozenset({'transfac_pro__M00447.png', 'activating'}), score=4.117355491320991, nes=0.0, orthologous_identity=0.0, similarity_qvalue=0.0, annotation='')

In [79]:
regulon_df = pd.DataFrame(columns = ["regulon_name", "transcription_factor", "genes",
"weights", "score", "context"])
for i in range(len(regulons)):
    regulon = regulons[i]
    regulon_dict = dict({"regulon_name": regulon.name,
    "transcription_factor": regulon.transcription_factor,
    "genes": list(regulon.genes),
    "weights": list(regulon.weights),
    "score": regulon.score,
    "nes": regulon.nes,
    "gene2occurrence": regulon.gene2occurrence,                    
    "context": list(regulon.context)})
    regulon_df = regulon_df.append(regulon_dict, ignore_index = True)

regulon_df.genes = regulon_df.genes.apply(lambda x: ", ".join(x))
regulon_df.weights = regulon_df.weights.apply(lambda x: str(x))
regulon_df.weights = regulon_df.weights.apply(lambda x: x.replace('[', ''))
regulon_df.context = regulon_df.context.apply(lambda x: ", ".join(x))

regulon_df.to_csv("/Users/apple/Desktop/work/data/SCENIC/NB/regulons.csv", index = False)

In [None]:
from pyscenic.export import export2loom
export2loom(ex_mtx = ex_matrix, auc_mtx = auc_mtx, regulons = [r.rename(r.name.replace('(+)',' ('+str(len(r))+'g)')) for r in regulons], 
            out_fname = "/home/lmlu/scRNAseq/SCENIC/CTC/CTC.loom")
# 这一句话运行完毕后，会在指定目录下生成xxx.loom文件，这就是导入R所需要的文件

## OPTIONAL STEP 5 - Regulon activity binarization

In [1]:
BIN_MTX_FNAME = "/home/lmlu/scRNAseq/SCENIC/CTC/CTC.bin.csv"
THR_FNAME = "/home/lmlu/scRNAseq/SCENIC/CTC/CTC.thresholds.csv"

In [None]:
bin_mtx, thresholds = binarize(auc_mtx) 
bin_mtx.to_csv(BIN_MTX_FNAME) 
thresholds.to_frame().rename(columns={0:'threshold'}).to_csv(THR_FNAME)

In [None]:
bin_mtx = pd.read_csv(BIN_MTX_FNAME, index_col=0)
thresholds = pd.read_csv(THR_FNAME, index_col=0).threshold

### Create heatmap with binarized regulon activity.

In [None]:
def palplot(pal, names, colors=None, size=1):
    n = len(pal)
    f, ax = plt.subplots(1, 1, figsize=(n * size, size))
    ax.imshow(np.arange(n).reshape(1, n),
              cmap=mpl.colors.ListedColormap(list(pal)),
              interpolation="nearest", aspect="auto")
    ax.set_xticks(np.arange(n) - .5)
    ax.set_yticks([-.5, .5])
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    colors = n * ['k'] if colors is None else colors
    for idx, (name, color) in enumerate(zip(names, colors)):
        ax.text(0.0+idx, 0.0, name, color=color, horizontalalignment='center', verticalalignment='center')
    return f

In [None]:
sns.set()
sns.set_style("whitegrid")
fig = palplot(bw_palette, ['OFF', 'ON'], ['k', 'w'])
savesvg('legend - GSE115978 - on_off.svg', fig)