In [None]:
import os
import glob
import pickle 
import pandas as pd
import numpy as np

In [None]:
from dask.diagnostics import ProgressBar 
from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2
from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs 
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell 

In [None]:
# Directory to hold results from pySCENIC
output_folder = "pySCENIC/outs"
regulons_fname = os.path.join(output_folder, "regulons.p")
motifs_fname = os.path.join(output_folder, "motifs.csv")

# Directory to read the databases from: ranking feather files (matched by a
# glob pattern), the motif annotation table and the transcription factor list
database_folder = "scenic_database"
database_glob = os.path.join(database_folder, "hg38__*.feather")
motif_annotations_fname = os.path.join(database_folder, "motifs-v9-nr.hgnc-m0.001-o0.0.tbl")
hm_tfs_fname = os.path.join(database_folder, "hs_hgnc_curated_tfs.txt")

# Directory to read in inputs for pySCENIC, results from 08_exportScenic
resources_folder = "output"
# Built with os.path.join like every other path in this cell instead of
# concatenating a hard-coded "/" separator.
sc_expr_fname = os.path.join(resources_folder, "Competition_forScenic.csv")

In [None]:
# Expression matrix: read the CSV using its first row as the header and its
# first column as the index, then transpose the resulting table.
ex_matrix = pd.read_csv(
    sc_expr_fname,
    header=0,
    index_col=0,
).transpose()
ex_matrix.head()

In [None]:
# Show the (rows, columns) dimensions of the transposed expression matrix
ex_matrix.shape

In [None]:
# Transcription factors: load the TF names from the curated list file (hm_tfs_fname)
tf_names=load_tf_names(hm_tfs_fname)

In [None]:
# Feather files
# glob.glob returns its matches in arbitrary, filesystem-dependent order; sort
# them so the order of `dbs` is identical on every run and machine.
db_fnames = sorted(glob.glob(database_glob))


def name(fname):
    """Return the base name of ``fname`` truncated at its first dot.

    The directory part is dropped and everything from the first "." onwards
    (all extensions) is removed.
    """
    return os.path.basename(fname).split(".")[0]


# One ranking database object per matched feather file, named after the file
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs

In [None]:
# Run GRNBoost from arboreto to infer co-expression modules
# GRNBoost2 is stochastic: pass a fixed seed so that re-running the notebook
# (Restart Kernel -> Run All) reproduces the same adjacencies.
adjacencies = grnboost2(
    expression_data=ex_matrix,
    tf_names=tf_names,
    seed=42,
    verbose=True,
)

In [None]:
# Save the adjacencies as a UTF-8 CSV. The path is relative, so the file lands in
# the current working directory rather than in output_folder.
# NOTE(review): `index` is not passed, so pandas' default applies and the row index
# is written as the first column - confirm downstream readers expect that.
adjacencies.to_csv("grnboost_out.csv", encoding="utf-8")

In [None]:
# Derive modules from the adjacencies and the expression matrix; list() materialises
# the returned iterable so the result can be reused by later cells.
modules=list(modules_from_adjacencies(adjacencies, ex_matrix))

In [None]:
# Serialise the module list with pickle. The file content is binary even though
# the file name ends in .txt.
with open("modules.txt", mode="wb") as handle:
    pickle.dump(modules, handle)

In [None]:
# Read the pickled object back from modules.txt into `b`.
# Unpickling can execute arbitrary code, so only load files you trust
# (e.g. ones written by this notebook).
with open("modules.txt", mode="rb") as handle:
    b = pickle.load(handle)