In [1]:
import os
import glob
import pickle
import pandas as pd
import numpy as np
import scanpy as sc
from dask.diagnostics import ProgressBar

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

In [2]:
DATABASES_GLOB = os.path.join('../../data/feather_files/', "mm10_*.feather")
db_fnames = glob.glob(DATABASES_GLOB)
def name(fname):
    return os.path.splitext(os.path.basename(fname))[0]
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs

[FeatherRankingDatabase(name="mm10_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings"),
 FeatherRankingDatabase(name="mm10_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings"),
 FeatherRankingDatabase(name="mm10_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.scores"),
 FeatherRankingDatabase(name="mm10_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.scores")]

# Modules and motif enrichment for 6Ho

In [None]:
#Load the data
data = sc.read_h5ad('../../data/combined_data_5ht6hointersection_union_highly_var.h5ad')
ho6_data = data[data.obs['orig.ident'] == '6Ho']
ho6_basal = ho6_data[ho6_data.obs['cluster1'] == 'Mammary epithelial cells-Basal']
counts_df = pd.DataFrame(ho6_basal.X, index=ho6_basal.obs_names,columns=ho6_basal.var_names)
#Load the motif annotation file name
MOTIF_ANNOTATIONS_FNAME = "../../data/feather_files/motifs-v10nr_clust-nr.mgi-m0.001-o0.0.tbl"
#Load the tfs
tfs = load_tf_names('../../data/allTFs_mm.txt')
tfs = sorted(list((set(tfs).intersection(ho6_data.var_names))))

#Run loop to obtain regulons for files obtained from step 1
for i in range(0,20):
    #Obatain the adjacency matrix
    curr_file_name = f'../../results/results_step_1_grnboost2_6ho_basal/6ho_basal_adjacencies_run{i}_allTFs.csv'
    curr_adjacencies = pd.read_csv(curr_file_name)
    #Generate modules for 6ho basal 
    modules = list(modules_from_adjacencies(curr_adjacencies, counts_df))
    #get regulons
    with ProgressBar():
        curr_df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME)
    curr_df.to_csv(f'../../results/results_step_2_strat_1_6ho_basal/motif_enrichment_6ho_basal_run{i}.csv')
    curr_regulons = df2regulons(curr_df)
    with open(f'../../results/results_step_2_strat_1_6ho_basal/6ho_basal_regulons_run{i}.p', "wb") as f:
        pickle.dump(curr_regulons, f)
    