# BS1140 SCENIC+

In [None]:
import warnings
warnings.simplefilter(action = 'ignore', category=FutureWarning)
import sys
import os
import imp
import dill
import scanpy as sc
import pandas
import pyranges
# Prepare handles for silencing stderr (ray can emit noisy messages).
# NOTE(review): nothing in this cell actually redirects stderr - it only keeps a
# reference to the real stream and opens the null device. A cell that wants quiet
# output presumably has to assign `sys.stderr = null` itself and restore `_stderr`
# afterwards - confirm against the cells that use these names.
import sys
_stderr = sys.stderr  # reference to the original stderr stream
null = open(os.devnull, 'wb')  # binary write handle on the OS null device; not closed in the visible cells
work_dir = 'BS1140'  # base directory used by the os.path.join(work_dir, ...) calls below

In [None]:
# Show the interpreter version this notebook was executed with.
sys.version

'3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:01:00) \n[Clang 13.0.1 ]'

# Inferring enhancer-driven GRNs using SCENIC+

In [None]:
# Load the three preprocessed inputs from the sample directory.
# NOTE(security): dill.load, like pickle, can execute arbitrary code while
# deserialising - only load .pkl files you produced yourself or otherwise trust.
adata = sc.read_h5ad(os.path.join(work_dir, 'scRNA/adata_archr.h5ad'))  # preprocessed scRNA result
# `with` guarantees each file handle is closed as soon as the object has been loaded
with open(os.path.join(work_dir, 'scATAC/cistopic_obj.pkl'), 'rb') as handle:
    cistopic_obj = dill.load(handle)  # preprocessed scATAC results
with open(os.path.join(work_dir, 'motifs_euler/menr.pkl'), 'rb') as handle:
    menr = dill.load(handle)  # motif enrichment results

  import imp


In [None]:
# create a SCENIC+ object to store gene expression, chromatin accessibility, motif enrichment results and cell/region/gene metadata
from scenicplus.scenicplus_class import create_SCENICPLUS_object
import numpy as np
scplus_obj = create_SCENICPLUS_object(
    GEX_anndata=adata.raw.to_adata(),  # expression is taken from adata.raw, not from adata itself
    cisTopic_obj=cistopic_obj,
    menr=menr,
    bc_transform_func=lambda x: f'{x}-BS1140' # function to convert scATAC barcodes to scRNA ones
)
# Densify the expression matrix. toarray() yields a plain 2-D ndarray directly,
# avoiding the intermediate np.matrix (and the extra copy) of np.array(X.todense()).
scplus_obj.X_EXP = scplus_obj.X_EXP.toarray()
# Last expression of the cell: display the object summary.
scplus_obj

2022-11-01 10:53:02,389 cisTopic     INFO     Imputing drop-outs
2022-11-01 10:53:03,876 cisTopic     INFO     Scaling
2022-11-01 10:53:04,858 cisTopic     INFO     Keep non zero rows
2022-11-01 10:53:05,622 cisTopic     INFO     Imputed accessibility sparsity: 0.31028790911462056
2022-11-01 10:53:05,622 cisTopic     INFO     Create CistopicImputedFeatures object
2022-11-01 10:53:05,623 cisTopic     INFO     Done!


SCENIC+ object with n_cells x n_genes = 2546 x 19979 and n_cells x n_regions = 2546 x 154223
	metadata_regions:'Chromosome', 'Start', 'End', 'Width', 'cisTopic_nr_frag', 'cisTopic_log_nr_frag', 'cisTopic_nr_acc', 'cisTopic_log_nr_acc'
	metadata_genes:'gene_ids', 'feature_types', 'genome', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
	metadata_cell:'GEX_n_genes', 'GEX_doublet_score', 'GEX_predicted_doublet', 'GEX_n_genes_by_counts', 'GEX_total_counts', 'GEX_total_counts_mt', 'GEX_pct_counts_mt', 'GEX_louvain_res_1.0', 'GEX_celltype', 'GEX_cluster_archr', 'ACC_Total_nr_frag', 'ACC_cisTopic_log_nr_frag', 'ACC_barcode', 'ACC_Total_nr_frag_in_regions', 'ACC_Log_total_nr_frag', 'ACC_Log_unique_nr_frag', 'ACC_cisTopic_nr_frag', 'ACC_Dupl_rate', 'ACC_Dupl_nr_frag', 'ACC_cisTopic_log_nr_acc', 'ACC_FRIP', 'ACC_TSS_enrichment', 'ACC_cisTopic_nr_acc', 'ACC_Unique_nr_frag', 'ACC_Unique_nr_

In [None]:
# Select an optimal BioMart host.
# Maps an Ensembl release number (as a string) to the URL of the host to query.
# Every entry except '105' points at a dated archive site.
# NOTE(review): '105' points at the live www.ensembl.org site, which presumably
# serves whatever release is current when the cell runs - confirm before relying on it.
ensembl_version_dict = {'105': 'http://www.ensembl.org',
                        '104': 'http://may2021.archive.ensembl.org/',
                        '103': 'http://feb2021.archive.ensembl.org/',
                        '102': 'http://nov2020.archive.ensembl.org/',
                        '101': 'http://aug2020.archive.ensembl.org/',
                        '100': 'http://apr2020.archive.ensembl.org/',
                        '99': 'http://jan2020.archive.ensembl.org/',
                        '98': 'http://sep2019.archive.ensembl.org/',
                        '97': 'http://jul2019.archive.ensembl.org/',
                        '96': 'http://apr2019.archive.ensembl.org/',
                        '95': 'http://jan2019.archive.ensembl.org/',
                        '94': 'http://oct2018.archive.ensembl.org/',
                        '93': 'http://jul2018.archive.ensembl.org/',
                        '92': 'http://apr2018.archive.ensembl.org/',
                        '91': 'http://dec2017.archive.ensembl.org/',
                        '90': 'http://aug2017.archive.ensembl.org/',
                        '89': 'http://may2017.archive.ensembl.org/',
                        '88': 'http://mar2017.archive.ensembl.org/',
                        '87': 'http://dec2016.archive.ensembl.org/',
                        '86': 'http://oct2016.archive.ensembl.org/',
                        '80': 'http://may2015.archive.ensembl.org/',
                        '77': 'http://oct2014.archive.ensembl.org/',
                        '75': 'http://feb2014.archive.ensembl.org/',
                        '54': 'http://may2009.archive.ensembl.org/'}

import pybiomart as pbm
def test_ensembl_host(scplus_obj, host, species):
    dataset = pbm.Dataset(name=species+'_gene_ensembl',  host=host)
    annot = dataset.query(attributes=['chromosome_name', 'transcription_start_site', 'strand', 'external_gene_name', 'transcript_biotype'])
    annot.columns = ['Chromosome', 'Start', 'Strand', 'Gene', 'Transcript_type']
    annot['Chromosome'] = annot['Chromosome'].astype('str')
    filter = annot['Chromosome'].str.contains('CHR|GL|JH|MT')
    annot = annot[~filter]
    annot.columns=['Chromosome', 'Start', 'Strand', 'Gene', 'Transcript_type']
    gene_names_release = set(annot['Gene'].tolist())
    ov=len([x for x in scplus_obj.gene_names if x in gene_names_release])
    print('Genes recovered: ' + str(ov) + ' out of ' + str(len(scplus_obj.gene_names)))
    return ov

# Query every listed Ensembl release and record how many genes each one recovers.
n_overlap = {}
for version, host in ensembl_version_dict.items():
    print(f'host: {version}')
    try:
        n_overlap[version] = test_ensembl_host(scplus_obj, host, 'hsapiens')
    except Exception as err:
        # Catch Exception rather than using a bare except so that an interrupt
        # (Ctrl-C / kernel stop) still aborts the scan. The error is printed
        # because failures other than an unreachable host end up here as well.
        print(f'Host not reachable ({type(err).__name__}: {err})')
if not n_overlap:
    # Without this guard the selection below would fail with an unhelpful error.
    raise RuntimeError('No Ensembl BioMart host could be queried; cannot select a biomart host')
# max() returns the first release holding the highest count, i.e. the same
# winner a stable descending sort would put first.
v = max(n_overlap, key=n_overlap.get)
print(f"version: {v} has the largest overlap, use {ensembl_version_dict[v]} as biomart host")

host: 105
Genes recovered: 15623 out of 19979
host: 104
Genes recovered: 15735 out of 19979
host: 103
Genes recovered: 19516 out of 19979
host: 102
Genes recovered: 19556 out of 19979
host: 101
Genes recovered: 19623 out of 19979
host: 100
Genes recovered: 19725 out of 19979
host: 99
Genes recovered: 19760 out of 19979
host: 98
Genes recovered: 19957 out of 19979
host: 97
Genes recovered: 19753 out of 19979
host: 96
Genes recovered: 19234 out of 19979
host: 95
Genes recovered: 19117 out of 19979
host: 94
Genes recovered: 19059 out of 19979
host: 93
Genes recovered: 18940 out of 19979
host: 92
Genes recovered: 18856 out of 19979
host: 91
Genes recovered: 18720 out of 19979
host: 90
Genes recovered: 18700 out of 19979
host: 89
Host not reachable
host: 88
Host not reachable
host: 87
Host not reachable
host: 86
Host not reachable
host: 80
Genes recovered: 15229 out of 19979
host: 77
Genes recovered: 15021 out of 19979
host: 75
Host not reachable
host: 54
Host not reachable
version: 98 has 

In [None]:
# set the biomart host
# Pinned to the Ensembl 98 archive (the 'sep2019' entry of ensembl_version_dict),
# the release reported with the largest overlap (19957 of 19979 genes) in the scan output above.
# The URL is hard-coded instead of being read from `v`, so this cell does not need the scan to be re-run.
biomart_host = "http://sep2019.archive.ensembl.org/"

Download the list of known human TFs.

In [None]:
# Fetch the human TF name list (v1.01) into pbmc_tutorial/data/ (path relative to the current working directory).
# NOTE(review): wget -O does not create missing directories, so pbmc_tutorial/data/ must already exist.
# NOTE(review): the run_scenicplus cell reads this file through an absolute path - keep both locations in sync.
!wget -O pbmc_tutorial/data/utoronto_human_tfs_v_1.01.txt  http://humantfs.ccbr.utoronto.ca/download/v_1.01/TF_names_v_1.01.txt

Download the program bedToBigBed; it will be used to generate files that can be uploaded to the UCSC Genome Browser.

In [None]:
# Fetch the linux.x86_64 build of bedToBigBed into pbmc_tutorial/ and mark it executable.
# NOTE(review): other paths in this notebook live under /Users/..., which looks like macOS;
# a Linux binary would not run there - confirm which machine performs the UCSC export.
!wget -O pbmc_tutorial/bedToBigBed http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed
!chmod +x pbmc_tutorial/bedToBigBed

In [None]:
# SCope visualisation needs a two-column embedding, so the PCA coordinates are
# cut down to their first two columns (re-running this cell is harmless).
gex_pca = scplus_obj.dr_cell['GEX_X_pca']
scplus_obj.dr_cell['GEX_X_pca'] = gex_pca.iloc[:, :2]
#scplus_obj.dr_cell['GEX_rep'] = scplus_obj.dr_cell['GEX_rep'].iloc[:, 0:2]

In [None]:
# computation done on cluster
# Runs the SCENIC+ wrapper; intermediate and final results are written below work_dir/scenicplus.
# NOTE(review): tmp_dir, tf_file and path_bedToBigBed are absolute, user-specific
# paths - adjust them before running this cell on another machine.
tmp_dir = '/Users/jinhuixin/tmp'
from scenicplus.wrappers.run_scenicplus import run_scenicplus
scenicplus_dir = os.path.join(work_dir, 'scenicplus')
try:
    run_scenicplus(
        scplus_obj = scplus_obj,
        variable = ['GEX_celltype'],  # cell metadata column(s) handed to the wrapper
        species = 'hsapiens',
        assembly = 'hg38',
        tf_file = '/Users/jinhuixin/Master/thesis/GRN/pbmc_tutorial/data/utoronto_human_tfs_v_1.01.txt',
        save_path = scenicplus_dir,
        biomart_host = biomart_host,
        upstream = [1000, 150000],    # presumably the min/max search space in bp - confirm against the SCENIC+ docs
        downstream = [1000, 150000],
        calculate_TF_eGRN_correlation = True,
        calculate_DEGs_DARs = True,
        export_to_loom_file = True,
        export_to_UCSC_file = True,
        path_bedToBigBed = '/Users/jinhuixin/Master/thesis/GRN/pbmc_tutorial',
        n_cpu = 7,
        _temp_dir = os.path.join(tmp_dir, 'ray_spill'))
except Exception:
    # in case of failure, still save the object
    # Make sure the target directory exists so that the emergency save cannot
    # itself fail and hide the original error.
    os.makedirs(scenicplus_dir, exist_ok=True)
    with open(os.path.join(scenicplus_dir, 'scplus_obj.pkl'), 'wb') as handle:
        dill.dump(scplus_obj, handle, protocol=-1)
    # bare raise re-raises the original exception with its traceback untouched
    raise

2022-11-01 13:23:37,372 SCENIC+_wrapper INFO     BS1140/scenicplus folder already exists.
2022-11-01 13:23:37,374 SCENIC+_wrapper INFO     Inferring region to gene relationships
2022-11-01 13:23:37,724 R2G          INFO     Calculating region to gene importances, using GBM method
2022-11-01 13:39:04,143 R2G          INFO     Took 926.4185910224915 seconds
2022-11-01 13:39:04,144 R2G          INFO     Calculating region to gene correlation, using SR method
2022-11-01 13:46:53,149 R2G          INFO     Took 469.00419187545776 seconds
2022-11-01 13:46:58,065 R2G          INFO     Done!
2022-11-01 13:46:58,221 SCENIC+_wrapper INFO     Inferring TF to gene relationships
2022-11-01 13:47:04,026 TF2G         INFO     Calculating TF to gene correlation, using GBM method


In [None]:
# load scenicplus object generated on euler
# NOTE(security): dill.load can execute arbitrary code while deserialising -
# only load pickles that come from a trusted run.
# `with` closes the file handle as soon as the object has been read.
with open(os.path.join(work_dir, 'scenicplus/scplus_obj.pkl'), 'rb') as handle:
    scplus_obj = dill.load(handle)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [None]:
# Display the summary representation of the reloaded object.
scplus_obj

SCENIC+ object with n_cells x n_genes = 2546 x 19979 and n_cells x n_regions = 2546 x 154223
	metadata_regions:'Chromosome', 'Start', 'End', 'Width', 'cisTopic_nr_frag', 'cisTopic_log_nr_frag', 'cisTopic_nr_acc', 'cisTopic_log_nr_acc'
	metadata_genes:'gene_ids', 'feature_types', 'genome', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
	metadata_cell:'GEX_n_genes', 'GEX_doublet_score', 'GEX_predicted_doublet', 'GEX_n_genes_by_counts', 'GEX_total_counts', 'GEX_total_counts_mt', 'GEX_pct_counts_mt', 'GEX_louvain_res_1.0', 'GEX_celltype', 'GEX_cluster_archr', 'ACC_Total_nr_frag', 'ACC_cisTopic_log_nr_frag', 'ACC_barcode', 'ACC_Total_nr_frag_in_regions', 'ACC_Log_total_nr_frag', 'ACC_Log_unique_nr_frag', 'ACC_cisTopic_nr_frag', 'ACC_Dupl_rate', 'ACC_Dupl_nr_frag', 'ACC_cisTopic_log_nr_acc', 'ACC_FRIP', 'ACC_TSS_enrichment', 'ACC_cisTopic_nr_acc', 'ACC_Unique_nr_frag', 'ACC_Unique_nr_

# Output of SCENIC+

## Gene expression and chromatin accessibility data

In [None]:
# Gene expression as a cells x genes table (first five cells).
# NOTE(review): the displayed values are non-integer (e.g. 1.194697), so they look
# like normalised/log-transformed values rather than raw counts - confirm.
scplus_obj.to_df('EXP').head() # expression values

Unnamed: 0,AL627309.1,AL627309.5,LINC01409,LINC01128,LINC00115,FAM41C,AL645608.6,SAMD11,NOC2L,KLHL17,...,MT-ND4,MT-ND5,MT-ND6,MT-CYB,MAFIP,AC011043.1,AL354822.1,AL592183.1,AC240274.1,AC007325.4
ATTCCTCCATAATCAC-1-BS1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.194697,1.194697,0.0,1.194697,0.0,0.0,0.0,0.0,0.0,0.0
TGAAACTGTTAGAGGG-1-BS1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2.023663,0.0,2.64844,0.0,0.0,0.0,0.0,0.0,0.0
TTGACATCAGTTTGTG-1-BS1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.772823,2.140287,0.0,3.305294,0.0,0.0,0.0,0.0,0.0,0.0
TGCACTTGTTAGGTGC-1-BS1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.440718,0.0,0.0,0.0,0.0,0.0,0.0
CCCAAACCAGGCAAGC-1-BS1140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.628536,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Accessibility as a regions x cells table (first five regions); note the
# orientation is the transpose of the 'EXP' table, which is cells x genes.
# NOTE(review): presumably these are the imputed accessibility values created when the object was built - confirm.
scplus_obj.to_df('ACC').head() # accessibility

Unnamed: 0,ATTCCTCCATAATCAC-1-BS1140,TGAAACTGTTAGAGGG-1-BS1140,TTGACATCAGTTTGTG-1-BS1140,TGCACTTGTTAGGTGC-1-BS1140,CCCAAACCAGGCAAGC-1-BS1140,CACTGACCAAGTGTCC-1-BS1140,GATTTGCAGGTGTCCA-1-BS1140,TTAGCAGGTTTAGTCC-1-BS1140,AGGTAACCATCCCTCA-1-BS1140,GCGAAGCCAACTAGCC-1-BS1140,...,CGCCTCATCACTAAGC-1-BS1140,CAAATCATCTGCAACG-1-BS1140,CATAGCTAGGGTGAAC-1-BS1140,CTTCACTCAATGAGGT-1-BS1140,GTGCTGATCGCAAACT-1-BS1140,CGCATGATCCTCACTA-1-BS1140,GCTTTCATCATCAGTA-1-BS1140,AGCACTAGTACCGAAC-1-BS1140,GGGCAATAGGTAAGGC-1-BS1140,GCAAGTCGTGTTTCAC-1-BS1140
GL000194.1:114742-115242,2,3,2,2,2,3,2,2,2,2,...,3,2,2,2,3,2,2,2,3,2
GL000194.1:101154-101654,7,9,7,8,6,17,7,7,5,4,...,11,5,10,7,8,5,7,6,9,5
GL000195.1:92476-92976,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GL000195.1:131013-131513,0,0,0,0,0,0,1,0,1,1,...,0,0,0,1,0,1,0,0,0,0
GL000195.1:30584-31084,16,13,17,15,18,12,16,15,21,21,...,13,20,13,20,15,22,17,17,15,20


## Cell, region and gene metadata

In [None]:
# Per-cell metadata (first five cells); in the displayed table the column names
# carry a GEX_ or ACC_ prefix, presumably marking the modality they came from.
scplus_obj.metadata_cell.head() # cell metadata

Unnamed: 0,GEX_n_genes,GEX_doublet_score,GEX_predicted_doublet,GEX_n_genes_by_counts,GEX_total_counts,GEX_total_counts_mt,GEX_pct_counts_mt,GEX_louvain_res_1.0,GEX_celltype,GEX_cluster_archr,...,ACC_doublet_score,ACC_predicted_doublet,ACC_n_genes_by_counts,ACC_total_counts,ACC_total_counts_mt,ACC_pct_counts_mt,ACC_louvain_res_1.0,ACC_celltype,ACC_cluster_archr,ACC_sample_id
ATTCCTCCATAATCAC-1-BS1140,2069.0,0.03168,False,2068.0,4343.0,11.0,0.253281,M-IL7R+,M-IL7R+,C8-Memory-1,...,0.03168,False,2068.0,4343.0,11.0,0.253281,M-IL7R+,C8-Memory-1,C8-Memory-1,BS1140
TGAAACTGTTAGAGGG-1-BS1140,969.0,0.072093,False,967.0,1523.0,19.0,1.247538,Ex,Ex,C2-Exhausted-2,...,0.072093,False,967.0,1523.0,19.0,1.247538,Ex,C2-Exhausted-2,C2-Exhausted-2,BS1140
TTGACATCAGTTTGTG-1-BS1140,1413.0,0.222222,False,1409.0,2666.0,74.0,2.775694,M-IL7R+,M-IL7R+,C8-Memory-1,...,0.222222,False,1409.0,2666.0,74.0,2.775694,M-IL7R+,C8-Memory-1,C8-Memory-1,BS1140
TGCACTTGTTAGGTGC-1-BS1140,1634.0,0.081425,False,1631.0,3102.0,11.0,0.35461,M-IL7R+,M-IL7R+,C8-Memory-1,...,0.081425,False,1631.0,3102.0,11.0,0.35461,M-IL7R+,C8-Memory-1,C8-Memory-1,BS1140
CCCAAACCAGGCAAGC-1-BS1140,977.0,0.021739,False,976.0,1556.0,10.0,0.642674,M-CD69+,M-CD69+,C6-EM-2,...,0.021739,False,976.0,1556.0,10.0,0.642674,M-CD69+,C6-EM-2,C6-EM-2,BS1140


In [None]:
# Per-gene metadata (first five genes), indexed by gene name.
scplus_obj.metadata_genes.head()

Unnamed: 0,gene_ids,feature_types,genome,n_cells,mt,n_cells_by_counts,mean_counts,pct_dropout_by_counts,total_counts,highly_variable,means,dispersions,dispersions_norm
AL627309.1,ENSG00000238009,Gene Expression,GRCh38,3,False,3,0.001023,99.897716,3.0,False,0.006147,1.972477,1.508182
AL627309.5,ENSG00000241860,Gene Expression,GRCh38,13,False,13,0.004773,99.556768,14.0,False,0.013088,1.195926,-0.803162
LINC01409,ENSG00000237491,Gene Expression,GRCh38,198,False,197,0.077395,93.283328,227.0,True,0.190262,1.783858,0.946773
LINC01128,ENSG00000228794,Gene Expression,GRCh38,186,False,186,0.071258,93.65837,209.0,False,0.202416,1.536482,0.210478
LINC00115,ENSG00000225880,Gene Expression,GRCh38,21,False,21,0.00716,99.28401,21.0,True,0.026549,1.773981,0.917376


In [None]:
# Per-region metadata (first five regions), indexed by 'chrom:start-end'.
scplus_obj.metadata_regions.head()

Unnamed: 0,Chromosome,Start,End,Width,cisTopic_nr_frag,cisTopic_log_nr_frag,cisTopic_nr_acc,cisTopic_log_nr_acc
GL000194.1:114742-115242,GL000194.1,114742,115242,500,62,1.792392,62,1.792392
GL000194.1:101154-101654,GL000194.1,101154,101654,500,244,2.38739,203,2.307496
GL000195.1:92476-92976,GL000195.1,92476,92976,500,5,0.69897,5,0.69897
GL000195.1:131013-131513,GL000195.1,131013,131513,500,19,1.278754,18,1.255273
GL000195.1:30584-31084,GL000195.1,30584,31084,500,451,2.654177,400,2.60206


Motif enrichment data

In [None]:
# List the names of the motif enrichment result sets stored on the object.
scplus_obj.menr.keys()

dict_keys(['CTX_topics_otsu_All', 'CTX_topics_otsu_No_promoters', 'DEM_topics_otsu_All', 'DEM_topics_otsu_No_promoters', 'CTX_topics_top_3_All', 'CTX_topics_top_3_No_promoters', 'DEM_topics_top_3_All', 'DEM_topics_top_3_No_promoters', 'CTX_DARs_All', 'CTX_DARs_No_promoters', 'DEM_DARs_All', 'DEM_DARs_No_promoters'])

Dimensionality reduction data

In [None]:
# List the names of the cell-level dimensionality reductions stored on the object.
scplus_obj.dr_cell.keys()

dict_keys(['GEX_X_pca', 'GEX_X_umap', 'eRegulons_UMAP', 'eRegulons_tSNE'])

Unstructured data (the `uns` slot)

In [None]:
# List the entries of the unstructured `uns` dictionary.
scplus_obj.uns.keys()

dict_keys(['Cistromes', 'search_space', 'region_to_gene', 'TF2G_adj', 'eRegulons', 'eRegulon_metadata', 'eRegulon_signatures', 'eRegulon_AUC', 'Pseudobulk', 'TF_cistrome_correlation', 'eRegulon_AUC_thresholds', 'RSS', 'DEGs', 'DARs'])

## The eRegulons entry

In [None]:
# Peek at the first five eRegulon objects.
scplus_obj.uns['eRegulons'][0:5]

[eRegulon for TF ARID5B in context frozenset({'positive tf2g', 'Top 15 region-to-gene links per gene', 'positive r2g', 'Top 10 region-to-gene links per gene', 'Cistromes_Unfiltered', 'BASC binarized'}).
 	This eRegulon has 19 target regions and 15 target genes.,
 eRegulon for TF ATF2 in context frozenset({'positive tf2g', 'Top 15 region-to-gene links per gene', 'positive r2g', 'Top 10 region-to-gene links per gene', 'Cistromes_Unfiltered', 'BASC binarized', '0.85 quantile'}).
 	This eRegulon has 18 target regions and 13 target genes.,
 eRegulon for TF ATF3 in context frozenset({'positive tf2g', 'Top 15 region-to-gene links per gene', 'positive r2g', 'Top 10 region-to-gene links per gene', 'Cistromes_Unfiltered', 'BASC binarized', 'Top 5 region-to-gene links per gene', '0.85 quantile', '0.9 quantile', '0.95 quantile'}).
 	This eRegulon has 118 target regions and 51 target genes.,
 eRegulon for TF ATF6 in context frozenset({'positive tf2g', 'Cistromes_Unfiltered', 'BASC binarized', 'posi

In [None]:
# Print every public attribute of the first eRegulon; attributes holding a
# list are shortened to their first five items to keep the output readable.
first_eregulon = scplus_obj.uns['eRegulons'][0]
for attr in dir(first_eregulon):
    if attr.startswith('_'):
        continue
    value = getattr(first_eregulon, attr)
    shown = value[0:5] if type(value) == list else value
    print(f"{attr}: {shown}")

cistrome_name: ARID5B_(517r)
context: frozenset({'positive tf2g', 'Top 15 region-to-gene links per gene', 'positive r2g', 'Top 10 region-to-gene links per gene', 'Cistromes_Unfiltered', 'BASC binarized'})
gsea_adj_pval: None
gsea_enrichment_score: None
gsea_pval: None
in_leading_edge: None
is_extended: False
n_target_genes: 15
n_target_regions: 19
regions2genes: [r2g(region='chr5:157344232-157344732', target='ITK', importance=0.01694017203078059, rho=0.19743390222890106, importance_x_rho=0.003344564268465899, importance_x_abs_rho=0.003344564268465899), r2g(region='chr6:16473114-16473614', target='ATXN1', importance=0.036620395425374805, rho=0.33063230171542995, importance_x_rho=0.012107885629220874, importance_x_abs_rho=0.012107885629220874), r2g(region='chr2:203861321-203861821', target='ICOS', importance=0.030269561376561387, rho=0.3582964469743678, importance_x_rho=0.010845476292694499, importance_x_abs_rho=0.010845476292694499), r2g(region='chr2:203914728-203915228', target='ICOS',

In [None]:
# First rows of the eRegulon metadata table; in the displayed output each row is one TF / region / gene link.
scplus_obj.uns['eRegulon_metadata'].head()

Unnamed: 0,Region_signature_name,Gene_signature_name,TF,is_extended,Region,Gene,R2G_importance,R2G_rho,R2G_importance_x_rho,R2G_importance_x_abs_rho,TF2G_importance,TF2G_regulation,TF2G_rho,TF2G_importance_x_abs_rho,TF2G_importance_x_rho
0,ARID5B_+_+_(19r),ARID5B_+_+_(15g),ARID5B,False,chr5:157344232-157344732,ITK,0.01694,0.197434,0.003345,0.003345,9.205713,1,0.115597,1.064154,1.064154
1,ARID5B_+_+_(19r),ARID5B_+_+_(15g),ARID5B,False,chr6:16473114-16473614,ATXN1,0.03662,0.330632,0.012108,0.012108,1.737764,1,0.111201,0.193241,0.193241
2,ARID5B_+_+_(19r),ARID5B_+_+_(15g),ARID5B,False,chr6:16419719-16420219,ATXN1,0.015978,0.325449,0.0052,0.0052,1.737764,1,0.111201,0.193241,0.193241
3,ARID5B_+_+_(19r),ARID5B_+_+_(15g),ARID5B,False,chr6:16699292-16699792,ATXN1,0.004254,0.159969,0.00068,0.00068,1.737764,1,0.111201,0.193241,0.193241
4,ARID5B_+_+_(19r),ARID5B_+_+_(15g),ARID5B,False,chr2:203861321-203861821,ICOS,0.03027,0.358296,0.010845,0.010845,4.094361,1,0.099097,0.405741,0.405741


In [None]:
# export the eRegulon metadata table (not the cell metadata) to CSV
# NOTE(review): absolute, user-specific output path; the rest of the notebook builds
# paths from `work_dir` - consider doing the same here so the notebook runs elsewhere.
scplus_obj.uns['eRegulon_metadata'].to_csv('/Users/jinhuixin/Master/thesis/GRN/BS1140/eRegulon_metadata.csv')