# CellOracle GRN

In [6]:
import pandas as pd
import muon as mu
import scanpy as sc
import celloracle as co

In [7]:
# Load args
path_data = "/cellar/users/aklie/data/datasets/paul15/annotated/Paul_etal_15.h5mu"
path_r2g = None
path_tf2r = None
use_base_grn = True
cluster_key = "louvain_annot"
layer = "scaled_imputed_counts"
alpha = 10
bagging_number = 20
path_out = "/cellar/users/aklie/data/datasets/paul15/analysis/celloracle/grn.csv"

In [8]:
# Process base GRN
if use_base_grn:
    base_grn = co.data.load_mouse_scATAC_atlas_base_GRN()
else:
    r2g = pd.read_csv(path_r2g)
    tfb = pd.read_csv(path_tf2r)
    if (r2g.shape[0] == 0) or (tfb.shape[0] == 0):
        grn = pd.DataFrame(columns=['source', 'target', 'score', 'pval'])
        grn.to_csv(path_out, index=False)
        exit()
    tfb['score'] = 1
    r2g = r2g[['cre', 'gene']]
    base_grn = pd.merge(
        r2g,
        tfb
        .pivot(index='cre', columns='tf')
        .fillna(0)
        .droplevel(0, axis=1)
        .reset_index()
    )
    base_grn = base_grn.rename(columns={'cre': 'peak_id', 'gene': 'gene_short_name'})
    base_grn['peak_id'] = base_grn['peak_id'].str.replace('-', '_')

In [9]:
# Init object
mdata = mu.read(path_data)
adata = mdata.mod["rna"].copy()
adata.obs[cluster_key] = mdata.obs[cluster_key].copy()
if layer in adata.layers:
    print(f"Using data in layer {layer} for regression.")
    adata.X = adata.layers[layer].copy()
else:
    print(f"Could not find layer {layer}. Using log normalized counts for regression.")
    adata.X = adata.layers["counts"].copy()
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

Using data in layer scaled_imputed_counts for regression.


In [10]:
# Select only MEP_0 cluster
adata = adata[adata.obs[cluster_key] == "MEP_0"].copy()
adata.obs[]

AnnData object with n_obs × n_vars = 165 × 1999
    obs: 'paul15_clusters', 'n_counts_all', 'n_counts', 'louvain', 'cell_type', 'louvain_annot', 'Lineage_MEP', 'Lineage_GMP', 'Pseudotime_Lineage_MEP', 'Pseudotime_Lineage_GMP', 'Pseudotime'
    var: 'n_counts', 'symbol', 'isin_top1000_var_mean_genes', 'isin_TFdict_targets', 'isin_TFdict_regulators'
    uns: 'cell_type_colors', 'diffmap_evals', 'draw_graph', 'iroot', 'log1p', 'louvain', 'louvain_annot_colors', 'louvain_colors', 'louvain_sizes', 'neighbors', 'paga', 'paul15_clusters_colors', 'pca'
    obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca'
    varm: 'PCs'
    layers: 'imputed_count', 'normalized_count', 'raw_count', 'scaled_imputed_counts'
    obsp: 'connectivities', 'distances'

In [19]:
# Model TF ~ G for every cluster
cluster_grns = {}
for cluster in adata.obs[cluster_key].cat.categories:
    print(f"Building GRN for {cluster}")
    adata_sub = adata[adata.obs[cluster_key] == cluster].copy()
    net = co.Net(
        gene_expression_matrix=adata_sub.to_df(), # Input gene expression matrix as data frame
        TFinfo_matrix=base_grn, # Input base GRN
        verbose=True
    )
    net.fit_All_genes(
        bagging_number=bagging_number,
        alpha=alpha,
        scaling=False,
        verbose=True
    )
    net.updateLinkList(verbose=True)
    inference_result = net.linkList.copy()
    cluster_grns[cluster] = inference_result
    print(f"Finished building GRN for {cluster}")

Building GRN for Ery_0
initiating Net object ...
gem_shape: (101, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_0
Building GRN for Ery_1
initiating Net object ...
gem_shape: (110, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_1
Building GRN for Ery_2
initiating Net object ...
gem_shape: (98, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_2
Building GRN for Ery_3
initiating Net object ...
gem_shape: (103, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_3
Building GRN for Ery_4
initiating Net object ...
gem_shape: (118, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_4
Building GRN for Ery_5
initiating Net object ...
gem_shape: (119, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_5
Building GRN for Ery_6
initiating Net object ...
gem_shape: (68, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_6
Building GRN for Ery_7
initiating Net object ...
gem_shape: (191, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_7
Building GRN for Ery_8
initiating Net object ...
gem_shape: (108, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_8
Building GRN for Ery_9
initiating Net object ...
gem_shape: (77, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Ery_9
Building GRN for GMP_0
initiating Net object ...
gem_shape: (109, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for GMP_0
Building GRN for GMP_1
initiating Net object ...
gem_shape: (209, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for GMP_1
Building GRN for GMPl_0
initiating Net object ...
gem_shape: (246, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for GMPl_0
Building GRN for Gran_0
initiating Net object ...
gem_shape: (143, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Gran_0
Building GRN for Gran_1
initiating Net object ...
gem_shape: (104, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Gran_1
Building GRN for Gran_2
initiating Net object ...
gem_shape: (165, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Gran_2
Building GRN for MEP_0
initiating Net object ...
gem_shape: (157, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for MEP_0
Building GRN for Mk_0
initiating Net object ...
gem_shape: (93, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Mk_0
Building GRN for Mo_0
initiating Net object ...
gem_shape: (146, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Mo_0
Building GRN for Mo_1
initiating Net object ...
gem_shape: (206, 1999)
TF info shape: (91976, 1095)
initiation completed.


  0%|          | 0/1848 [00:00<?, ?it/s]

  0%|          | 0/1846 [00:00<?, ?it/s]

Finished building GRN for Mo_1


In [20]:
# Extract grn
grn = pd.concat([v.assign(cluster=k) for k, v in cluster_grns.items()])
grn = grn.dropna()[['source', 'target', 'coef_mean', 'p', 'cluster']]
grn = grn.rename(columns={'coef_mean': 'score', 'p': 'pval'})
grn = grn.rename(columns={'source': 'tf', 'target': 'gene'})
grn = grn.sort_values(['tf', 'score'], ascending=[True, False])

In [23]:
# Write
grn.to_csv(path_out, index=False)

# DONE!

---