In [25]:
import numpy as np
import pandas as pd
import muon as mu
import celloracle as co
import os
import argparse

In [33]:
path_mdata = "/cellar/users/aklie/data/datasets/neurips2021_small/annotation/2023_12_05/mdata.h5mu"
path_p2g = "/cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/processed_peak_file.csv"
path_tfb = "/cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/celloracle.tfb.csv"
alpha = 10
pthr = 1
top_n = None
path_out = "/cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/grn.csv"

In [None]:
# Process base GRN
p2g = pd.read_csv(path_p2g)
tfb = pd.read_csv(path_tfb)
if (p2g.shape[0] == 0) or (tfb.shape[0] == 0):
    grn = pd.DataFrame(columns=['source', 'target', 'score', 'pval'])
    grn.to_csv(path_out, index=False)
    exit()
tfb['score'] = 1
p2g = p2g[['cre', 'gene']]
base_grn = pd.merge(
    p2g,
    tfb
    .pivot(index='cre', columns='tf')
    .fillna(0)
    .droplevel(0, axis=1)
    .reset_index()
)
base_grn = base_grn.rename(columns={'cre': 'peak_id', 'gene': 'gene_short_name'})
base_grn['peak_id'] = base_grn['peak_id'].str.replace('-', '_')

In [None]:
# Init oracle object
oracle = co.Oracle()

In [None]:
oracle.adata = mu.read(path_mdata)['rna'].copy()
oracle

Oracle object

Meta data
    celloracle version used for instantiation: 0.16.0
    n_cells: 123
    n_genes: 512
    cluster_name: None
    dimensional_reduction_name: None
    n_target_genes_in_TFdict: 0 genes
    n_regulatory_in_TFdict: 0 genes
    n_regulatory_in_both_TFdict_and_scRNA-seq: 0 genes
    n_target_genes_both_TFdict_and_scRNA-seq: 0 genes
    k_for_knn_imputation: NA
Status
    Gene expression matrix: Ready
    BaseGRN: Not imported
    PCA calculation: Not finished
    Knn imputation: Not finished
    GRN calculation for simulation: Not finished

In [None]:
oracle.adata.obsm['X_umap'] = np.zeros((oracle.adata.shape[0], 2))
oracle.embedding_name = 'X_umap'
oracle

Oracle object

Meta data
    celloracle version used for instantiation: 0.16.0
    n_cells: 123
    n_genes: 512
    cluster_name: None
    dimensional_reduction_name: X_umap
    n_target_genes_in_TFdict: 0 genes
    n_regulatory_in_TFdict: 0 genes
    n_regulatory_in_both_TFdict_and_scRNA-seq: 0 genes
    n_target_genes_both_TFdict_and_scRNA-seq: 0 genes
    k_for_knn_imputation: NA
Status
    Gene expression matrix: Ready
    BaseGRN: Not imported
    PCA calculation: Not finished
    Knn imputation: Not finished
    GRN calculation for simulation: Not finished

In [None]:
oracle.adata.layers['imputed_count'] = oracle.adata.X

In [None]:
oracle.adata.obs['cluster'] = 'celltype'
oracle.cluster_column_name = 'cluster'
oracle

Oracle object

Meta data
    celloracle version used for instantiation: 0.16.0
    n_cells: 123
    n_genes: 512
    cluster_name: cluster
    dimensional_reduction_name: X_umap
    n_target_genes_in_TFdict: 0 genes
    n_regulatory_in_TFdict: 0 genes
    n_regulatory_in_both_TFdict_and_scRNA-seq: 0 genes
    n_target_genes_both_TFdict_and_scRNA-seq: 0 genes
    k_for_knn_imputation: NA
Status
    Gene expression matrix: Ready
    BaseGRN: Not imported
    PCA calculation: Not finished
    Knn imputation: Not finished
    GRN calculation for simulation: Not finished

In [None]:
oracle.pcs = np.zeros((oracle.adata.shape[0], 2))
oracle.knn = True
oracle.k_knn_imputation = True
oracle

Total number of TF was 25. Although we can go to the GRN calculation with this data, but the TF number is small.


In [None]:
oracle.import_TF_data(TF_info_matrix=base_grn)
oracle

TF dict already exists. The old TF dict data was deleted. 

Total number of TF was 25. Although we can go to the GRN calculation with this data, but the TF number is small.


Oracle object

Meta data
    celloracle version used for instantiation: 0.16.0
    n_cells: 123
    n_genes: 512
    cluster_name: cluster
    dimensional_reduction_name: X_umap
    n_target_genes_in_TFdict: 58 genes
    n_regulatory_in_TFdict: 25 genes
    n_regulatory_in_both_TFdict_and_scRNA-seq: 25 genes
    n_target_genes_both_TFdict_and_scRNA-seq: 58 genes
    k_for_knn_imputation: True
Status
    Gene expression matrix: Ready
    BaseGRN: Ready
    PCA calculation: Done
    Knn imputation: Done
    GRN calculation for simulation: Not finished

In [None]:
# Model TF ~ G
print('Modeling GRN...')
links = oracle.get_links(
    cluster_name_for_GRN_unit="cluster",
    alpha=alpha,
    n_jobs=os.cpu_count(),
)
print('Modeling Done!')

Modeling GRN...


  0%|          | 0/1 [00:00<?, ?it/s]

Modeling Done!


In [34]:
if top_n is None:
    top_n = links.shape[0]

AttributeError: 'Links' object has no attribute 'shape'

In [35]:
links.

<celloracle.network_analysis.links_object.Links at 0x1554ebfd1330>

In [36]:
print('Filtering links...')
links.filter_links(
    p=pthr,
    weight="coef_abs",
    threshold_number=top_n,
)
print('Filtering done!')

Filtering links...
Filtering done!


In [39]:
# Extract grn
grn = links.filtered_links['celltype'].dropna()[['source', 'target', 'coef_mean', 'p']]
grn = grn.rename(columns={'coef_mean': 'score', 'p': 'pval'})

In [40]:
# Write
grn.to_csv(path_out, index=False)

# Write command to run

```bash
python /cellar/users/aklie/opt/gene_program_evaluation/src/inference/grn_models/celloracle/workflow/scripts/mdl.py \
-m /cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/celloracle.pre.h5mu \
-g /cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/processed_peak_file.csv \
-t /cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/celloracle.tfb.csv \
-a 10 \
-p 1 \
-n 10000 \
-o /cellar/users/aklie/data/datasets/neurips2021_small/analysis/celloracle/2024_05_01/grn.csv
```

# DONE!

---