In [2]:
# Cell 1: Setup and user-defined variables
import os

# Set these paths accordingly
data_dir = "/data/tmpA/andrem/linger/preprocessed_obj/AML12_DX_MO_scdart"            # Directory with RNA.txt, ATAC.txt, label.txt
datadir = "/data/benchmarks/andrem/linger"              # Directory with bulk GRN data (will look for data_bulk/)
outdir = "/data/tmpA/andrem/linger/out/AML12_DX_MO_scdart"            # Output directory
method = "LINGER"                        # or 'baseline'
genome = "hg38"
network = "cell population"
celltype = "all"
activation = "ReLU"
cuda_device = "0"

# Set CUDA device
os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
print(f"Using CUDA device(s): {cuda_device}")
os.makedirs(outdir, exist_ok=True)


Using CUDA device(s): 0


In [3]:
# Cell 2: Imports
import pandas as pd
from scipy.sparse import csc_matrix
import anndata
import scanpy as sc
#import muon as mu

from LingerGRN.preprocess import get_adata, preprocess
from LingerGRN.pseudo_bulk import pseudo_bulk
import LingerGRN.LINGER_tr as LINGER_tr
import LingerGRN.LL_net as LL_net
from LingerGRN.TF_activity import regulon, heatmap_cluster, master_regulator, box_comp


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [14]:
# Cell 3: Load and merge input data
def clean_barcode_columns(columns):
    return columns.str.split('___').str[0].str.replace('-1$', '', regex=True)

print("Loading input data...")
label = pd.read_csv(os.path.join(data_dir, 'label.txt'), sep='\t')
RNA = pd.read_csv(os.path.join(data_dir, 'RNA.txt'), sep='\t', index_col=0)
ATAC = pd.read_csv(os.path.join(data_dir, 'ATAC.txt'), sep='\t', index_col=0)

#TODO meter isto correcto no preparate input => dps de verficar se isto é o correto
RNA.columns = clean_barcode_columns(RNA.columns)
ATAC.columns = clean_barcode_columns(ATAC.columns)
label = pd.read_csv(os.path.join(data_dir, 'label.txt'), sep='\t')
# Clean the "barcode" column and rename it to "barcode_use"
label['barcode_use'] = label['barcode'].str.split('___').str[0].str.replace('-1$', '', regex=True)
# Drop the original "index" column if it exists
label = label.drop(columns=['index'], errors='ignore')
# Reorder columns: "label", "barcode_use"
label = label[['label', 'barcode_use']]


matrix = csc_matrix(pd.concat([RNA, ATAC], axis=0).values)
features = pd.DataFrame(RNA.index.tolist() + ATAC.index.tolist(), columns=[1])
features[2] = ['Gene Expression'] * RNA.shape[0] + ['Peaks'] * ATAC.shape[0]
barcodes = pd.DataFrame(RNA.columns.values, columns=[0])


Loading input data...


In [16]:
# Cell 4: Create AnnData objects and filter
adata_RNA, adata_ATAC = get_adata(matrix, features, barcodes, label)

sc.pp.filter_cells(adata_RNA, min_genes=200)
sc.pp.filter_genes(adata_RNA, min_cells=3)
sc.pp.filter_cells(adata_ATAC, min_genes=200)
sc.pp.filter_genes(adata_ATAC, min_cells=3)

# Keep only intersected barcodes
selected = list(set(adata_RNA.obs['barcode']) & set(adata_ATAC.obs['barcode']))
adata_RNA = adata_RNA[[b in selected for b in adata_RNA.obs['barcode']]]
adata_ATAC = adata_ATAC[[b in selected for b in adata_ATAC.obs['barcode']]]

print(f"Filtered cells: {adata_RNA.n_obs} shared barcodes")


Trying to modify attribute `.obs` of view, initializing view as actual.
Trying to modify attribute `.obs` of view, initializing view as actual.
Trying to modify attribute `.obs` of view, initializing view as actual.
Trying to modify attribute `.obs` of view, initializing view as actual.


Filtered cells: 6074 shared barcodes


In [None]:
from LingerGRN.pseudo_bulk import *

samplelist = list(set(adata_ATAC.obs['sample'].values))  # unique sample IDs
singlepseudobulk = adata_RNA.obs['sample'].nunique() ** 2 > 100

TG_pseudobulk = pd.DataFrame([])
RE_pseudobulk = pd.DataFrame([])

for tempsample in samplelist:
    adata_RNAtemp = adata_RNA[adata_RNA.obs['sample'] == tempsample]
    adata_ATACtemp = adata_ATAC[adata_ATAC.obs['sample'] == tempsample]
    
    TG_pseudobulk_temp, RE_pseudobulk_temp = pseudo_bulk(adata_RNAtemp, adata_ATACtemp, singlepseudobulk)

    RE_pseudobulk_temp[RE_pseudobulk_temp > 100] = 100

    TG_pseudobulk = pd.concat([TG_pseudobulk, TG_pseudobulk_temp], axis=1)
    RE_pseudobulk = pd.concat([RE_pseudobulk, RE_pseudobulk_temp], axis=1)

# Save outputs
import os
if not os.path.exists('data/'):
    os.mkdir('data/')

#adata_ATAC.write('data/adata_ATAC.h5ad')
#adata_RNA.write('data/adata_RNA.h5ad')
pd.DataFrame(adata_ATAC.var['gene_ids']).to_csv(os.path.join(outdir, "Peaks.txt"), header=None, index=None)

TG_pseudobulk = TG_pseudobulk.fillna(0)
RE_pseudobulk = RE_pseudobulk.fillna(0)
TG_pseudobulk.to_csv('data/TG_pseudobulk.tsv', sep='\t')
RE_pseudobulk.to_csv('data/RE_pseudobulk.tsv', sep='\t')

print(f"✅ Pseudo-bulk completed: TG {TG_pseudobulk.shape}, RE {RE_pseudobulk.shape}")


Received a view of an AnnData. Making a copy.
Received a view of an AnnData. Making a copy.
Received a view of an AnnData. Making a copy.
Received a view of an AnnData. Making a copy.
Trying to modify attribute `.obs` of view, initializing view as actual.
Trying to modify attribute `.obs` of view, initializing view as actual.


✅ Pseudo-bulk completed: TG (17360, 229), RE (205003, 229)


In [33]:
from LingerGRN.preprocess import preprocess
import inspect

print(inspect.getfile(LINGER_tr.training))

/home/andrem/GRN-project/workflow/.snakemake/conda/76d78891a26d915bcb4ceb8a190f4007_/lib/python3.10/site-packages/LingerGRN/LINGER_tr.py


In [None]:
# Cell 6: Preprocess + train model
GRNdir = os.path.join(datadir, 'data_bulk/')
preprocess(TG_pseudobulk, RE_pseudobulk, GRNdir, genome, method, outdir)



Mapping gene expression...
Generate TF expression...
Generate RE chromatin accessibility...
Generate TF binding...


100%|██████████| 23/23 [12:14<00:00, 31.93s/it]


Generate Index...


100%|██████████| 12802/12802 [00:27<00:00, 466.77it/s]


TypeError: training() missing 1 required positional argument: 'species'

In [44]:

import importlib
importlib.reload(LINGER_tr)


<module 'LingerGRN.LINGER_tr' from '/home/andrem/GRN-project/workflow/.snakemake/conda/76d78891a26d915bcb4ceb8a190f4007_/lib/python3.10/site-packages/LingerGRN/LINGER_tr.py'>

In [45]:
LINGER_tr.training(GRNdir, method, outdir, activation, "hg38")

chr1


100%|██████████| 1318/1318 [42:47<00:00,  1.95s/it]


chr2


100%|██████████| 853/853 [26:10<00:00,  1.84s/it]


chr3


100%|██████████| 732/732 [22:44<00:00,  1.86s/it]


chr4


100%|██████████| 443/443 [12:42<00:00,  1.72s/it]


chr5


100%|██████████| 563/563 [16:45<00:00,  1.79s/it]


chr6


100%|██████████| 642/642 [20:40<00:00,  1.93s/it]


chr7


100%|██████████| 568/568 [17:30<00:00,  1.85s/it]


chr8


100%|██████████| 410/410 [12:37<00:00,  1.85s/it]


chr9


100%|██████████| 493/493 [16:42<00:00,  2.03s/it]


chr10


100%|██████████| 474/474 [14:29<00:00,  1.84s/it]


chr11


100%|██████████| 746/746 [25:32<00:00,  2.05s/it]


chr12


100%|██████████| 703/703 [23:15<00:00,  1.99s/it]


chr13


100%|██████████| 213/213 [06:20<00:00,  1.79s/it]


chr14


100%|██████████| 418/418 [13:21<00:00,  1.92s/it]


chr15


100%|██████████| 403/403 [12:46<00:00,  1.90s/it]


chr16


100%|██████████| 623/623 [22:34<00:00,  2.17s/it]


chr17


100%|██████████| 776/776 [29:17<00:00,  2.27s/it]


chr18


100%|██████████| 172/172 [04:55<00:00,  1.72s/it]


chr19


100%|██████████| 1046/1046 [39:51<00:00,  2.29s/it]


chr20


100%|██████████| 349/349 [11:44<00:00,  2.02s/it]


chr21


100%|██████████| 117/117 [03:46<00:00,  1.94s/it]


chr22


100%|██████████| 319/319 [11:32<00:00,  2.17s/it]


chrX


100%|██████████| 421/421 [11:26<00:00,  1.63s/it]


In [58]:
import importlib
importlib.reload(LingerGRN.LL_net)


<module 'LingerGRN.LL_net' from '/home/andrem/GRN-project/workflow/.snakemake/conda/76d78891a26d915bcb4ceb8a190f4007_/lib/python3.10/site-packages/LingerGRN/LL_net.py'>

In [None]:
# Cell 7: Build Cell Population GRN
LL_net.TF_RE_binding(GRNdir, adata_RNA, adata_ATAC, genome, method, outdir)



Generating cellular population TF binding strength ...


  0%|          | 0/23 [00:00<?, ?it/s]

Generating cellular population TF binding strength for chr1


100%|██████████| 26/26 [09:26<00:00, 21.77s/it]
  4%|▍         | 1/23 [09:45<3:34:31, 585.08s/it]

Generating cellular population TF binding strength for chr2


100%|██████████| 17/17 [04:16<00:00, 15.11s/it]
  9%|▊         | 2/23 [14:14<2:19:45, 399.33s/it]

Generating cellular population TF binding strength for chr3


100%|██████████| 14/14 [03:43<00:00, 15.94s/it]
 13%|█▎        | 3/23 [18:19<1:49:40, 329.03s/it]

Generating cellular population TF binding strength for chr4


100%|██████████| 8/8 [01:27<00:00, 10.90s/it]
 17%|█▋        | 4/23 [19:59<1:15:30, 238.44s/it]

Generating cellular population TF binding strength for chr5


100%|██████████| 11/11 [02:36<00:00, 14.22s/it]
 22%|██▏       | 5/23 [22:45<1:03:44, 212.48s/it]

Generating cellular population TF binding strength for chr6


100%|██████████| 12/12 [04:03<00:00, 20.26s/it]
 26%|██▌       | 6/23 [27:10<1:05:16, 230.39s/it]

Generating cellular population TF binding strength for chr7


100%|██████████| 11/11 [02:51<00:00, 15.61s/it]
 30%|███       | 7/23 [30:11<57:05, 214.12s/it]  

Generating cellular population TF binding strength for chr8


100%|██████████| 8/8 [02:03<00:00, 15.42s/it]
 35%|███▍      | 8/23 [32:23<46:59, 187.95s/it]

Generating cellular population TF binding strength for chr9


100%|██████████| 9/9 [03:02<00:00, 20.31s/it]
 39%|███▉      | 9/23 [36:31<48:14, 206.75s/it]

Generating cellular population TF binding strength for chr10


100%|██████████| 9/9 [02:13<00:00, 14.88s/it]
 43%|████▎     | 10/23 [38:59<40:52, 188.62s/it]

Generating cellular population TF binding strength for chr11


100%|██████████| 14/14 [06:30<00:00, 27.93s/it]
 48%|████▊     | 11/23 [45:48<51:11, 256.00s/it]

Generating cellular population TF binding strength for chr12


100%|██████████| 14/14 [05:02<00:00, 21.63s/it]
 52%|█████▏    | 12/23 [50:58<49:58, 272.57s/it]

Generating cellular population TF binding strength for chr13


100%|██████████| 4/4 [00:47<00:00, 11.87s/it]
 57%|█████▋    | 13/23 [51:57<34:37, 207.71s/it]

Generating cellular population TF binding strength for chr14


100%|██████████| 8/8 [02:21<00:00, 17.73s/it]
 61%|██████    | 14/23 [54:35<28:53, 192.62s/it]

Generating cellular population TF binding strength for chr15


100%|██████████| 8/8 [02:23<00:00, 17.94s/it]
 65%|██████▌   | 15/23 [57:04<23:56, 179.60s/it]

Generating cellular population TF binding strength for chr16


100%|██████████| 12/12 [06:00<00:00, 30.01s/it]
 70%|██████▉   | 16/23 [1:03:29<28:10, 241.48s/it]

Generating cellular population TF binding strength for chr17


100%|██████████| 15/15 [08:32<00:00, 34.15s/it]
 74%|███████▍  | 17/23 [1:12:41<33:28, 334.74s/it]

Generating cellular population TF binding strength for chr18


100%|██████████| 3/3 [00:30<00:00, 10.04s/it]
 78%|███████▊  | 18/23 [1:13:19<20:28, 245.72s/it]

Generating cellular population TF binding strength for chr19


100%|██████████| 20/20 [13:37<00:00, 40.89s/it]
 83%|████████▎ | 19/23 [1:27:16<28:12, 423.09s/it]

Generating cellular population TF binding strength for chr20


100%|██████████| 6/6 [02:05<00:00, 20.87s/it]
 87%|████████▋ | 20/23 [1:30:01<17:17, 345.74s/it]

Generating cellular population TF binding strength for chr21


100%|██████████| 2/2 [00:38<00:00, 19.26s/it]
 91%|█████████▏| 21/23 [1:30:48<08:32, 256.19s/it]

Generating cellular population TF binding strength for chr22


100%|██████████| 6/6 [03:08<00:00, 31.37s/it]
 96%|█████████▌| 22/23 [1:34:12<04:00, 240.44s/it]

Generating cellular population TF binding strength for chrX


100%|██████████| 8/8 [01:05<00:00,  8.21s/it]
100%|██████████| 23/23 [1:35:26<00:00, 248.97s/it]
  0%|          | 0/23 [00:17<?, ?it/s]


UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray.scalar was not an allowed global by default. Please use `torch.serialization.add_safe_globals([numpy.core.multiarray.scalar])` or the `torch.serialization.safe_globals([numpy.core.multiarray.scalar])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.

In [59]:
LL_net.cis_reg(GRNdir, adata_RNA, adata_ATAC, genome, method, outdir)


100%|██████████| 1318/1318 [00:01<00:00, 1189.20it/s]
100%|██████████| 853/853 [00:00<00:00, 1621.24it/s]
100%|██████████| 732/732 [00:00<00:00, 1509.12it/s]
100%|██████████| 443/443 [00:00<00:00, 1930.89it/s]
100%|██████████| 563/563 [00:00<00:00, 1874.48it/s]
100%|██████████| 642/642 [00:00<00:00, 1554.50it/s]
100%|██████████| 568/568 [00:00<00:00, 1928.83it/s]
100%|██████████| 410/410 [00:00<00:00, 1736.30it/s]
100%|██████████| 493/493 [00:00<00:00, 1157.01it/s]
100%|██████████| 474/474 [00:00<00:00, 1868.75it/s]
100%|██████████| 746/746 [00:00<00:00, 1414.50it/s]
100%|██████████| 703/703 [00:00<00:00, 1234.34it/s]
100%|██████████| 213/213 [00:00<00:00, 1595.60it/s]
100%|██████████| 418/418 [00:00<00:00, 1724.05it/s]
100%|██████████| 403/403 [00:00<00:00, 1269.07it/s]
100%|██████████| 623/623 [00:00<00:00, 1245.16it/s]
100%|██████████| 776/776 [00:00<00:00, 1058.98it/s]
100%|██████████| 172/172 [00:00<00:00, 2094.40it/s]
100%|██████████| 1046/1046 [00:01<00:00, 964.26it/s] 
100%|███

In [65]:
LL_net.trans_reg(GRNdir, method, outdir, genome)

Generate trans-regulatory netowrk ...


  0%|          | 0/23 [00:00<?, ?it/s]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7fbdff983880>>
Traceback (most recent call last):
  File "/home/andrem/GRN-project/workflow/.snakemake/conda/76d78891a26d915bcb4ceb8a190f4007_/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 
100%|██████████| 1318/1318 [00:00<00:00, 1777.34it/s]
100%|██████████| 853/853 [00:00<00:00, 2087.70it/s]
100%|██████████| 732/732 [00:00<00:00, 1759.16it/s]
100%|██████████| 443/443 [00:00<00:00, 2007.39it/s]
 17%|█▋        | 4/23 [00:25<02:01,  6.40s/it]


KeyboardInterrupt: 

In [3]:
import os
import pandas as pd

# Define paths
#tfb_potential = os.path.join(outdir, 'cell_population_TF_RE_binding.txt')
#re_tg = os.path.join(outdir, 'cell_population_cis_regulatory.txt')
#tf_tg = os.path.join(outdir, 'cell_population_trans_regulatory.txt')

tfb_potential = outdir + 'cell_population_TF_RE_binding.txt'
re_tg = outdir + 'cell_population_cis_regulatory.txt'
tf_tg = outdir + 'cell_population_trans_regulatory.txt'

# 1. TF–RE binding matrix: rows = regions, columns = TFs
df_tf_re = pd.read_csv(tfb_potential, sep="\t", index_col=0)

# 2. cis-regulatory (region → gene with score): 3-column table
df_re_tg = pd.read_csv(re_tg, sep="\t", header=None, names=["region", "target_gene", "score"])

# 3. trans-regulatory matrix: rows = genes, columns = TFs
df_tf_tg = pd.read_csv(tf_tg, sep="\t", index_col=0)


In [4]:
# Filter to relevant genes only
df_tf_tg = df_tf_tg.loc[df_tf_tg.index.intersection(df_re_tg["target_gene"])]

# Build filtered long-format TF-gene DataFrame: keep TFs with value ≥ median
df_long = (
    df_tf_tg[df_tf_tg.ge(df_tf_tg.median(axis=1), axis=0)]
    .reset_index()
    .melt(id_vars="index", var_name="TF", value_name="trans_reg_score")
    .rename(columns={"index": "target_gene"})
    .dropna()
)

# Merge in region info
df_result = df_re_tg.merge(df_long, on="target_gene")[["region", "target_gene", "TF", "trans_reg_score"]]

In [6]:
df_result = df_re_tg.merge(df_long, on="target_gene")[["region", "target_gene", "TF", "score", "trans_reg_score"]]

: 

In [1]:
import mudata as mu

# Load the MuData object
mdata = mu.read_h5mu("/data/tmpA/andrem/linger/out/AML12_DX_MO_scdart/GRN_final_mudata.h5mu")

# View what's inside
print(mdata)

MuData object with n_obs × n_vars = 1 × 1
  uns:	'grn'
  1 modality
    dummy:	1 x 1


  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)


In [9]:
mdata.uns['grn']

Unnamed: 0,TF,Gene,Region,tf_tg_score,tg_re_score,tf_re_score
0,ATF6,AGL,chr1:100028729-100029229,0.000778,0.000039,0.991619
1,ATF6B,AGL,chr1:100028729-100029229,0.000353,0.000039,0.980047
2,CENPB,AGL,chr1:100028729-100029229,0.000352,0.000039,0.770478
3,GLIS3,AGL,chr1:100028729-100029229,0.000735,0.000039,0.741424
4,GZF1,AGL,chr1:100028729-100029229,0.000345,0.000039,0.755783
...,...,...,...,...,...,...
35643809,ZBTB16,WWC3,chrX:9995709-9996209,0.005868,0.000087,0.560788
35643810,CENPB,WWC3,chrX:9997469-9997969,0.005445,0.000037,0.239576
35643811,EWSR1,WWC3,chrX:9997469-9997969,0.005964,0.000037,-0.026696
35643812,TOPORS,WWC3,chrX:9997469-9997969,0.012320,0.000037,-0.197943


In [10]:
(mdata.uns['grn']['tf_re_score'] < 0 ).sum()

np.int64(68019)

In [4]:
import mudata as mu
import anndata as ad

# Load existing MuData object
mdata = mu.read("/data/tmpA/andrem/linger/GRN_final_mudata.h5mu")

# Paths to your RNA and ATAC .h5ad files
rna_path = "/data/tmpA/andrem/linger/out/AML12_DX_MO_scdart/tmp/linger_wrk/data/adata_RNA.h5ad"
atac_path = "/data/tmpA/andrem/linger/out/AML12_DX_MO_scdart/tmp/linger_wrk/data/adata_ATAC.h5ad"

# Load each modality
adata_rna = ad.read_h5ad(rna_path)
adata_atac = ad.read_h5ad(atac_path)

# Optional: ensure matching observation indices if needed
# adata_atac = adata_atac[adata_rna.obs_names]

# Create a new MuData object with both modalities
mdata_new = mu.MuData({'rna': adata_rna, 'atac': adata_atac})
# Add the GRN data to the MuData object
mdata_new.uns['grn'] = mdata.uns['grn']

# Save if desired
mdata_new.write("/data/tmpA/andrem/linger/out/AML12_DX_MO_scdart/mudata.h5mu")


  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)
  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)


In [24]:
import anndata as ad
meta = ad.read_h5ad("/data/benchmarks/andrem/input_rna/AML12_DX_MO_scbridge_filtered.h5ad")

In [1]:
import mudata as mu
md = mu.read("/data/tmpA/andrem/celloracle/out/AML12_REL_MO_scbridge_with_atac/mdata.h5mu")

  self._update_attr("var", axis=0, join_common=join_common)
  self._update_attr("obs", axis=1, join_common=join_common)


In [2]:
import pandas as pd
SCORE_COL = "Score"
TF2GENE_W_COL = "TF2Gene_W"
import numpy as np
def preprocess_celloracle(mudata):
    """
    Preprocess CellOracle MuData to extract GRN matrix.
    Assumes the GRN is stored in the 'links' attribute of the MuData object.
    """

    grn = pd.DataFrame(mudata.uns['celloracle_links'])

    # Clean the -logp column before normalization
    grn["-logp"] = grn["-logp"].replace([np.inf, -np.inf], np.nan)
    grn["-logp"] = grn["-logp"].fillna(0)

    # Optional: coerce -0.0 → 0.0 for cosmetic + numeric sanity
    grn["-logp"] = grn["-logp"].apply(lambda x: 0.0 if x == 0 else x)

    min_logp = grn["-logp"].min()
    max_logp = grn["-logp"].max()

    if max_logp != min_logp:
        grn[SCORE_COL] = (grn["-logp"] - min_logp) / (max_logp - min_logp)
    else:
        grn[SCORE_COL] = 0.0

    raw_score = grn["coef_mean"] * grn["-logp"]
    grn[TF2GENE_W_COL] = np.tanh(3 * raw_score)

    grn["Region"] = grn["chromosome"] + ":" + grn["start"].astype(str) + "-" + grn["end"].astype(str)

    grn = grn.rename(columns={
    "source": "TF",
    "target": "Gene",
    "chromosome": "Chromosome",
    "start": "Start",
    "end": "End"
    })

    return grn

In [4]:
grn = preprocess_celloracle(md)

In [13]:
grn['TF2Gene_W'].isna().sum()

np.int64(0)