In [33]:
import os
import glob
import mudata as mu
import scanpy as sc
import loompy as lp
import pandas as pd
from pyscenic.cli.utils import load_signatures
from scipy.stats import ttest_1samp
from pyscenic.utils import add_correlation

# tqdm.pandas() registers `progress_apply` on pandas objects; it is used for the
# per-edge p-value computation further down.
from tqdm import tqdm
tqdm.pandas()
import numpy as np
# Smallest positive normal float32. Added to the p-values before taking -log10
# so that a p-value of exactly 0 does not produce an infinite score.
TINY = np.finfo(np.float32).tiny

In [72]:
def calc_p_value(importances):
    """Return the p-value of a one-sample t-test of `importances` against a mean of 0.

    Parameters
    ----------
    importances : array-like
        Importance values observed for one edge.

    Returns
    -------
    float
        The p-value reported by `scipy.stats.ttest_1samp`; the test statistic
        is discarded.
    """
    test_result = ttest_1samp(importances, 0)
    return test_result.pvalue

def regulon2sadj(
    regulons,
):
    """Flatten regulon objects into a long-format TF -> target edge table.

    Parameters
    ----------
    regulons : iterable
        Objects exposing a `name` string and a `gene2weight` mapping of
        target gene -> weight.

    Returns
    -------
    pandas.DataFrame
        One row per (regulon, target) pair with columns
        ``["TF", "target", "importance"]``.
    """
    edge_rows = []
    for regulon in regulons:
        # Keep only the text before the first "(" of the regulon name
        source = regulon.name.partition("(")[0]
        edge_rows.extend(
            [source, gene, score] for gene, score in regulon.gene2weight.items()
        )
    return pd.DataFrame(edge_rows, columns=["TF", "target", "importance"])

In [73]:
# All inputs and outputs of this run sit under one analysis directory, in
# per-tool subfolders stamped with the same run date.
_analysis_dir = "/cellar/users/aklie/data/datasets/igvf_sc-islet_10X-Multiome/analysis/timecourse/A2_control"
_run_date = "2024_05_02"

# Directory holding the per-run "*reg.csv" files; the GRN CSVs are written here too
path_csvs = f"{_analysis_dir}/scenic/{_run_date}"
# Existing MuData object that the GRN is attached to
path_data = f"{_analysis_dir}/celloracle/{_run_date}/A2_control.h5mu"
# Loom file whose expression matrix is used for TF-target correlations
path_loom = f"{path_csvs}/rna.loom"
# Destination of the MuData object once the GRN has been added
path_output = f"{path_csvs}/scenic.h5mu"

In [60]:
# Load the existing MuData object from path_data. The consolidated GRN table is
# attached to its `.uns` further down, and the result is written to path_output.
data = mu.read(path_data)



In [61]:
print("Reading regulons...")
reg_csvs = sorted(glob.glob(os.path.join(path_csvs, "*reg.csv")))
if not reg_csvs:
    # Without this guard an empty match only surfaces later, as a KeyError in
    # the first groupby on the missing 'TF'/'target' columns.
    raise FileNotFoundError(f"No '*reg.csv' files found in {path_csvs}")

# Build one edge table per regulon file and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop re-copies every row
# accumulated so far on each iteration.
edge_tables = [regulon2sadj(load_signatures(reg_csv)) for reg_csv in reg_csvs]

# Each table keeps its own 0..n-1 index (no ignore_index), matching what the
# incremental concat produced. The same (TF, target) pair may appear once per
# file; the following cells rely on those repeats.
all_edges = pd.concat(edge_tables)
print(f"Total edges: {len(all_edges)}")

Reading regulons...
Create regulons from a dataframe of enriched features.
Additional columns saved: []
Create regulons from a dataframe of enriched features.
Additional columns saved: []
Create regulons from a dataframe of enriched features.
Additional columns saved: []
Create regulons from a dataframe of enriched features.
Additional columns saved: []
Create regulons from a dataframe of enriched features.
Additional columns saved: []
Total edges: 41593


In [62]:
print("Grouping by source and target and filtering singlet edges...")
# Keep only (TF, target) pairs that occur in at least two rows of all_edges
edge_keys = ['TF', 'target']
grouped = all_edges.groupby(edge_keys)
filtered = grouped.filter(lambda edge_rows: len(edge_rows) >= 2)
n_dropped = len(all_edges) - len(filtered)
print(f"{n_dropped} edges dropped")

Grouping by source and target and filtering singlet edges...
8849 edges dropped


In [63]:
print("Calculating mean importance for each edge...")
# Series indexed by (TF, target): the average importance over the repeated rows of each edge
importance_by_edge = filtered.groupby(['TF', 'target'])['importance']
mean_importance = importance_by_edge.mean()
n_unique_edges = len(mean_importance)
print(f"Total unique edges: {n_unique_edges}")

Calculating mean importance for each edge...
Total unique edges: 9362


In [64]:
print("Calculating empirical p-value...")
# One t-test per (TF, target) edge over its repeated importance values,
# with a progress bar provided by tqdm's pandas integration.
importance_by_edge = filtered.groupby(['TF', 'target'])['importance']
p_values_series = importance_by_edge.progress_apply(calc_p_value)
# Plain array in group order; the TINY offset keeps p == 0 finite under -log10
p_values = p_values_series.to_numpy() + TINY

Calculating empirical p-value...


  res = hypotest_fun_out(*samples, **kwds)
100%|██████████| 9362/9362 [00:03<00:00, 3001.59it/s]


In [65]:
print("Transforming values...")
# Significance score: larger means a smaller p-value
neg_log_p = -np.log10(p_values)
# Min-max scale the mean importances onto [0, 1]
importance_floor = mean_importance.min()
importance_span = mean_importance.max() - importance_floor
normalized_importance = (mean_importance - importance_floor) / importance_span

Transforming values...


In [66]:
print("Adding correlation...")
# Expression matrix from the loom file; add_correlation is given a DataFrame,
# so the sparse matrix is converted with to_df().
adata = sc.read_loom(path_loom, sparse=True)

# Pass a copy and bind the result to a new name instead of overwriting
# `filtered`. Re-running this cell then always starts from the same
# importance-only edge table, and any in-place column assignment inside
# add_correlation cannot leak back into `filtered`.
edges_with_corr = add_correlation(filtered.copy(), adata.to_df())

# Series indexed by (TF, target): the mean of the 'rho' column per edge
mean_corr = edges_with_corr.groupby(['TF', 'target'])['rho'].mean()

Adding correlation...


In [67]:
# Assemble the per-edge summary table. mean_importance's (TF, target) index is
# the reference ordering for every column.
edge_index = mean_importance.index

# The Series inputs are aligned on the (TF, target) index rather than paired
# by position. With identical indices this changes nothing; an edge missing
# from one of them yields NaN in that column instead of a length error or a
# silently shifted column.
# p_values and neg_log_p are plain arrays in the same group order and can only
# be matched positionally.
consolidated = pd.DataFrame({
    'tf': edge_index.get_level_values('TF'),
    'gene': edge_index.get_level_values('target'),
    'weight_signed': np.nan,  # placeholder column, not computed in this notebook
    'weight_unsigned': mean_importance.values,
    'weight_minmax_normalized': normalized_importance.reindex(edge_index).values,
    'pval': p_values,
    '-logp': neg_log_p,
    'description': np.nan,  # placeholder column, not computed in this notebook
    'corr': mean_corr.reindex(edge_index).values,
    'cluster': 'global'
}).reset_index(drop=True)

In [68]:
# Drop edges whose TF regulates itself (tf and gene are the same symbol)
print("Removing self-loops...")
is_self_loop = consolidated["tf"] == consolidated["gene"]
consolidated = consolidated[~is_self_loop]

Removing self-loops...


In [69]:
# Persist the full per-edge table next to the regulon CSVs.
# to_csv defaults are kept, so the row index is written as the first column.
full_grn_csv = os.path.join(path_csvs, "full_grn.csv")
consolidated.to_csv(full_grn_csv)

In [70]:
# Reduced GRN table: keep the core columns and expose the unsigned weight as "score"
grn_columns = ["tf", "gene", "weight_unsigned", "pval", "cluster"]
grn = consolidated[grn_columns].rename(columns={"weight_unsigned": "score"})
grn.to_csv(os.path.join(path_csvs, "grn.csv"))
grn

Unnamed: 0,tf,gene,score,pval,cluster
0,ARID3A,AATK,1.001928,0.006029,global
1,ARID3A,ABCA3,0.552419,0.006068,global
2,ARID3A,ABCC8,0.062171,0.036842,global
3,ARID3A,ABCG1,0.201618,0.068889,global
4,ARID3A,ABLIM2,0.268125,0.066368,global
...,...,...,...,...,...
9357,ZNF782,GUCY1A2,0.822456,0.325448,global
9358,ZNF782,MYO1B,0.399926,0.202001,global
9359,ZNF782,NETO1,0.324993,0.110220,global
9360,ZNF782,PLEKHB2,0.266331,0.033623,global


In [71]:
# Attach the reduced GRN table (tf, gene, score, pval, cluster) to the loaded
# object's unstructured annotations under the "grn" key.
data.uns["grn"] = grn

In [None]:
# Write the object, now carrying uns["grn"], to path_output in .h5mu format.
# This is a separate file from the input at path_data, which is not overwritten.
data.write_h5mu(path_output)

# DONE!

---