# Dependencies and files

## Imports

In [1]:
import warnings
import scanpy as sc
import anndata as an
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import os
import decoupler as dc
from statsmodels.stats.multitest import multipletests

import time
from tqdm.notebook import tqdm
import requests

# Session-wide display and logging configuration.
# Figures produced through scanpy are rendered at 80 dpi.
sc.settings.set_figure_params(dpi=80)
# Hide FutureWarning messages only; every other warning category stays visible.
warnings.simplefilter("ignore", FutureWarning)
# scanpy logging verbosity level for the whole session.
sc.settings.verbosity = 3

# Global variables

In [2]:
# Output directory for this notebook's results (relative to the working directory).
datadir = "data/tf_activity"
# exist_ok=True keeps the cell re-runnable when the directory already exists.
os.makedirs(name=datadir, exist_ok=True)

# Load files

## Load pseudobulk

In [3]:
# Directory holding the pseudobulk AnnData files (relative path, with trailing slash).
pb_dir_path = 'data/pseudobulks/'

In [4]:
# Read the pseudobulk AnnData from pb_dir_path and display its summary.
adata_pb = sc.read_h5ad(
    os.path.join(pb_dir_path, 'pseudobulk_all_mammals.h5ad')
)
adata_pb

AnnData object with n_obs × n_vars = 89 × 9209
    obs: 'layer', 'sample_id', 'psbulk_cells', 'psbulk_counts', 'condition', 'lib_size', 'log_lib_size'
    var: 'feature_types', 'mt', 'hb', 'ribo'
    uns: 'X_pca_mean_norm', 'condition_colors', 'layer_colors', 'log1p', 'pca'
    obsm: 'X_pca', 'X_pca_mean_norm'
    varm: 'PCs', 'X_pca_mean_norm'
    layers: 'counts', 'mean_norm', 'psbulk_props'

# Calculate activity

Retrieve the CollecTRI gene regulatory network (human transcription factor → target gene interactions).

In [5]:
# Fetch the CollecTRI network for human through decoupler.
# NOTE(review): the input file is named 'pseudobulk_all_mammals' while the
# network is requested for human only — presumably adata_pb.var_names are
# human gene symbols; confirm before interpreting non-human samples.
collectri = dc.op.collectri(organism='human')
# Preview the first rows of the network table (source, target, weight, ...).
collectri.head()

Unnamed: 0,source,target,weight,resources,references,sign_decision
0,MYC,TERT,1.0,DoRothEA-A;ExTRI;HTRI;NTNU.Curated;Pavlidis202...,10022128;10491298;10606235;10637317;10723141;1...,PMID
1,SPI1,BGLAP,1.0,ExTRI,10022617,default activation
2,SMAD3,JUN,1.0,ExTRI;NTNU.Curated;TFactS;TRRUST,10022869;12374795,PMID
3,SMAD4,JUN,1.0,ExTRI;NTNU.Curated;TFactS;TRRUST,10022869;12374795,PMID
4,STAT5A,IL2,1.0,ExTRI,10022878;11435608;17182565;17911616;22854263;2...,default activation


In [6]:
# Infer transcription-factor activities with decoupler's ulm method, using the
# CollecTRI network as prior knowledge. The results are read back from
# adata_pb.obsm (key 'score_ulm') in the next cell.
# NOTE(review): ulm is given adata_pb as-is; which normalisation adata_pb.X
# holds (the object also carries 'counts' and 'mean_norm' layers) is not
# visible here — confirm it is the intended input.
dc.mt.ulm(
    adata_pb,
    net=collectri,
    verbose=True
)

2025-06-11 23:30:56 | [INFO] ulm - Running ulm
2025-06-11 23:30:56 | [INFO] Extracted omics mat with 89 rows (observations) and 9209 columns (features)
2025-06-11 23:30:56 | [INFO] Network adjacency matrix has 3288 unique features and 549 unique sources
2025-06-11 23:30:56 | [INFO] ulm - fitting 549 univariate models of 9209 observations (targets) with 9207 degrees of freedom
2025-06-11 23:30:57 | [INFO] ulm - adjusting p-values by FDR
2025-06-11 23:30:57 | [INFO] ulm - done


In [7]:
# Extract the ULM scores stored under obsm['score_ulm'] into a separate object
# whose variables are the network sources, then display its summary.
acts = dc.pp.get_obsm(adata_pb, key='score_ulm')
acts

AnnData object with n_obs × n_vars = 89 × 549
    obs: 'layer', 'sample_id', 'psbulk_cells', 'psbulk_counts', 'condition', 'lib_size', 'log_lib_size'
    uns: 'X_pca_mean_norm', 'condition_colors', 'layer_colors', 'log1p', 'pca'
    obsm: 'X_pca', 'X_pca_mean_norm', 'score_ulm', 'padj_ulm'

## Encode layers and save results

In [8]:
# Ordinal code for each layer label (L1..L6, then WM), stored alongside the
# original 'layer' annotation as a numeric column.
cont_dict = {
    'L1': 1,
    'L2': 2,
    'L3': 3,
    'L4': 4,
    'L5': 5,
    'L6': 6,
    'WM': 7
}

# Map through plain Python objects instead of the column's own dtype: mapping a
# pandas categorical yields another categorical, which would leave 'layer_c'
# non-numeric despite holding integer labels.
layer_codes = acts.obs['layer'].astype(object).map(cont_dict)

# Labels missing from cont_dict would otherwise turn into NaN without notice.
unmapped = acts.obs.loc[layer_codes.isna(), 'layer'].unique().tolist()
if unmapped:
    raise ValueError(
        f"No numeric code defined in cont_dict for layer(s): {unmapped}"
    )

# Safe to cast now that every observation received a code.
acts.obs['layer_c'] = layer_codes.astype(int)
acts.obs.head()

Unnamed: 0,layer,sample_id,psbulk_cells,psbulk_counts,condition,lib_size,log_lib_size,layer_c
human_759_L1,L1,human_759,500.0,1400502.0,human,2801.004,7.937733,1
human_j12_L1,L1,human_j12,331.0,1168612.0,human,3530.549849,8.169209,1
human_j3_L1,L1,human_j3,110.0,536103.0,human,4873.663636,8.491601,1
human_j4_L1,L1,human_j4,238.0,1038124.0,human,4361.865546,8.380655,1
human_j6_L1,L1,human_j6,134.0,339404.0,human,2532.865672,7.837107,1


In [9]:
# Export the activity matrix and the sample annotation as CSV files.
# Paths are built from datadir (created at the top of the notebook) so the
# output location is defined in exactly one place.
act_df = pd.DataFrame(acts.X, index=acts.obs_names, columns=acts.var_names)
# Transposed on write: rows are acts.var_names, columns are acts.obs_names.
act_df.T.to_csv(os.path.join(datadir, 'activations.csv'))
acts.obs.to_csv(os.path.join(datadir, 'annotation.csv'))

In [10]:
# Persist the activity AnnData under datadir rather than a duplicated
# hard-coded path, so the output location is defined in one place.
acts.write_h5ad(os.path.join(datadir, 'activity.h5ad'))