In [1]:
import os
import re
import warnings

import anndata as ad
import decoupler as dc
import mudata as md
import muon as mu
import numpy as np
import pandas as pd
import scanpy as sc

In [2]:
# Turn the `pull_on_update` option off up front, before any MuData object is loaded.
mu.set_options(pull_on_update=False)

<muon._core.config.set_options at 0x14754fb66930>

### Step 1. Load MuData objects

In [3]:
# Read the combined day-14 and day-28 CITE-seq MuData objects from the working directory.
day_14_data_combined, day_28_data_combined = (
    mu.read_h5mu(h5mu_path)
    for h5mu_path in ('D14_CITESeq_ALL.h5mu', 'D28_CITESeq_ALL.h5mu')
)

### Step 2. MSigDB Geneset AUCell Scoring

In [4]:
# Fetch the MSigDB resource through decoupler.
msigdb = dc.get_resource("MSigDB")

# Every collection name present in the resource (handy when choosing what to keep).
unique_collections = msigdb['collection'].unique().tolist()

# Collections whose gene sets are scored below.
MSIGDB_COLLECTIONS = [
    'kegg_pathways',
    'hallmark',
    'biocarta_pathways',
    'go_biological_process',
    'go_molecular_function',
    'go_cellular_component',
]

# Keep only rows belonging to the selected collections ...
gs = msigdb[msigdb['collection'].isin(MSIGDB_COLLECTIONS)]
# ... and keep the first occurrence of each (gene set, gene symbol) pair.
gs = gs.drop_duplicates(subset=["geneset", "genesymbol"])

In [5]:
def score_msigdb_aucell(mdata, net, seed=123):
    """Run decoupler AUCell on the RNA modality of `mdata` and store the scores.

    Parameters
    ----------
    mdata
        MuData object with an ``'rna'`` modality that has a ``'log_counts'`` layer.
    net
        Gene-set table with ``'geneset'`` (source) and ``'genesymbol'`` (target) columns.
    seed
        Seed forwarded to ``dc.run_aucell``.

    Side effects
    ------------
    * ``mdata.mod['rna'].X`` is overwritten with a copy of the ``'log_counts'`` layer.
    * The ``'aucell_estimate'`` entry that decoupler writes to ``.obsm`` is moved to
      ``.obsm['msigdb_aucell']``.
    """
    rna = mdata.mod['rna']

    # `use_raw=False` below means the scores are computed from `.X`,
    # so point `.X` at the log-transformed counts first.
    rna.X = rna.layers['log_counts'].copy()

    dc.run_aucell(
        rna,
        net,
        source="geneset",
        target="genesymbol",
        use_raw=False,
        seed=seed,
        verbose=True,
    )

    # Rename the generic decoupler key so later scoring runs cannot clobber it.
    rna.obsm['msigdb_aucell'] = rna.obsm.pop('aucell_estimate')


# Score both time points with the same gene sets and seed.
score_msigdb_aucell(day_14_data_combined, gs)
score_msigdb_aucell(day_28_data_combined, gs)

Running aucell on mat with 10550 samples and 22345 targets for 10169 sources.


  0%|          | 0/10550 [00:00<?, ?it/s]

Running aucell on mat with 12837 samples and 23639 targets for 10184 sources.


  0%|          | 0/12837 [00:00<?, ?it/s]

### Step 3. PROGENy Scoring

In [6]:
def score_progeny_wsum(mdata, net):
    """Run decoupler's weighted-sum method on the RNA modality and store the scores.

    Parameters
    ----------
    mdata
        MuData object with an ``'rna'`` modality.
    net
        Network table with ``'source'``, ``'target'`` and ``'weight'`` columns.

    Side effects
    ------------
    The ``'wsum_estimate'`` entry that decoupler writes to ``.obsm`` is moved to
    ``.obsm['progeny_wsum']``.

    Notes
    -----
    With ``use_raw=False`` the scores are computed from ``.X``. This function does not
    set ``.X`` itself; in this notebook the AUCell cell assigns the ``'log_counts'``
    layer to ``.X`` beforehand, so that cell must have been run first.
    """
    rna = mdata.mod['rna']

    dc.run_wsum(
        rna,
        net=net,
        source='source',
        target='target',
        weight='weight',
        verbose=True,
        use_raw=False,
    )

    # Rename the generic decoupler key to a resource-specific one.
    rna.obsm['progeny_wsum'] = rna.obsm.pop('wsum_estimate')


# PROGENy network for human, requested with top=100.
progeny = dc.get_progeny(organism='human', top=100)

# Score both time points against the same network.
score_progeny_wsum(day_14_data_combined, progeny)
score_progeny_wsum(day_28_data_combined, progeny)

Running wsum on mat with 10550 samples and 22345 targets for 14 sources.


  0%|          | 0/2 [00:00<?, ?it/s]

Running wsum on mat with 12837 samples and 23639 targets for 14 sources.


  0%|          | 0/2 [00:00<?, ?it/s]

### Step 4. Save MuData Objects

In [7]:
# Output paths for the scored objects.
# NOTE(review): `d14_filename` / `d28_filename` were used here without being defined in
# any cell, so this cell failed on a fresh kernel. The names below are placeholders,
# chosen so the input .h5mu files are not overwritten; adjust them if needed.
d14_filename = 'D14_CITESeq_ALL_scored.h5mu'
d28_filename = 'D28_CITESeq_ALL_scored.h5mu'

with warnings.catch_warnings():
    # Silence only the FutureWarning whose message mentions "Forward slashes";
    # the filter is scoped to this `with` block, so other warnings and later cells
    # are unaffected.
    warnings.filterwarnings("ignore", category=FutureWarning, message=".*Forward slashes.*")

    # Write both objects with LZF compression.
    day_14_data_combined.write_h5mu(d14_filename, compression='lzf')
    day_28_data_combined.write_h5mu(d28_filename, compression='lzf')

print(f"✅ Saved {d14_filename}")
print(f"✅ Saved {d28_filename}")

  return func(*args, **kwargs)
  return func(*args, **kwargs)


## Session Info

In [35]:
# Report the package versions of this session so the run can be reproduced;
# 'distributed' is handed to `excludes`.
import session_info

session_info.show(excludes=["distributed"])