# seqFISH Mouse Organogenesis Imputed

- **Creator**: Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Date of Creation:** 01.08.2024
- **Date of Last Modification:** 01.08.2024

## 1. Setup

Run this notebook in the nichecompass-reproducibility environment, installable from ```('../../../envs/environment.yaml')```.

### 1.1 Import Libraries

In [1]:
%load_ext autoreload
%autoreload 2

In [39]:
import warnings

import anndata as ad
import pandas as pd
import scipy.sparse as sp
import squidpy as sq
from nichecompass.models import NicheCompass

### 1.2 Define Parameters

In [55]:
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
model_label = "reference"
counts_key = "counts"
cat_covariates_keys = "batch"
gp_names_key = "nichecompass_gp_names"
spatial_key = "spatial"
adj_key = "spatial_connectivities"
n_neighbors = 8
latent_key = "nichecompass_latent"

load_timestamp = "31072024_212252_2"
batches = ["batch1", "batch2", "batch3", "batch4", "batch5", "batch6"]

### 1.3 Run Notebook Setup

In [10]:
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### 1.4 Configure Paths

In [23]:
# Define paths
ga_data_folder_path = "../../datasets/ga_data"
gp_data_folder_path = "../../datasets/gp_data"
st_data_folder_path = "../../datasets/st_data"
st_data_gold_folder_path = st_data_folder_path + "/gold"
omnipath_lr_network_file_path = f"{gp_data_folder_path}/omnipath_lr_network.csv"
nichenet_lr_network_file_path = f"{gp_data_folder_path}/nichenet_lr_network_v2_{species}.csv"
nichenet_ligand_target_matrix_file_path = f"{gp_data_folder_path}/nichenet_ligand_target_matrix_v2_{species}.csv"
mebocost_enzyme_sensor_interactions_folder_path = f"{gp_data_folder_path}/metabolite_enzyme_sensor_gps"
gene_orthologs_mapping_file_path = f"{ga_data_folder_path}/human_mouse_gene_orthologs.csv"
artifacts_folder_path = f"../../artifacts"
model_folder_path = f"{artifacts_folder_path}/{dataset}/models/{model_label}/{load_timestamp}"
figure_folder_path = f"{artifacts_folder_path}/{dataset}/figures/{model_label}/{load_timestamp}"

## 2. Model

In [25]:
model = NicheCompass.load(dir_path=model_folder_path,
                          adata=None,
                          adata_file_name=f"{dataset}_{model_label}.h5ad",
                          gp_names_key=gp_names_key)

--- INITIALIZING NEW NETWORK MODULE: VARIATIONAL GENE PROGRAM GRAPH AUTOENCODER ---
LOSS -> include_edge_recon_loss: True, include_gene_expr_recon_loss: True, rna_recon_loss: nb
NODE LABEL METHOD -> one-hop-norm
ACTIVE GP THRESHOLD RATIO -> 0.01
LOG VARIATIONAL -> True
CATEGORICAL COVARIATES EMBEDDINGS INJECTION -> ['gene_expr_decoder']
ONE HOP GCN NORM RNA NODE LABEL AGGREGATOR
ENCODER -> n_input: 5000, n_cat_covariates_embed_input: 0, n_hidden: 1956, n_latent: 1856, n_addon_latent: 100, n_fc_layers: 1, n_layers: 1, conv_layer: gatv2conv, n_attention_heads: 4, dropout_rate: 0.0, 
COSINE SIM GRAPH DECODER -> dropout_rate: 0.0
MASKED TARGET RNA DECODER -> n_prior_gp_input: 1856, n_addon_gp_input: 100, n_cat_covariates_embed_input: 3, n_output: 5000
MASKED SOURCE RNA DECODER -> n_prior_gp_input: 1856, n_addon_gp_input: 100, n_cat_covariates_embed_input: 3, n_output: 5000


In [43]:
adata_batch_list = []

for batch in batches:
    print(f"\nProcessing batch {batch}...")
    print("Loading data...")
    adata_batch = ad.read_h5ad(
        f"{st_data_gold_folder_path}/{dataset}_{batch}.h5ad")
    print("Computing spatial neighborhood graph...")
    # Compute (separate) spatial neighborhood graphs
    sq.gr.spatial_neighbors(adata_batch,
                            coord_type="generic",
                            spatial_key=spatial_key,
                            n_neighs=n_neighbors)
    # Make adjacency matrix symmetric
    adata_batch.obsp[adj_key] = (
        adata_batch.obsp[adj_key].maximum(
            adata_batch.obsp[adj_key].T))
    adata_batch_list.append(adata_batch)
adata = ad.concat(adata_batch_list, join="inner")

# Combine spatial neighborhood graphs as disconnected components
batch_connectivities = []
len_before_batch = 0
for i in range(len(adata_batch_list)):
    if i == 0: # first batch
        after_batch_connectivities_extension = sp.csr_matrix(
            (adata_batch_list[0].shape[0],
            (adata.shape[0] -
            adata_batch_list[0].shape[0])))
        batch_connectivities.append(sp.hstack(
            (adata_batch_list[0].obsp[adj_key],
            after_batch_connectivities_extension)))
    elif i == (len(adata_batch_list) - 1): # last batch
        before_batch_connectivities_extension = sp.csr_matrix(
            (adata_batch_list[i].shape[0],
            (adata.shape[0] -
            adata_batch_list[i].shape[0])))
        batch_connectivities.append(sp.hstack(
            (before_batch_connectivities_extension,
            adata_batch_list[i].obsp[adj_key])))
    else: # middle batches
        before_batch_connectivities_extension = sp.csr_matrix(
            (adata_batch_list[i].shape[0], len_before_batch))
        after_batch_connectivities_extension = sp.csr_matrix(
            (adata_batch_list[i].shape[0],
            (adata.shape[0] -
            adata_batch_list[i].shape[0] -
            len_before_batch)))
        batch_connectivities.append(sp.hstack(
            (before_batch_connectivities_extension,
            adata_batch_list[i].obsp[adj_key],
            after_batch_connectivities_extension)))
    len_before_batch += adata_batch_list[i].shape[0]
connectivities = sp.vstack(batch_connectivities)
adata.obsp[adj_key] = connectivities


Processing batch batch1...
Loading data...
Computing spatial neighborhood graph...

Processing batch batch2...
Loading data...
Computing spatial neighborhood graph...

Processing batch batch3...
Loading data...
Computing spatial neighborhood graph...

Processing batch batch4...
Loading data...
Computing spatial neighborhood graph...

Processing batch batch5...
Loading data...
Computing spatial neighborhood graph...

Processing batch batch6...
Loading data...
Computing spatial neighborhood graph...


In [44]:
adata

AnnData object with n_obs × n_vars = 52568 × 29239
    obs: 'Area', 'celltype_mapped_refined', 'sample', 'batch'
    obsm: 'spatial'
    layers: 'counts'
    obsp: 'spatial_connectivities'

In [45]:
model.adata

AnnData object with n_obs × n_vars = 38383 × 5000
    obs: 'Area', 'celltype_mapped_refined', 'sample', 'batch', 'mapping_entity'
    var: 'n_cells', 'highly_variable', 'spatially_variable', 'gp_relevant', 'keep_gene'
    uns: 'moranI', 'nichecompass_active_gp_names', 'nichecompass_genes_idx', 'nichecompass_gp_names', 'nichecompass_latent', 'nichecompass_source_genes_idx', 'nichecompass_sources_categories_label_encoder', 'nichecompass_target_genes_idx', 'nichecompass_targets_categories_label_encoder', 'umap'
    obsm: 'X_umap', 'nichecompass_latent', 'spatial'
    varm: 'nichecompass_gp_sources', 'nichecompass_gp_sources_categories', 'nichecompass_gp_targets', 'nichecompass_gp_targets_categories'
    layers: 'counts'
    obsp: 'nichecompass_latent_connectivities', 'nichecompass_latent_distances', 'spatial_connectivities'

In [48]:
genes = model.adata.var_names
model.adata = adata[:, genes]

In [60]:
model.cat_covariates_keys_

['batch']

In [59]:
adata.obs["batch"].unique()

['embryo1_z2', 'embryo1_z5', 'embryo2_z2', 'embryo2_z5', 'embryo3_z2', 'embryo3_z5']
Categories (6, object): ['embryo1_z2', 'embryo1_z5', 'embryo2_z2', 'embryo2_z5', 'embryo3_z2', 'embryo3_z5']

In [61]:
print("\nComputing reference query latent embedding...")
model.adata.obsm[latent_key], _ = model.get_latent_representation(
   adata=model.adata,
   counts_key=counts_key,
   adj_key=adj_key,
   #cat_covariates_keys=cat_covariates_keys,
   only_active_gps=True,
   return_mu_std=True,
   node_batch_size=model.node_batch_size_)

print("\nComputing neighbor graph...")
# Use latent representation for UMAP generation
sc.pp.neighbors(model.adata,
                use_rep=latent_key,
                key_added=latent_key)

print("\nComputing UMAP embedding...")
sc.tl.umap(model.adata,
           neighbors_key=latent_key)


Computing reference query latent embedding...


KeyError: 'nichecompass_gp_names'