## Analysis of immunosuppressive paths in SCC datasets
<br>
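<b>Description</b> : In this notebook we run Tangram2 CCC (cell-cell communication) on the SCC datasets: for each spatial sample, the single-cell reference is mapped onto the spatial data with Tangram2, and an interaction model is then fitted on the mapped result.<br>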
<b>Author</b> : Hejin Huang (huang.hejin@gene.com)<br>

In [1]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import torch # Kept as it's used for tg.met.map_methods.TangramV2Map.run (device='cuda:0' implies torch usage)
import anndata as ad
import os.path as osp # Used for osp.join
import tangram2 as tg2


  warn(f"Failed to load image Python extension: {e}")


In [3]:
# --- Helper Functions ---
def pd_to_ad(dataframe: pd.DataFrame) -> ad.AnnData:
    """Wrap a pandas DataFrame in an AnnData object.

    The frame's values become ``X``; its row labels are used as the
    observation index and its column labels as the variable index.
    """
    converted = ad.AnnData(X=dataframe.values)
    # Carry the frame's labels over so rows/columns stay identifiable.
    converted.var.index = dataframe.columns
    converted.obs.index = dataframe.index
    return converted

def scc_label_correction(adata: ad.AnnData, old_label_col: str = 'level2_celltype', new_label_col: str = '_mod'):
    """Harmonise SCC cell type labels and store them in a new ``obs`` column.

    Works in-place on ``adata.obs``; nothing is returned.

    Args:
        adata: Object whose ``obs`` table holds the labels to correct.
        old_label_col: Column of ``adata.obs`` the labels are read from.
        new_label_col: Destination column. A value starting with ``'_'`` is
            treated as a suffix appended to ``old_label_col``; any other
            value is used verbatim as the column name.
    """
    target_col = old_label_col + new_label_col if new_label_col.startswith('_') else new_label_col

    # Explicit renames / merges of cell type names.
    renames = {
        'Mac': 'Macrophage',
        'Endothelial Cell': 'Endothelial',
        'CD1C': 'DC',
        'ASDC': 'DC',
        'CLEC9A': 'DC',
        'LC': 'Langerhans cells',
        'Tcell': 'T-cell',
        'B cell': 'B-cell',
    }
    # Every label present in the data maps to itself unless a rename above
    # overrides it, so labels without a rename are kept as they are.
    identity = {label: label for label in adata.obs[old_label_col].unique()}
    label_map = {**identity, **renames}

    adata.obs[target_col] = adata.obs[old_label_col].map(label_map)

def read_h5ad_uniqify(path: str, tag: str = None) -> ad.AnnData:
    """Load an h5ad file and de-duplicate its observation and variable names.

    Args:
        path: Location of the h5ad file to read.
        tag: Optional prefix. When given, every observation name is rewritten
            as ``'<tag>_<name>'`` after de-duplication.

    Returns:
        The loaded AnnData object.
    """
    adata = ad.read_h5ad(path)
    adata.obs_names_make_unique()
    adata.var_names_make_unique()

    # Without a tag there is nothing left to do.
    if tag is None:
        return adata

    adata.obs_names = [f'{tag}_{obs_name}' for obs_name in adata.obs_names]
    return adata

In [4]:
# --- Data Loading and Initial Setup ---
REAL_DATA_SC_PTH = '/gstore/data/resbioai/tangram2_data/telegraph/res/ccc_data/original_dataset/scc_new.h5ad'
ad_sc_all = ad.read_h5ad(REAL_DATA_SC_PTH)

# Rank genes per level-3 cell type. The union of the top 100 ranked gene names
# of every group is the marker gene list handed to the mapping step.
sc.tl.rank_genes_groups(ad_sc_all, groupby="level3_celltype", use_raw=False)
markers_df = pd.DataFrame(ad_sc_all.uns["rank_genes_groups"]["names"]).iloc[0:100, :]
markers = list(np.unique(markers_df.melt().value.values))

# Define base directories for the per-sample single-cell and spatial data
SC_BASEDIR = '/gstore/data/resbioai/andera29/cci-explore/validation/pipeline/data/common/SCC/sc/'
SP_BASEDIR = '/gstore/data/resbioai/andera29/cci-explore/validation/pipeline/data/common/SCC/sp/'

# os.listdir returns entries in arbitrary order. Each directory is listed once
# and sorted so that the sample order (and therefore the order of every result
# list built from `mapping_paths`) and the replicate order are reproducible
# between runs.
sc_files = sorted(os.listdir(SC_BASEDIR))
sp_files = sorted(os.listdir(SP_BASEDIR))

# Prepare mapping paths:
#   {sample key: {'sc': <single-cell file path>, 'sp': {replicate name: <spatial file path>}}}
# The sample key is the part of the single-cell file name before the first '_'.
mapping_paths  = {x.split('_')[0]: {'sc':osp.join(SC_BASEDIR,x)} for x in sc_files}
for key in mapping_paths.keys():
    # Spatial files are matched to a sample by file-name prefix; the replicate
    # name is the part of the file name before the first '.'.
    # NOTE(review): prefix matching would also attach e.g. 'P10...' files to a
    # sample keyed 'P1' - confirm the file naming scheme rules this out.
    spatial_paths = {x.split('.')[0]:osp.join(SP_BASEDIR,x) for x in sp_files if x.startswith(key)}
    mapping_paths[key]['sp'] = spatial_paths



In [7]:
# --- Main Processing and Mapping Loop ---
# For every sample in `mapping_paths`: build the spatial object, map the
# single-cell reference onto it and fit the interaction model.
# The three lists below are filled in `mapping_paths` iteration order, so
# element i of each list belongs to the i-th sample key.
ad_map_all = []      # per sample: return value of TangramCCC.run (interaction results, despite the name)
composition = []     # per sample: the 'w' entry of the input dictionary after mapping
mapping_result = []  # per sample: copy of the input dictionary (via .copy(); presumably shallow - TODO confirm the type returned by adatas_to_input)

for name,path_dict in mapping_paths.items():
    # Concatenate all spatial replicates of the current sample; the replicate
    # name is passed as tag, so it is prefixed to the observation names.
    ad_sp = ad.concat([read_h5ad_uniqify(path,rep) for rep,path in path_dict['sp'].items()])
    output_name = name # Not read anywhere in the visible cells
    # Every sample is mapped from the full single-cell reference `ad_sc_all`;
    # the per-sample path_dict['sc'] file is not read in this loop.
    ad_sc = ad_sc_all.copy() # Use a copy to prevent unintended modifications to ad_sc_all

    scc_label_correction(ad_sc) # Adds the corrected labels to ad_sc.obs (in-place)

    # Column written by scc_label_correction with its default arguments
    # ('level2_celltype' + '_mod').
    label_col = 'level2_celltype_mod'

    # Prepare input dictionary for Tangram: single-cell data as 'from',
    # spatial data as 'to', with the corrected labels as categorical labels.
    input_dict_1 = tg2.evalkit.met.utils.adatas_to_input({'from':ad_sc, 'to':ad_sp},
                                                    categorical_labels={'from':[label_col]},
                                                  )

    # Return value is discarded - presumably this preprocesses input_dict_1
    # in place (TODO confirm against the tangram2 evalkit API).
    tg2.evalkit.met.pp.StandardTangram2.run(input_dict_1)

    # Run Tangram mapping, restricted to the marker genes selected above
    map_res_1 = tg2.evalkit.met.map_methods.Tangram2Map.run(input_dict_1,
                                                num_epochs=1000,
                                                genes=markers,
                                             )

    # Merge the mapping outputs into the input dictionary so the following
    # steps can read them.
    input_dict_1.update(map_res_1)

    # Use the 'cell_type' column of 'w' as its index (the column itself is kept).
    # NOTE(review): 'w' looks like a table with one row per cell type - confirm.
    input_dict_1['w'].index = input_dict_1['w']['cell_type']

    # Scanpy-style preprocessing applied to the 'X_from' entry only; the return
    # value is discarded, so this presumably also works in place (TODO confirm).
    tg2.evalkit.met.pp.StandardScanpy.run(input_dict_1, target_objs=['X_from'])

    # Run Interaction Model
    inter_res = tg2.ccc.TangramCCC.run(input_dict_1,
                                    n_epochs=1000,
                                    learning_rate=0.01)
    # Store the per-sample results; all three lists stay aligned by position.
    ad_map_all.append(inter_res)
    composition.append(input_dict_1['w'])
    mapping_result.append(input_dict_1.copy())

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 1736 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Set Solid Seed
Score: 0.839, KL reg: 3.586, Entropy reg: -10.168
Score: 0.885, KL reg: 3.091, Entropy reg: -8.533
Score: 0.886, KL reg: 3.091, Entropy reg: -8.412
Score: 0.887, KL reg: 3.091, Entropy reg: -8.375
Score: 0.887, KL reg: 3.091, Entropy reg: -8.361
Score: 0.887, KL reg: 3.091, Entropy reg: -8.356
Score: 0.887, KL reg: 3.091, Entropy reg: -8.350
Score: 0.887, KL reg: 3.091, Entropy reg: -8.351
Score: 0.887, KL reg: 3.091, Entropy reg: -8.348


INFO:root:Renormalizing Single cell data


Score: 0.887, KL reg: 3.091, Entropy reg: -8.347


INFO:root:Begin training with 1736 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.872, KL reg: 0.453, Entropy reg: -247831.516
Score: 0.895, KL reg: 0.000, Entropy reg: -207024.094
Score: 0.900, KL reg: 0.000, Entropy reg: -182258.438
Score: 0.903, KL reg: 0.000, Entropy reg: -159435.266
Score: 0.904, KL reg: 0.000, Entropy reg: -140998.422
Score: 0.905, KL reg: 0.000, Entropy reg: -125784.328
Score: 0.906, KL reg: 0.000, Entropy reg: -112538.234
Score: 0.907, KL reg: 0.000, Entropy reg: -101117.375
Score: 0.907, KL reg: 0.000, Entropy reg: -91896.070
Score: 0.907, KL reg: 0.000, Entropy reg: -85028.828


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
254 K     Trainable params
0         Non-trainable params
254 K     Total params
1.016     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 1711 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Set Solid Seed
Score: 0.761, KL reg: 4.193, Entropy reg: -11.515
Score: 0.811, KL reg: 3.091, Entropy reg: -9.054
Score: 0.814, KL reg: 3.091, Entropy reg: -8.849
Score: 0.815, KL reg: 3.091, Entropy reg: -8.787
Score: 0.816, KL reg: 3.091, Entropy reg: -8.744
Score: 0.816, KL reg: 3.091, Entropy reg: -8.725
Score: 0.816, KL reg: 3.091, Entropy reg: -8.716
Score: 0.816, KL reg: 3.091, Entropy reg: -8.710
Score: 0.816, KL reg: 3.091, Entropy reg: -8.704
Score: 0.816, KL reg: 3.091, Entropy reg: -8.705


INFO:root:Renormalizing Single cell data
INFO:root:Begin training with 1711 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.793, KL reg: 1.059, Entropy reg: -294931.375
Score: 0.826, KL reg: 0.005, Entropy reg: -226186.953
Score: 0.835, KL reg: 0.001, Entropy reg: -195327.000
Score: 0.839, KL reg: 0.000, Entropy reg: -174678.656
Score: 0.841, KL reg: 0.000, Entropy reg: -161281.094
Score: 0.843, KL reg: 0.000, Entropy reg: -150981.031
Score: 0.843, KL reg: 0.000, Entropy reg: -142112.578
Score: 0.844, KL reg: 0.000, Entropy reg: -134120.125
Score: 0.845, KL reg: 0.000, Entropy reg: -126777.078
Score: 0.845, KL reg: 0.000, Entropy reg: -120204.562


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
254 K     Trainable params
0         Non-trainable params
254 K     Total params
1.016     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 1710 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Set Solid Seed
Score: 0.835, KL reg: 3.548, Entropy reg: -9.865
Score: 0.905, KL reg: 3.091, Entropy reg: -7.625
Score: 0.906, KL reg: 3.091, Entropy reg: -7.537
Score: 0.906, KL reg: 3.091, Entropy reg: -7.505
Score: 0.906, KL reg: 3.091, Entropy reg: -7.493
Score: 0.906, KL reg: 3.091, Entropy reg: -7.488
Score: 0.906, KL reg: 3.091, Entropy reg: -7.484
Score: 0.906, KL reg: 3.091, Entropy reg: -7.482
Score: 0.906, KL reg: 3.091, Entropy reg: -7.481
Score: 0.907, KL reg: 3.091, Entropy reg: -7.481


INFO:root:Renormalizing Single cell data
INFO:root:Begin training with 1710 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.896, KL reg: 0.414, Entropy reg: -237227.031
Score: 0.923, KL reg: 0.002, Entropy reg: -188801.328
Score: 0.930, KL reg: 0.000, Entropy reg: -151511.750
Score: 0.932, KL reg: 0.000, Entropy reg: -132755.281
Score: 0.933, KL reg: 0.000, Entropy reg: -117091.297
Score: 0.933, KL reg: 0.000, Entropy reg: -102384.531
Score: 0.934, KL reg: 0.000, Entropy reg: -89664.250
Score: 0.934, KL reg: 0.000, Entropy reg: -78921.203
Score: 0.934, KL reg: 0.000, Entropy reg: -69480.594
Score: 0.935, KL reg: 0.000, Entropy reg: -61075.617


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
254 K     Trainable params
0         Non-trainable params
254 K     Total params
1.016     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

INFO:root:Allocate tensors for mapping.
INFO:r

Set Solid Seed
Set Solid Seed
Score: 0.728, KL reg: 3.391, Entropy reg: -9.987
Score: 0.760, KL reg: 3.091, Entropy reg: -8.688
Score: 0.761, KL reg: 3.091, Entropy reg: -8.608
Score: 0.761, KL reg: 3.091, Entropy reg: -8.583
Score: 0.761, KL reg: 3.091, Entropy reg: -8.572
Score: 0.761, KL reg: 3.091, Entropy reg: -8.571
Score: 0.761, KL reg: 3.091, Entropy reg: -8.570
Score: 0.761, KL reg: 3.091, Entropy reg: -8.570
Score: 0.761, KL reg: 3.091, Entropy reg: -8.574
Score: 0.761, KL reg: 3.091, Entropy reg: -8.571


INFO:root:Renormalizing Single cell data
INFO:root:Begin training with 1705 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.758, KL reg: 0.262, Entropy reg: -241604.203
Score: 0.773, KL reg: 0.000, Entropy reg: -211330.766
Score: 0.780, KL reg: 0.000, Entropy reg: -170625.922
Score: 0.784, KL reg: 0.000, Entropy reg: -134817.312
Score: 0.786, KL reg: 0.000, Entropy reg: -119894.641
Score: 0.787, KL reg: 0.000, Entropy reg: -109845.406
Score: 0.787, KL reg: 0.000, Entropy reg: -99862.250
Score: 0.788, KL reg: 0.000, Entropy reg: -89523.641
Score: 0.788, KL reg: 0.000, Entropy reg: -79000.766
Score: 0.789, KL reg: 0.000, Entropy reg: -69949.375


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
254 K     Trainable params
0         Non-trainable params
254 K     Total params
1.016     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 1680 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Set Solid Seed
Score: 0.764, KL reg: 3.821, Entropy reg: -10.024
Score: 0.825, KL reg: 3.091, Entropy reg: -7.642
Score: 0.826, KL reg: 3.091, Entropy reg: -7.519
Score: 0.827, KL reg: 3.091, Entropy reg: -7.482
Score: 0.827, KL reg: 3.091, Entropy reg: -7.467
Score: 0.827, KL reg: 3.091, Entropy reg: -7.461
Score: 0.827, KL reg: 3.091, Entropy reg: -7.462
Score: 0.827, KL reg: 3.091, Entropy reg: -7.461
Score: 0.827, KL reg: 3.091, Entropy reg: -7.459
Score: 0.827, KL reg: 3.091, Entropy reg: -7.459


INFO:root:Renormalizing Single cell data
INFO:root:Begin training with 1680 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.819, KL reg: 0.695, Entropy reg: -242847.766
Score: 0.836, KL reg: 0.000, Entropy reg: -197204.500
Score: 0.844, KL reg: 0.000, Entropy reg: -164053.656
Score: 0.848, KL reg: 0.000, Entropy reg: -144409.266
Score: 0.849, KL reg: 0.000, Entropy reg: -134728.156
Score: 0.850, KL reg: 0.000, Entropy reg: -127494.648
Score: 0.850, KL reg: 0.000, Entropy reg: -120906.359
Score: 0.851, KL reg: 0.000, Entropy reg: -114703.617
Score: 0.851, KL reg: 0.000, Entropy reg: -108927.375
Score: 0.851, KL reg: 0.000, Entropy reg: -103863.141


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
254 K     Trainable params
0         Non-trainable params
254 K     Total params
1.016     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

INFO:root:Allocate tensors for mapping.
INFO:r

Set Solid Seed
Set Solid Seed
Score: 0.700, KL reg: 3.895, Entropy reg: -10.727
Score: 0.754, KL reg: 3.091, Entropy reg: -8.380
Score: 0.755, KL reg: 3.091, Entropy reg: -8.272
Score: 0.755, KL reg: 3.091, Entropy reg: -8.241
Score: 0.755, KL reg: 3.091, Entropy reg: -8.224
Score: 0.755, KL reg: 3.091, Entropy reg: -8.214
Score: 0.755, KL reg: 3.091, Entropy reg: -8.209
Score: 0.755, KL reg: 3.091, Entropy reg: -8.203
Score: 0.755, KL reg: 3.091, Entropy reg: -8.199
Score: 0.755, KL reg: 3.091, Entropy reg: -8.195


INFO:root:Renormalizing Single cell data
INFO:root:Begin training with 1651 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.750, KL reg: 0.779, Entropy reg: -267269.812
Score: 0.768, KL reg: 0.001, Entropy reg: -221890.781
Score: 0.774, KL reg: 0.000, Entropy reg: -198885.344
Score: 0.779, KL reg: 0.000, Entropy reg: -179185.922
Score: 0.782, KL reg: 0.000, Entropy reg: -170266.062
Score: 0.784, KL reg: 0.000, Entropy reg: -167537.938
Score: 0.784, KL reg: 0.000, Entropy reg: -165522.125
Score: 0.784, KL reg: 0.000, Entropy reg: -163369.812
Score: 0.784, KL reg: 0.000, Entropy reg: -160873.938
Score: 0.785, KL reg: 0.000, Entropy reg: -158136.672


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
254 K     Trainable params
0         Non-trainable params
254 K     Total params
1.016     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [8]:
# --- Process and Save Beta Results ---
# Turn the 'beta' entry of each interaction result into a flat table in which
# the two label columns are merged into a single 'inter' column of the form
# '<labels>_vs_<labels_>'. Nothing is written to disk in this cell.
beta_all = []
for i, inter_res in enumerate(ad_map_all):
    # 'beta' exposes .to_dataframe(); its index levels become regular columns
    # after reset_index().
    beta = inter_res['beta'].to_dataframe()['beta'].copy().reset_index()
    beta['inter'] = beta['labels'].astype(str) + '_vs_' + beta['labels_'].astype(str)
    # The separate label columns are redundant once 'inter' exists.
    beta = beta.drop(columns=['labels', 'labels_'])
    beta_all.append(beta)



In [10]:
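# # --- Save Results to CSV (disabled: uncomment this cell to write the tables; beta_all and composition follow the iteration order of mapping_paths) ---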
# directory = '/gstore/data/resbioai/tangram2_data/telegraph/res/ccc_eval/real/scc_dataset/cci/'

# i = 0 # Initialize loop counter
# for name,path_dict in mapping_paths.items():
#     beta_all[i].to_csv(os.path.join(directory + 'all_to_one/level2_mod/', name + '.csv'), index=False)
#     composition[i].to_csv(os.path.join(directory + 'all_to_one/level2_mod/', name + '_composition.csv'), index=False)
#     i += 1