## Analysis of immunosuppressive paths between various cell types in BC datasets
<br>
<b>Description</b> : In this notebook we investigate cell-cell interactions in TNBC datasets. The notebook outputs the interaction coefficients between different cell types.<br>
<b>Author</b> : Hejin Huang (huang.hejin@gene.com)<br>

In [1]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import tangram2 as tg2
import anndata as ad

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# --- Load the single-cell reference and derive the working label column ---
REAL_DATA_SC_PTH = '../../data/original/bc/sc/bc.h5ad'
ad_sc_all = ad.read_h5ad(REAL_DATA_SC_PTH)

# Remove 'Normal Epithelial' cells; .copy() turns the resulting view into an
# independent object so that .obs can be edited safely below.
ad_sc_all = ad_sc_all[ad_sc_all.obs['celltype_major'] != 'Normal Epithelial'].copy()

# 'celltype_mod' is the minor cell type (as string dtype), except that cells of
# the FOXP3 T-reg subset are relabelled 'T-regs'.
ad_sc_all.obs['celltype_mod'] = (
    ad_sc_all.obs['celltype_minor']
    .astype('string')
    .mask(ad_sc_all.obs['celltype_subset'] == 'T_cells_c2_CD4+_T-regs_FOXP3', 'T-regs')
)


In [3]:
# Marker genes: rank genes per 'celltype_subset' group, keep the top 100 names
# of every group, and pool them into one sorted list of unique gene names.
# NOTE(review): rank_genes_groups is usually run on normalized/log1p data;
# confirm the state of ad_sc_all.X before relying on these markers.
sc.tl.rank_genes_groups(ad_sc_all, groupby="celltype_subset", use_raw=False)
markers_df = pd.DataFrame(ad_sc_all.uns["rank_genes_groups"]["names"]).head(100)
markers = list(np.unique(markers_df.to_numpy()))

# Spatial sample identifiers (used to build the spatial file paths).
spatial = ['CID4465', 'CID44971']

# 'celltype_mod' labels observed within the T-cell and cancer-epithelial
# compartments. Neither list is used in the cells visible here.
T_subtype = list(
    ad_sc_all.obs.loc[ad_sc_all.obs['celltype_major'] == 'T-cells', 'celltype_mod'].unique()
)
cancer_subtype = list(
    ad_sc_all.obs.loc[ad_sc_all.obs['celltype_major'] == 'Cancer Epithelial', 'celltype_mod'].unique()
)




In [4]:
# --- Main Processing and Mapping Loop ---
# For each spatial sample:
#   1. load the spatial data and select the single-cell cells of the same subtype,
#   2. preprocess and map single-cell -> spatial with Tangram2 (marker genes only),
#   3. collapse the fine-grained labels of selected major cell types into one
#      label per major type, in both 'D_from' and 'w',
#   4. run TangramCCC and store the results.
ad_map_all = []      # TangramCCC result, one entry per processed sample
composition = []     # coarse-grained 'w' table, one entry per processed sample
mapping_result = []  # shallow copy of the full input/result dict per sample

label_col = 'celltype_mod'

# Major cell types listed here keep their fine-grained labels; every other
# major type is collapsed into a single column/row named after the major type.
KEEP_FINE_GRAINED = ['Plasmablasts', 'T-cells', 'Myeloid']

# major type -> fine-grained labels belonging to it. This depends only on
# ad_sc_all, so it is computed once instead of twice per sample.
coarse_groups = {
    major: list(ad_sc_all.obs.loc[ad_sc_all.obs['celltype_major'] == major, label_col].unique())
    for major in ad_sc_all.obs['celltype_major'].unique()
    if major not in KEEP_FINE_GRAINED
}

# NOTE: only the first spatial sample is processed (spatial[:1]).
for sample in spatial[:1]:
    REAL_DATA_SP_PTH = '../../data/original/bc/sp/ad_sp_' + sample + '.h5ad'
    ad_sp = ad.read_h5ad(REAL_DATA_SP_PTH)

    # Subtype of the spatial sample, taken from its first observation.
    # .iloc[0] is explicit positional access; plain [0] on a label-indexed
    # Series relies on a deprecated positional fallback.
    sp_subtype = ad_sp.obs['subtype'].iloc[0]

    # Keep single-cell cells whose subtype equals the spatial subtype or the
    # same name with a trailing '+'. .copy() yields a writable object.
    ad_sc = ad_sc_all[(ad_sc_all.obs['subtype'] == sp_subtype) |
                      (ad_sc_all.obs['subtype'] == (str(sp_subtype) + '+'))].copy()

    input_dict_1 = tg2.evalkit.met.utils.adatas_to_input(
        {'from': ad_sc, 'to': ad_sp},
        categorical_labels={'from': [label_col]},
    )

    tg2.evalkit.met.pp.StandardTangram2.run(input_dict_1)

    map_res_1 = tg2.evalkit.met.map_methods.Tangram2Map.run(
        input_dict_1,
        num_epochs=1000,
        genes=markers,
    )

    input_dict_1.update(map_res_1)

    # Coarse-grain 'D_from': replace the subtype columns of each collapsed
    # major type by one column holding their row-wise sum.
    d_from = input_dict_1['D_from']
    for major, subtypes in coarse_groups.items():
        # ad_sc is a subset of ad_sc_all, so some subtypes may be absent here;
        # selecting a missing column would raise a KeyError.
        present = [name for name in subtypes if name in d_from.columns]
        if not present:
            continue
        collapsed = d_from[present].sum(axis=1)
        # Drop first, then assign: if a major type shares its name with one of
        # its subtypes, the new column must not be removed by the drop.
        d_from = d_from.drop(columns=present)
        d_from[major] = collapsed
    input_dict_1['D_from'] = d_from

    # Coarse-grain 'w' the same way: index it by cell type, then replace the
    # subtype rows of each collapsed major type by one row whose 'coefficient'
    # is their sum.
    w = input_dict_1['w']
    w.index = w['cell_type']
    for major, subtypes in coarse_groups.items():
        present = [name for name in subtypes if name in w.index]
        if not present:
            continue
        total = w.loc[present, 'coefficient'].sum()
        w = w.drop(index=present)
        w.loc[major, 'coefficient'] = total
    w['cell_type'] = w.index  # re-align the 'cell_type' column with the index
    input_dict_1['w'] = w

    tg2.evalkit.met.pp.StandardScanpy.run(input_dict_1, target_objs=['X_from'])

    inter_res = tg2.ccc.TangramCCC.run(input_dict_1,
                                       n_epochs=1000,
                                       learning_rate=0.01)
    ad_map_all.append(inter_res)
    composition.append(input_dict_1['w'])
    mapping_result.append(input_dict_1.copy())

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 1922 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Set Solid Seed
Score: 0.853, KL reg: 3.510, Entropy reg: -9.895
Score: 0.887, KL reg: 3.296, Entropy reg: -8.588
Score: 0.888, KL reg: 3.296, Entropy reg: -8.459
Score: 0.888, KL reg: 3.296, Entropy reg: -8.424
Score: 0.888, KL reg: 3.296, Entropy reg: -8.410
Score: 0.888, KL reg: 3.296, Entropy reg: -8.399
Score: 0.888, KL reg: 3.296, Entropy reg: -8.392
Score: 0.888, KL reg: 3.296, Entropy reg: -8.389
Score: 0.888, KL reg: 3.296, Entropy reg: -8.387


INFO:root:Renormalizing Single cell data


Score: 0.888, KL reg: 3.296, Entropy reg: -8.384


INFO:root:Begin training with 1922 genes and rna_count_based density_prior in cells mode after renormalization


Set Solid Seed


INFO:root:Printing scores every 100 epochs.


Set Solid Seed
Score: 0.878, KL reg: 0.188, Entropy reg: -274292.562
Score: 0.897, KL reg: 0.000, Entropy reg: -239984.906
Score: 0.903, KL reg: 0.000, Entropy reg: -199691.719
Score: 0.906, KL reg: 0.000, Entropy reg: -170169.438
Score: 0.908, KL reg: 0.000, Entropy reg: -149205.031
Score: 0.909, KL reg: 0.000, Entropy reg: -133186.531
Score: 0.910, KL reg: 0.000, Entropy reg: -120347.148
Score: 0.910, KL reg: 0.000, Entropy reg: -109242.773
Score: 0.910, KL reg: 0.000, Entropy reg: -99579.891
Score: 0.911, KL reg: 0.000, Entropy reg: -91565.938


INFO:root:Saving results..
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
137 K     Trainable params
0         Non-trainable params
137 K     Total params
0.548     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

In [5]:
# --- Process and Save Beta Results ---
# Turn each sample's 'beta' result into a flat table in which the two label
# columns are merged into a single key column 'inter' ("<labels>_vs_<labels_>").

beta_all = []
for inter_res in ad_map_all:
    beta = inter_res['beta'].to_dataframe()['beta'].copy().reset_index()
    pair_key = beta['labels'].astype(str) + '_vs_' + beta['labels_'].astype(str)
    beta['inter'] = pair_key
    beta = beta.drop(columns=['labels', 'labels_'])
    beta_all.append(beta)

In [6]:
# Disabled export step: when uncommented, writes one CSV of interaction
# coefficients and one CSV of composition per processed sample, named after
# the corresponding entry of `spatial`.
# for i in range(len(beta_all)):
#     beta_all[i].to_csv('../../data/analysis/bc/minor_celltype/'
#                        + spatial[i] + '.csv')
#     composition[i].to_csv('../../data/analysis/bc/minor_celltype/'
#                           + spatial[i] + '_composition.csv')