In [2]:
import anndata
import pandas as pd
import scimap as sm
import scanpy as sc
import warnings
import numpy as np
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# Load the AnnData object from the .h5ad file.
path_to_h5ad = './../data/202503cells_spatial_count.h5ad'
adata = anndata.read_h5ad(path_to_h5ad)

# Annotation column passed to the interaction analysis:
#   'Phenotype5' -> immune cells grouped
#   'Phenotype4' -> immune cells subtyped
phenotype = 'Phenotype5'

In [4]:
### Summarize immune cells: collapse the immune subtypes into one group

# Phenotype4 labels that are merged into a single grouped label in Phenotype5.
IMMUNE_SUBTYPES = [
    'CD8+Tcells',
    'CD4+Tcells',
    'Dendritic Cells',
    'Macrophages/Monocytes',
    'Neutrophils',
    'activated Macrophages/Monocytes',
]
IMMUNE_LABEL = 'Immune cells'

# Start Phenotype5 as a copy of Phenotype4.
adata.obs['Phenotype5'] = adata.obs['Phenotype4'].copy()

# A categorical column only accepts values that are registered categories, so
# register the grouped label before assigning it. Skip the registration when the
# label is already a category, because add_categories raises ValueError for an
# existing category.
if isinstance(adata.obs['Phenotype5'].dtype, pd.CategoricalDtype):
    if IMMUNE_LABEL not in adata.obs['Phenotype5'].cat.categories:
        adata.obs['Phenotype5'] = adata.obs['Phenotype5'].cat.add_categories(
            [IMMUNE_LABEL]
        )

# Relabel every cell whose phenotype is one of the immune subtypes.
is_immune = adata.obs['Phenotype5'].isin(IMMUNE_SUBTYPES)
adata.obs.loc[is_immune, 'Phenotype5'] = IMMUNE_LABEL


In [5]:
# Run the spatial interaction analysis.
#
# The analysis is permutation based, so seed the random number generator first
# to let a Restart & Run All reproduce the same scores.
# NOTE(review): this assumes the permutation step draws from NumPy's global RNG
# in the current process — confirm for the installed scimap build.
np.random.seed(42)

spatial = sm.tl.spatial_interaction(
    adata,
    x_coordinate='X_centroid',
    y_coordinate='Y_centroid',
    phenotype=phenotype,              # annotation column chosen in the config cell
    method='delaunay',                # neighbours come from a Delaunay triangulation
    permutation=300,                  # number of permutations per image
    imageid='image_ID',               # images are processed one at a time
    cond_counts_threshold=10,
    subset=None,
    pval_method='zscore',
    normalization='conditional',
    verbose=True,
    label='scimap_delaunay_cozi',     # key under which the result is read from .uns later
)

Processing Image: ['TS-373_IMC77_B_001.csv']
Categories (1, object): ['TS-373_IMC77_B_001.csv']
Performing Delaunay triangulation to identify neighbours for every cell
Mapping phenotype to neighbors
Performing 300 permutations
Consolidating the permutation results
Processing Image: ['TS-373_IMC03_MGUS_002.csv']
Categories (1, object): ['TS-373_IMC03_MGUS_002.csv']
Performing Delaunay triangulation to identify neighbours for every cell
Mapping phenotype to neighbors
Performing 300 permutations
Consolidating the permutation results
Processing Image: ['TS-373_IMC04_MGUS_002.csv']
Categories (1, object): ['TS-373_IMC04_MGUS_002.csv']
Performing Delaunay triangulation to identify neighbours for every cell
Mapping phenotype to neighbors
Performing 300 permutations
Consolidating the permutation results
Processing Image: ['TS-373_IMC72_MGUS_001.csv']
Categories (1, object): ['TS-373_IMC72_MGUS_001.csv']
Performing Delaunay triangulation to identify neighbours for every cell
Mapping phenotype t

In [6]:
# Count the cells (rows of adata.obs) belonging to each image / sample.
# These totals are what the interaction scores get normalized with.
image_ids = adata.obs['image_ID']
total_cells = image_ids.value_counts()
total_cells

image_ID
TS-373_IMC28_B_002.csv       12605
TS-373_IMC72_MGUS_001.csv    11513
TS-373_IMC29_UB_002.csv      11288
TS-373_IMC89_B_001.csv       11013
TS-373_IMC21_UB_001.csv      10915
                             ...  
TS-373_IMC85_UB_001.csv       3420
TS-373_IMC89_B_002.csv        3302
TS-373_IMC45_B_002.csv        3297
TS-373_IMC81_B_002.csv        2674
TS-373_IMC59_MGUS_001.csv     1739
Name: count, Length: 152, dtype: int64

In [7]:
# Put the disease groups in a fixed order.
# reorder_categories raises if the column's categories differ from this list.
adata.obs['disease2'] = adata.obs['disease2'].astype('category')
adata.obs['disease2'] = adata.obs['disease2'].cat.reorder_categories(['MM_BD', 'MM_noBD', 'MGUS', 'SMM'])

# Sort the cells by disease group. Reorder the whole AnnData object instead of
# assigning a sorted frame to adata.obs: that assignment does not realign the
# data, so the annotations would land on different rows than adata.X / adata.obsm.
# Positions (not labels) are used so duplicate obs names cannot cause trouble.
row_order = (
    adata.obs['disease2']
    .reset_index(drop=True)
    .sort_values(kind='stable')
    .index.to_numpy()
)
adata = adata[row_order].copy()
# `spatial` is not reordered here; only its .uns result table is used below.

# Work on a copy so the result table stored in spatial.uns is left unmodified.
save_df = spatial.uns['scimap_delaunay_cozi'].copy()
save_df['phenotype'] = save_df['phenotype'].astype(str)
save_df['neighbour_phenotype'] = save_df['neighbour_phenotype'].astype(str)
# One label per (phenotype, neighbour phenotype) pair, used as the row index.
save_df['new_column'] = save_df['phenotype'] + "_" + save_df['neighbour_phenotype']
save_df = save_df.drop(columns=['phenotype', 'neighbour_phenotype']).set_index('new_column')

# Normalize scores by the square root of the total number of cells in a sample.
# Once the 'zscore_' prefix is stripped, only columns whose name is an image ID
# present in total_cells are scaled; all other columns are left untouched.
save_df.columns = save_df.columns.str.replace('zscore_', '', regex=False)
for col in save_df.columns:
    if col in total_cells.index:
        save_df[col] = save_df[col] / np.sqrt(total_cells[col])

# Transpose so that rows are samples and columns are phenotype pairs.
df_out = save_df.transpose()
# Drop '.0' from the pair labels (presumably for numeric phenotype labels that
# were rendered as floats — TODO confirm).
df_out.columns = df_out.columns.str.replace(r'\.0', '', regex=True)
# Discard the rows whose name contains 'pvalue' or 'count'.
df_out = df_out[~df_out.index.str.contains('pvalue|count')]
# The 'zscore_' prefix was already stripped before transposing, so no second
# replacement on the (phenotype pair) column labels is needed.

df_out.to_csv('./../../Myeloma_Standal/results/colocalization_csv/250623COZI_grouped_immune_normalized.csv')


In [9]:
# Display the normalized interaction table that was written to CSV above.
df_out

new_column,Adipocytes_Adipocytes,Adipocytes_Endothelial cells,Adipocytes_HSCs,Adipocytes_Immune cells,Adipocytes_MPO+,Adipocytes_Osteoblast lineage,Adipocytes_Osteoclasts,Adipocytes_Osteocyte,Adipocytes_Plasma Cells/MM cells,Adipocytes_Unknown,...,Unknown_Adipocytes,Unknown_Endothelial cells,Unknown_HSCs,Unknown_Immune cells,Unknown_MPO+,Unknown_Osteoblast lineage,Unknown_Osteoclasts,Unknown_Osteocyte,Unknown_Plasma Cells/MM cells,Unknown_Unknown
TS-373_IMC77_B_001.csv,0.054949,-0.024512,-0.026232,-0.057748,0.014985,-0.011259,-0.012124,-0.164186,0.009796,0.061592,...,0.100022,0.077495,0.259313,-0.322964,0.140312,-0.001270,0.213667,0.203864,0.220131,0.574460
cond_cells_percentage_TS-373_IMC77_B_001.csv,0.459770,0.022989,0.000000,0.908046,0.379310,0.011494,0.000000,0.011494,0.195402,0.919540,...,0.113267,0.018148,0.007509,0.795995,0.439925,0.008135,0.006884,0.055069,0.173342,0.923029
TS-373_IMC03_MGUS_002.csv,0.005190,-0.091782,-0.132071,-0.027103,0.002523,-0.035993,-0.006825,-0.134189,0.029947,0.041051,...,0.026412,0.128062,0.048612,-0.104185,0.047474,0.069154,-0.708324,0.210480,0.065099,0.344115
cond_cells_percentage_TS-373_IMC03_MGUS_002.csv,0.240741,0.018519,0.148148,0.907407,0.537037,0.037037,0.000000,0.055556,0.537037,0.759259,...,0.063479,0.090684,0.084913,0.851608,0.516900,0.037098,0.000000,0.085738,0.466612,0.832646
TS-373_IMC04_MGUS_002.csv,0.092674,-0.134992,-0.166576,-0.113323,0.011489,-0.091432,-0.012778,-0.114675,0.066032,0.058970,...,0.111155,0.087349,0.024307,-0.252438,0.077312,0.031167,0.182979,0.388691,0.105496,0.513692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
cond_cells_percentage_TS-373_IMC53_B_002.csv,0.716374,0.283626,0.090643,0.608187,0.146199,0.035088,0.000000,0.011696,0.967836,0.228070,...,0.280769,0.226923,0.076923,0.657692,0.311538,0.115385,0.003846,0.023077,0.800000,0.611538
TS-373_IMC47_B_002.csv,0.058939,0.041461,-0.130158,-0.067712,0.013603,-0.039793,-0.010097,-0.044875,-0.017220,0.076529,...,0.066281,0.089815,0.075265,-0.069994,0.141143,-0.004703,-0.079356,0.155312,-0.120735,0.373885
cond_cells_percentage_TS-373_IMC47_B_002.csv,0.500000,0.152174,0.043478,0.923913,0.391304,0.021739,0.000000,0.065217,0.739130,0.760870,...,0.165738,0.097493,0.025070,0.850975,0.373259,0.015320,0.001393,0.027855,0.647632,0.817549
TS-373_IMC83_B_001.csv,0.032446,0.027082,-0.151089,-0.070039,0.036588,-0.022594,-0.005878,-0.011633,0.041516,0.086875,...,0.059530,0.021383,0.029439,-0.150473,0.106937,0.075734,-0.023560,-0.102567,-0.007110,0.300203
