In [None]:
import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import squidpy as sq
from shapely.geometry import Point, Polygon
from sklearn.metrics import silhouette_score

In [None]:
# Load the cell-by-gene count matrix from a 10x-format HDF5 file into an AnnData object.
adata = sc.read_10x_h5(
    filename="cell_feature_matrix.h5"
)

In [None]:
# Per-cell metadata table. Later cells attach it to adata.obs and read its
# x_centroid / y_centroid columns as spatial coordinates.
df = pd.read_csv(
    "cells.csv"
)

In [None]:
# Region annotation table. Later cells group it by its 'Selection' column and
# use the 'X' / 'Y' columns as polygon vertices.
annot = pd.read_csv(
    "sample-annotation-xenium-explorer-decidua.csv"
)

In [None]:
sc.pl.highest_expr_genes(adata, n_top=30)

In [None]:
# Adds per-cell / per-gene QC columns in place (e.g. n_genes_by_counts, total_counts, used by the violin plot).
# NOTE: a later cell replaces adata.obs with the cells.csv table, so columns added here
# only persist afterwards if that table contains them too.
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)

In [None]:
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts"],
    jitter=0.4,
    multi_panel=True,
    size=0
)

In [None]:
adata.obs

In [None]:
annot = annot.drop_duplicates()

In [None]:
# Use the per-cell metadata table as adata.obs.
# NOTE(review): rows of cells.csv are matched to adata.obs_names by POSITION,
# not by a cell-id key -- confirm both files list cells in the same order.
if len(df) != adata.n_obs:
    raise ValueError(
        f"cells.csv has {len(df)} rows but the count matrix has {adata.n_obs} cells"
    )
# Rebind instead of mutating in place so re-running this cell is harmless.
df = df.set_index(adata.obs_names)
# This replaces adata.obs wholesale; previously computed obs columns are dropped
# unless the table carries a column of the same name.
adata.obs = df.copy()

In [None]:
# Store cell centroids as an (n_cells, 2) array under obsm["spatial"], the key the spatial plots read.
adata.obsm["spatial"] = adata.obs[["x_centroid", "y_centroid"]].copy().to_numpy()

In [None]:
# Build one shapely Polygon per annotated region from its X/Y vertex rows
# (vertices are taken in file order within each 'Selection' group).
regions = {
    selection: Polygon(group[['X', 'Y']].values)
    for selection, group in annot.groupby('Selection')
}

# Label every cell with the first region (in sorted 'Selection' order, as produced
# by groupby) whose polygon contains its centroid; cells outside all regions get NaN.
selection_labels = []
for x, y in adata.obs[['x_centroid', 'y_centroid']].to_numpy():
    centroid = Point(x, y)
    selection_labels.append(
        next(
            (name for name, polygon in regions.items() if polygon.contains(centroid)),
            np.nan,
        )
    )

# object dtype: the column holds region-name strings (a float NaN column cannot
# take string assignments cleanly and breaks the later .str accessor if no cell matches).
adata.obs['Selection'] = pd.Series(selection_labels, index=adata.obs.index, dtype=object)

In [None]:
# Parse 'Selection' into its first three whitespace-separated tokens and name
# them core / id / tissue in one step.
split_data = (
    adata.obs['Selection']
    .str.extract(r'(\S+)\s+(\S+)\s+(\S+)')
    .set_axis(['core', 'id', 'tissue'], axis=1)
)

# Attach the parsed fields; 'core' is converted to a number (unparseable -> NaN).
adata.obs[['core', 'id', 'tissue']] = split_data
adata.obs['core'] = pd.to_numeric(adata.obs['core'], errors='coerce')

print(adata.obs[['Selection', 'core', 'id', 'tissue']].head())

In [None]:
adata.obs['core'].unique()

In [None]:
# Exploratory PCA: at this point adata.X has not been normalised/log-transformed yet
# (that happens in a later cell, after which PCA is recomputed).
sc.tl.pca(adata)

In [None]:
sc.pl.pca_variance_ratio(adata, n_pcs=50, log=True)

In [None]:
sc.pl.pca(
    adata,
    color=['tissue'],
    dimensions=[(0, 1), (2, 3), (4, 5), (6, 7)],
    ncols=2,
    size=2,
)

In [None]:
adata.obs['Selection'].value_counts()

In [None]:
# remove low count cores
# NOTE(review): cells whose 'core' is NaN (not inside any annotated region) pass
# this filter and stay in the dataset -- confirm that is intended.
# .copy() turns the boolean-indexed view into a real AnnData so the in-place
# filtering and layer assignment that follow do not operate on a view.
adata = adata[~adata.obs['core'].isin([14,15,16,42,25,26,27,32,38,41])].copy()

In [None]:
adata

In [None]:
# Share of control signal relative to all counts, as percentages.
total_counts_sum = adata.obs["total_counts"].sum()
cprobes = adata.obs["control_probe_counts"].sum() / total_counts_sum * 100
cwords = adata.obs["control_codeword_counts"].sum() / total_counts_sum * 100
print(f"Negative DNA probe count % : {cprobes}")
print(f"Negative decoding count % : {cwords}")

In [None]:
# Drop cells with fewer than 10 total counts, then genes detected in fewer than 5 cells (both in place).
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)

In [None]:
adata.obs['Selection'].value_counts()

In [None]:
adata.obs['id'].value_counts()

In [None]:
# Keep the filtered, untransformed matrix in a layer before X is modified in place.
adata.layers["counts"] = adata.X.copy()
# target_sum=None: per the scanpy docs each cell is scaled to the median total count across cells.
sc.pp.normalize_total(adata, inplace=True, target_sum=None)
sc.pp.log1p(adata)
# Per-gene scaling of X (no max_value clipping is requested here).
# NOTE(review): the rank_genes_groups calls further down run on this scaled X
# (no layer / use_raw is passed) -- confirm that is intended.
sc.pp.scale(adata)

In [None]:
# Recompute PCA on the normalised/scaled matrix (replaces the exploratory X_pca from earlier),
# then build the neighbour graph with default settings and embed with UMAP.
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [None]:
# Sweep Leiden resolutions on the full dataset. For each resolution: report the
# cosine silhouette score in PCA space, plot the UMAP, and show top marker genes.
X = adata.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata, resolution=r, key_added=key, random_state=0)

    y = adata.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        # It is quadratic in the number of cells; pass sample_size=... if this is too slow.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata, color=key, legend_loc='on data')

    adata.obs[key] = adata.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata.obs['leiden'] = adata.obs['leiden_r0.5'].copy()

In [None]:
sc.pl.umap(
    adata,
    color=[
        "total_counts",
        #"n_genes_by_counts",
        "leiden",
    ],
    wspace=0.4, ncols=1
)

In [None]:
sc.pl.umap(
    adata,
    color=[
        "total_counts",
        #"n_genes_by_counts",
        "leiden",
    ],
    wspace=0.4, ncols=1
)

In [None]:
# Wilcoxon marker genes per Leiden cluster, shown as a table of the top 20
# gene names per cluster (columns are named "<cluster>_n").
sc.tl.rank_genes_groups(adata, groupby='leiden', method='wilcoxon')
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
top_gene_names = {f"{group}_n": result['names'][group] for group in groups}
pd.DataFrame(top_gene_names).head(20)

In [None]:
sc.pl.dotplot(adata, var_names=['HLA-G', 'ARG1', 'IL1RL1','IL1R2','CD68','IGF1','PGR','ESR1', 'IGFBP5', 'ACTA2', 'CD3E',
               'CD3D', 'CD14', 'SPP1',  'PTGER2', 'THY1', 'FOLR2',
               'FLT1', 'XCL1', 'CD4', 'CD8A', 'COL18A1',"XCR1",'SELL','VWF','EPCAM','CRISPLD2','MYH11','MYLK','CSF2RB',
                               'EPOR',
                               'PTPRC','CD83','CD19','MS4A1','CCL21','CD1C','CLEC10A','MS4A2','PCOLCE','SNTN','CD83'],
              groupby='leiden'
              ,standard_scale="var"
             )

In [None]:
pd.crosstab(adata.obs['leiden'], adata.obs['tissue'])

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=10, dendrogram=False,standard_scale="var",
                                swap_axes=True)

In [None]:
# Manual annotation: Leiden cluster id -> level-1 cell type label.
# Cluster ids missing from this mapping keep their numeric id as the label.
new_cluster_names = {
    "0": "NK",
    "1": "FIB",
    "2": "MYELO",
    "3": "FIB",
    "4": "EPI",
    "5": "FIB",
    "6": "EPI",
    "7": "FIB",
    "8": "ENDO",
    "9": "EVT",
    "10": "EPI",
    "11": "MURAL",
    "12": "low_q",
    "13": "TCELL",
    "14": "BCELL",
    "15": "LYMPH",
    "16": "DC2",
    "17": "MAST",
    "18": "FIB",
    "19": "DC1",
    "20": "EPI",
}

adata.obs['celltype'] = (
    adata.obs['leiden']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
pd.crosstab(adata.obs['celltype'], adata.obs['tissue'])

### subcluster myeloid cells (MYELO, DC1, DC2)

In [None]:
# Subcluster the myeloid compartment (MYELO, DC1, DC2).
# .copy() makes the subset an independent AnnData, so the obs edits and the
# neighbours/UMAP/Leiden steps that follow do not act on a view of adata.
adata_subset = adata[adata.obs['celltype'].isin(['MYELO','DC1','DC2']),:].copy()

# Drop 'celltype' categories that no longer occur in the subset.
adata_subset.obs['celltype'] = adata_subset.obs['celltype'].cat.remove_unused_categories()

In [None]:
adata_subset

In [None]:
# Rebuild the neighbour graph and UMAP on the subset only (default settings, default storage keys).
sc.pp.neighbors(adata_subset)
sc.tl.umap(adata_subset)

In [None]:
# Leiden resolution sweep on the current subset.
# The PCA coordinates must come from the subset so X and the labels have the
# same number of rows (using adata.obsm here makes silhouette_score fail).
X = adata_subset.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'subcluster_leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata_subset).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata_subset, resolution=r, key_added=key, random_state=0)

    y = adata_subset.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata_subset, color=key, legend_loc='on data')

    adata_subset.obs[key] = adata_subset.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata_subset, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata_subset.obs['leiden_subcluster'] = adata_subset.obs['subcluster_leiden_r0.3'].copy()

In [None]:
sc.pl.dotplot(adata_subset, var_names=['CD14','MKI67',
                                      'PTPRC','FOLR2','SPP1','NLRP3','TNF','LUCAT1','SELL','C1QA',
                                      'WARS','CTSD','CSF2RB','CD209','CD163','LYVE1','CD28','CXCL10','CXCL9',
                                      'MARCO','MMP9','FLT1','CSF3','IL4I1','MRC1','CXCR4','ENTPD1','CXCL2',
                                      'VSIG4','CD1C','XCR1','VCAN','IL10','CD19',
                                     'PTGS2',
                                      'CXCL2','IDO1','APOBEC3A','FCGR3A',
                                      'CCR7','CD19'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
             )

In [None]:
sc.tl.dendrogram(adata_subset, groupby='leiden_subcluster')
sc.pl.dotplot(adata_subset, var_names=['PTPRC','CD14','ARG1','HIF1A','PGR','IL1R2','MKI67',
                                      'PTPRC','FOLR2','SPP1','NLRP3','TNF','LUCAT1','SELL','C1QA',
                                      'WARS','CTSD','CSF2RB','IL10','HLA-G','FSTL3','CCL19','CD83',
                                      'CSF3','MS4A1','CD19','SPIB','LILRA4','total_counts'],
              groupby='leiden_subcluster'
              ,standard_scale="var", dendrogram=True
             )

In [None]:
sc.tl.rank_genes_groups(adata_subset, 'leiden_subcluster', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['tissue'])

In [None]:
adata_subset.obs

In [None]:
#more markers (MMP9 not detected on protein)
sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, dendrogram=False,standard_scale="var",
                                swap_axes=True)

In [None]:
# Manual annotation of the myeloid subclusters (subcluster id -> level-2 label).
# NOTE(review): the earlier comment here said "change name of decPAM to TR_MAC",
# but the mapping still uses decPAM labels -- treat that rename as an open TODO.
new_cluster_names = {
    "0": "decPAM1",
    "1": "decPAM1",
    "2": "decBAM",
    "3": "DC2",
    "4": "pMac",
    "5": "decPAM2",
    "6": "DC1",
    "7": "PLASMA",
    "8": "FIB",
}

adata_subset.obs['celltype_lvl2'] = (
    adata_subset.obs['leiden_subcluster']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
adata_subset.uns.pop('celltype_lvl2_colors', None)

In [None]:
sq.pl.spatial_scatter(
    adata_subset,
    library_id="spatial",
    shape=None,
    color=['celltype_lvl2'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
# Only the last expression of a cell is rendered, so the first table must be
# displayed explicitly (display is provided by the IPython kernel).
display(pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['Selection']))
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['tissue'])

In [None]:
# Start the level-2 annotation as a copy of the level-1 labels; the transfer cells
# that follow overwrite it for the cells that were subclustered.
adata.obs['celltype_lvl2']=adata.obs['celltype']

In [None]:
# Write the subset's level-2 labels back into the full object.
# Both columns must share one category set, otherwise assigning labels that are
# not yet categories of adata.obs['celltype_lvl2'] would fail.
for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].astype('category')

combined_categories = adata.obs['celltype_lvl2'].cat.categories.union(
    adata_subset.obs['celltype_lvl2'].cat.categories
)

for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].cat.set_categories(combined_categories)

# Only cells present in both objects are updated.
common_indices = adata_subset.obs.index.intersection(adata.obs.index)
adata.obs.loc[common_indices, 'celltype_lvl2'] = adata_subset.obs.loc[common_indices, 'celltype_lvl2']

In [None]:
adata.uns.pop('celltype_lvl2_colors', None)
sq.pl.spatial_scatter(
    adata,
    library_id="spatial",
    shape=None,
    color=['celltype_lvl2'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

### subcluster DC2

In [None]:
# Subcluster the cells labelled DC2 at level 2.
# .copy() makes the subset an independent AnnData rather than a view of adata.
adata_subset = adata[adata.obs['celltype_lvl2'].isin(['DC2']),:].copy()

# Drop level-2 categories that no longer occur in the subset.
adata_subset.obs['celltype_lvl2'] = adata_subset.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata_subset

In [None]:
sc.pp.neighbors(adata_subset)
sc.tl.umap(adata_subset)

In [None]:
# Leiden resolution sweep on the current subset.
# The PCA coordinates must come from the subset so X and the labels have the
# same number of rows (using adata.obsm here makes silhouette_score fail).
X = adata_subset.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'subcluster_leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata_subset).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata_subset, resolution=r, key_added=key, random_state=0)

    y = adata_subset.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata_subset, color=key, legend_loc='on data')

    adata_subset.obs[key] = adata_subset.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata_subset, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata_subset.obs['leiden_subcluster'] = adata_subset.obs['subcluster_leiden_r0.3'].copy()

In [None]:
sc.pl.dotplot(adata_subset, var_names=['CD14','MKI67','CLEC10A','cell_area','CD1C',
                                      'PDGFRA','SELL','FCN1','AQP9','S100A12','CCR2','FCGR3A',
                                      'total_counts'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
              ,swap_axes=True
             )

In [None]:
sc.tl.rank_genes_groups(adata_subset, 'leiden_subcluster', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['Selection'])

In [None]:
adata_subset.uns.pop('leiden_subcluster_colors', None)
sq.pl.spatial_scatter(
    adata_subset,
    library_id="spatial",
    shape=None,
    color=['leiden_subcluster'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
# Manual annotation of the DC2 subclusters (subcluster id -> level-2 label).
new_cluster_names = {
    "0": "Mono",
    "1": "low_q",
    "2": "DC2",
}

adata_subset.obs['celltype_lvl2'] = (
    adata_subset.obs['leiden_subcluster']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['Selection'])

In [None]:
# Write the subset's level-2 labels back into the full object.
# Both columns must share one category set before the assignment.
for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].astype('category')

combined_categories = adata.obs['celltype_lvl2'].cat.categories.union(
    adata_subset.obs['celltype_lvl2'].cat.categories
)

for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].cat.set_categories(combined_categories)

# Only cells present in both objects are updated.
common_indices = adata_subset.obs.index.intersection(adata.obs.index)
adata.obs.loc[common_indices, 'celltype_lvl2'] = adata_subset.obs.loc[common_indices, 'celltype_lvl2']

In [None]:
adata.obs['celltype_lvl2'] = adata.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata.uns.pop('celltype_lvl2_colors', None)
sq.pl.spatial_scatter(
    adata,
    library_id="spatial",
    shape=None,
    color=['celltype', 'celltype_lvl2'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
pd.crosstab(adata.obs['celltype_lvl2'], adata.obs['Selection'])

### subcluster NK

In [None]:
# Subcluster the cells labelled NK at level 2.
# .copy() makes the subset an independent AnnData rather than a view of adata.
adata_subset = adata[adata.obs['celltype_lvl2'].isin(['NK']),:].copy()

# Drop level-2 categories that no longer occur in the subset.
adata_subset.obs['celltype_lvl2'] = adata_subset.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata_subset

In [None]:
sc.pp.neighbors(adata_subset)
sc.tl.umap(adata_subset)

In [None]:
# Leiden resolution sweep on the current subset.
# The PCA coordinates must come from the subset so X and the labels have the
# same number of rows (using adata.obsm here makes silhouette_score fail).
X = adata_subset.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'subcluster_leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata_subset).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata_subset, resolution=r, key_added=key, random_state=0)

    y = adata_subset.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata_subset, color=key, legend_loc='on data')

    adata_subset.obs[key] = adata_subset.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata_subset, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata_subset.obs['leiden_subcluster'] = adata_subset.obs['subcluster_leiden_r0.3'].copy()

In [None]:
sc.pl.dotplot(adata_subset, var_names=['MKI67','CD3D','CD3E','GNLY','IL7R','CTLA4','FOXP3',
                                      'CD68','CD14','CD4','CD8A','XCL1','SPP1','FOLR2','MARCO','MMP9',
                                      'CXCR4','CD2','THY1','PTPRC','FSTL3','TRAC','LILRB1','GZMB',
                                      'GZMA','PRF1','ITGAM','ITGAE','ENTPD1','KLRD1','KLRB1',
                                      'FCGR3A','SELL','KIR2DL4','CCL5','C1QA','MRC1'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
             )

In [None]:
sc.pl.dotplot(adata_subset, var_names=['ENTPD1','XCL1','PECAM1','VWF','CD34','CD3E','CD14','CD68',
                                      'GZMB','PRF1','CXCL9','CXCL10','NKG7','total_counts','FCGR3A'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
             )

In [None]:
sc.pl.violin(adata_subset, keys='total_counts', groupby='leiden_subcluster', size=0)

In [None]:
sc.tl.dendrogram(adata_subset, groupby='leiden_subcluster')
sc.pl.dotplot(adata_subset, var_names=['MKI67','CD3D','CD3E','GNLY','IL7R','CTLA4','FOXP3',
                                      'CD68','CD14','CD4','CD8A','XCL1','SPP1','FOLR2','MARCO','MMP9',
                                      'CXCR4','CD2','THY1','PTPRC','FSTL3','TRAC','LILRB1','GZMB',
                                      'GZMA','PRF1','ITGAM','ITGAE','ENTPD1'],
              groupby='leiden_subcluster'
              ,standard_scale="var", dendrogram=True, swap_axes=False
             )

In [None]:
sc.tl.rank_genes_groups(adata_subset, 'leiden_subcluster', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['tissue'])

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, dendrogram=False,standard_scale="var",
                                swap_axes=True)

In [None]:
# Manual annotation of the NK subclusters (subcluster id -> level-2 label).
new_cluster_names = {
    "0": "NK_CD39+",
    "1": "NK_CD39-",
    "2": "NK_CD39+",
    "3": "NKp",
    "4": "NK_CD39+",
    "5": "NK_CD39-",
}

adata_subset.obs['celltype_lvl2'] = (
    adata_subset.obs['leiden_subcluster']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['Selection'])

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['tissue'])

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['Selection'])

In [None]:
adata_subset.uns.pop('celltype_lvl2_colors', None)

In [None]:
sq.pl.spatial_scatter(
    adata_subset,
    library_id="spatial",
    shape=None,
    color=['celltype_lvl2'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
# Write the subset's level-2 labels back into the full object.
# Both columns must share one category set before the assignment.
for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].astype('category')

combined_categories = adata.obs['celltype_lvl2'].cat.categories.union(
    adata_subset.obs['celltype_lvl2'].cat.categories
)

for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].cat.set_categories(combined_categories)

# Only cells present in both objects are updated.
common_indices = adata_subset.obs.index.intersection(adata.obs.index)
adata.obs.loc[common_indices, 'celltype_lvl2'] = adata_subset.obs.loc[common_indices, 'celltype_lvl2']

In [None]:
adata.obs['celltype_lvl2'] = adata.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
pd.crosstab(adata.obs['celltype_lvl2'], adata.obs['tissue'])

### subcluster T cells

In [None]:
# Subcluster the cells labelled TCELL at level 2.
# .copy() makes the subset an independent AnnData rather than a view of adata.
adata_subset = adata[adata.obs['celltype_lvl2'].isin(['TCELL']),:].copy()

# Drop level-2 categories that no longer occur in the subset.
adata_subset.obs['celltype_lvl2'] = adata_subset.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata_subset

In [None]:
sc.pp.neighbors(adata_subset)
sc.tl.umap(adata_subset)

In [None]:
# Leiden resolution sweep on the current subset.
# The PCA coordinates must come from the subset so X and the labels have the
# same number of rows (using adata.obsm here makes silhouette_score fail).
X = adata_subset.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'subcluster_leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata_subset).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata_subset, resolution=r, key_added=key, random_state=0)

    y = adata_subset.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata_subset, color=key, legend_loc='on data')

    adata_subset.obs[key] = adata_subset.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata_subset, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata_subset.obs['leiden_subcluster'] = adata_subset.obs['subcluster_leiden_r0.3'].copy()

In [None]:
adata_subset.uns.pop('leiden_subcluster_colors', None)

In [None]:
sc.pl.dotplot(adata_subset, var_names=['MKI67','PTPRC',
                                      'CD4','CD8A','FOXP3','CTLA4','CD14','CD3E','THY1','PDGFRA','PDCD1',
                                      'ARG1','SLC4A1','CD1C','XCR1','NKG7','GNLY','ENTPD1','XCL1'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
             )

In [None]:
sc.tl.rank_genes_groups(adata_subset, 'leiden_subcluster', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
# Only the last expression of a cell is rendered, so the first table must be
# displayed explicitly (display is provided by the IPython kernel).
display(pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['Selection']))
pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['tissue'])

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, dendrogram=False,standard_scale="var",
                                swap_axes=True)

In [None]:
# Manual annotation of the T-cell subclusters (subcluster id -> level-2 label).
new_cluster_names = {
    "0": "TCELL_CD4+",
    "1": "TCELL_CD8+",
    "2": "OTHER",
    "3": "OTHER",
    "4": "OTHER",
    "5": "Treg",
    "6": "OTHER",
}

adata_subset.obs['celltype_lvl2'] = (
    adata_subset.obs['leiden_subcluster']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
# Write the subset's level-2 labels back into the full object.
# Both columns must share one category set before the assignment.
for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].astype('category')

combined_categories = adata.obs['celltype_lvl2'].cat.categories.union(
    adata_subset.obs['celltype_lvl2'].cat.categories
)

for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].cat.set_categories(combined_categories)

# Only cells present in both objects are updated.
common_indices = adata_subset.obs.index.intersection(adata.obs.index)
adata.obs.loc[common_indices, 'celltype_lvl2'] = adata_subset.obs.loc[common_indices, 'celltype_lvl2']

In [None]:
adata.obs['celltype_lvl2'] = adata.obs['celltype_lvl2'].cat.remove_unused_categories()

### subcluster low_q

In [None]:
# Subcluster the low-quality cluster.
# NOTE(review): selection is on the level-1 'celltype' column, so cells that were
# labelled 'low_q' only at level 2 (from the DC2 subclustering) are not included -- confirm.
# .copy() makes the subset an independent AnnData rather than a view of adata.
adata_subset = adata[adata.obs['celltype'].isin(['low_q']),:].copy()

# Drop 'celltype' categories that no longer occur in the subset.
adata_subset.obs['celltype'] = adata_subset.obs['celltype'].cat.remove_unused_categories()

In [None]:
adata_subset

In [None]:
sc.pp.neighbors(adata_subset)
sc.tl.umap(adata_subset)

In [None]:
# Leiden resolution sweep on the current subset.
# The PCA coordinates must come from the subset so X and the labels have the
# same number of rows (using adata.obsm here makes silhouette_score fail).
X = adata_subset.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'subcluster_leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata_subset).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata_subset, resolution=r, key_added=key, random_state=0)

    y = adata_subset.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata_subset, color=key, legend_loc='on data')

    adata_subset.obs[key] = adata_subset.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata_subset, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata_subset.obs['leiden_subcluster'] = adata_subset.obs['subcluster_leiden_r0.3'].copy()

In [None]:
sc.tl.rank_genes_groups(adata_subset, 'leiden_subcluster', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['tissue'])

In [None]:
adata_subset.uns.pop('leiden_subcluster_colors', None)
sq.pl.spatial_scatter(
    adata_subset,
    library_id="spatial",
    shape=None,
    color=['leiden_subcluster'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
sc.tl.dendrogram(adata_subset, groupby='leiden_subcluster')
sc.pl.dotplot(adata_subset, var_names=['MKI67','MMP9','CXCL9','CXCL10','NLRP3','LUCAT1','SPP1',
                                      'AQP9','APOBEC3A','ARG1'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
              ,swap_axes=False
              ,dendrogram=False
             )

In [None]:
# Manual annotation of the low_q subclusters: subclusters 0-9 are all "OTHER"
# except subcluster 2, which is labelled "NEUTRO".
new_cluster_names = {str(cluster_id): "OTHER" for cluster_id in range(10)}
new_cluster_names["2"] = "NEUTRO"

adata_subset.obs['celltype_lvl2'] = (
    adata_subset.obs['leiden_subcluster']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['Selection'])

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['tissue'])

In [None]:
# Write the subset's level-2 labels back into the full object.
# Both columns must share one category set before the assignment.
for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].astype('category')

combined_categories = adata.obs['celltype_lvl2'].cat.categories.union(
    adata_subset.obs['celltype_lvl2'].cat.categories
)

for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].cat.set_categories(combined_categories)

# Only cells present in both objects are updated.
common_indices = adata_subset.obs.index.intersection(adata.obs.index)
adata.obs.loc[common_indices, 'celltype_lvl2'] = adata_subset.obs.loc[common_indices, 'celltype_lvl2']

In [None]:
adata.obs['celltype_lvl2'] = adata.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata.uns.pop('celltype_lvl2_colors', None)
sq.pl.spatial_scatter(
    adata,
    library_id="spatial",
    shape=None,
    color=['celltype', 'celltype_lvl2'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
pd.crosstab(adata.obs['celltype_lvl2'], adata.obs['tissue'])

### subcluster decBAM

In [None]:
# Subcluster the cells labelled decBAM at level 2.
# .copy() makes the subset an independent AnnData rather than a view of adata.
adata_subset = adata[adata.obs['celltype_lvl2'].isin(['decBAM']),:].copy()

# Drop level-2 categories that no longer occur in the subset.
adata_subset.obs['celltype_lvl2'] = adata_subset.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata_subset

In [None]:
sc.pp.neighbors(adata_subset)
sc.tl.umap(adata_subset)

In [None]:
# Leiden resolution sweep on the current subset.
# The PCA coordinates must come from the subset so X and the labels have the
# same number of rows (using adata.obsm here makes silhouette_score fail).
X = adata_subset.obsm['X_pca']

for r in (0.3, 0.5, 0.7):
    key = f'subcluster_leiden_r{r:.1f}'
    # Cluster on the default neighbour graph written by sc.pp.neighbors(adata_subset).
    # (The previous neighbors_key='wnn' referred to a graph this notebook never computes.)
    sc.tl.leiden(adata_subset, resolution=r, key_added=key, random_state=0)

    y = adata_subset.obs[key].to_numpy()
    n_clusters = np.unique(y).size
    if n_clusters > 1:
        # silhouette_score is imported from sklearn.metrics in the imports cell.
        s = silhouette_score(X, y, metric='cosine')
        print(f"{key}: n_clusters={n_clusters}  silhouette={s:.4f}")
    else:
        # The silhouette score is undefined for a single cluster.
        print(f"{key}: n_clusters={n_clusters}  silhouette=n/a")

    # UMAP
    sc.pl.umap(adata_subset, color=key, legend_loc='on data')

    adata_subset.obs[key] = adata_subset.obs[key].astype('category')
    if n_clusters > 1:
        # One-vs-rest marker tests need at least two clusters.
        sc.tl.rank_genes_groups(adata_subset, groupby=key, method='wilcoxon')
        sc.pl.rank_genes_groups_dotplot(adata_subset, n_genes=5, standard_scale='var',
                                        swap_axes=True, dendrogram=False)

In [None]:
adata_subset.obs['leiden_subcluster'] = adata_subset.obs['subcluster_leiden_r0.3'].copy()

In [None]:
sc.tl.rank_genes_groups(adata_subset, 'leiden_subcluster', method='wilcoxon')
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
pd.crosstab(adata_subset.obs['leiden_subcluster'], adata_subset.obs['tissue'])

In [None]:
adata_subset.uns.pop('leiden_subcluster_colors', None)
sq.pl.spatial_scatter(
    adata_subset,
    library_id="spatial",
    shape=None,
    color=['leiden_subcluster'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
sc.tl.dendrogram(adata_subset, groupby='leiden_subcluster')
sc.pl.dotplot(adata_subset, var_names=['MKI67','MMP9','CXCL9','CXCL10','NLRP3','LUCAT1','SPP1',
                                      'AQP9','APOBEC3A','ARG1','SPP1','C1QA','FOLR2','CD209','MARCO',
                                      'MRC1','CD163','CD14','CTSD','CXCL2','SEMA3C','C15orf48','S100A12',
                                      'TNF'],
              groupby='leiden_subcluster'
              ,standard_scale="var"
              ,swap_axes=False
              ,dendrogram=False
             )

In [None]:
# Manual annotation of the decBAM subclusters (subcluster id -> level-2 label).
new_cluster_names = {
    "0": "decPAM1",
    "1": "decBAM1",
    "2": "decBAM2",
    "3": "decPAM1",
}

adata_subset.obs['celltype_lvl2'] = (
    adata_subset.obs['leiden_subcluster']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['Selection'])

In [None]:
pd.crosstab(adata_subset.obs['celltype_lvl2'], adata_subset.obs['tissue'])

In [None]:
# Write the subset's level-2 labels back into the full object.
# Both columns must share one category set before the assignment.
for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].astype('category')

combined_categories = adata.obs['celltype_lvl2'].cat.categories.union(
    adata_subset.obs['celltype_lvl2'].cat.categories
)

for _ad in (adata_subset, adata):
    _ad.obs['celltype_lvl2'] = _ad.obs['celltype_lvl2'].cat.set_categories(combined_categories)

# Only cells present in both objects are updated.
common_indices = adata_subset.obs.index.intersection(adata.obs.index)
adata.obs.loc[common_indices, 'celltype_lvl2'] = adata_subset.obs.loc[common_indices, 'celltype_lvl2']

In [None]:
adata.obs['celltype_lvl2'] = adata.obs['celltype_lvl2'].cat.remove_unused_categories()

In [None]:
adata.uns.pop('celltype_lvl2_colors', None)
sq.pl.spatial_scatter(
    adata,
    library_id="spatial",
    shape=None,
    color=['celltype', 'celltype_lvl2'],
    wspace=0.1, 
    ncols=1, 
    figsize=(20, 10),
    size=8
)

In [None]:
pd.crosstab(adata.obs['celltype_lvl2'], adata.obs['Selection'])

In [None]:
sc.tl.rank_genes_groups(adata, 'celltype_lvl2', method='wilcoxon')
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
sc.tl.dendrogram(adata, groupby='celltype_lvl2')

In [None]:
sc.pl.rank_genes_groups_dotplot(adata, n_genes=10, dendrogram=True,standard_scale="var",
                                swap_axes=True)

In [None]:
# Harmonise the level-2 labels into display names, stored in 'celltype_lvl1'.
# Labels that are not keys of this mapping are left unchanged by .replace
# (e.g. 'EPI' is assigned earlier in the notebook and has no entry here).
# NOTE(review): 'PLASMA' is mapped to 'decPAM1' -- confirm this merge is intended.
# NOTE(review): the output column is called celltype_lvl1 although it is derived
# from celltype_lvl2 -- confirm the naming.
new_cluster_names = {
    "LYMPH": "Lymph", "ENDO": "Endo", "MURAL": "Mural", "MAST": "Mast",
    "EVT": "EVT",'Mono':'Mono','NEUTRO':'Neutro','DC2':'DC2','low_q':'Other',
    'DC1':'DC1','pMac':'pMac','decPAM2':'decPAM2',
    'PLASMA':'decPAM1','decBAM1':'decBAM1',
    'decBAM2':'decBAM2','TCELL_CD4+':'Tcell_CD4+','TCELL_CD8+':'Tcell_CD8+','Treg':'Treg'
    ,'decPAM1':'decPAM1','BCELL':'B cell','OTHER':'Other',
    'NKp':'pNK','NK_CD39+':'NK_CD39+','NK_CD39-':'NK_CD39-','FIB':'Fib'
    
}

# Plain strings first so .replace works on values rather than categories,
# then back to a categorical column.
adata.obs['celltype_lvl1'] = adata.obs.celltype_lvl2.astype("str").values
adata.obs.celltype_lvl1 = adata.obs.celltype_lvl1.replace(new_cluster_names)
adata.obs.celltype_lvl1 = adata.obs.celltype_lvl1.astype("category")

In [None]:
sc.tl.rank_genes_groups(adata, 'celltype_lvl1', method='wilcoxon')
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
# Marker panel per level-1 cell type. dendrogram=False, so no sc.tl.dendrogram
# call is needed beforehand.
# NOTE(review): 'total_counts' is a QC metric name rather than a gene symbol --
# presumably it is resolved from adata.obs; confirm.
sc.pl.dotplot(
    adata,
    var_names=[
        'total_counts', 'MKI67', 'CD3D', 'CD3E', 'GNLY', 'IL7R', 'CTLA4', 'FOXP3',
        'CD68', 'CD14', 'CD4', 'CD8A', 'XCL1', 'SPP1', 'FOLR2', 'MARCO', 'MMP9',
        'CXCR4', 'CD2', 'THY1', 'PTPRC', 'FSTL3', 'TRAC', 'LILRB1', 'GZMB',
        'GZMA', 'PRF1', 'ITGAM', 'ITGAE', 'ENTPD1', 'HIF1A',
    ],
    groupby='celltype_lvl1',
    standard_scale="var",
    dendrogram=False,
    swap_axes=False,
)

In [None]:
# Gene panel per level-1 cell type, genes on rows (swap_axes=True),
# each gene scaled across groups.
sc.pl.dotplot(
    adata,
    var_names=[
        'HIF1A', 'CD209', 'C1QA', 'EPO', 'EPOR', 'FLT1', 'FOLR2', 'IDO1',
        'ISG15', 'IL10', 'SPP1', 'ITGB1', 'LILRB1', 'LUCAT1', 'NLRP3', 'MTOR',
        'PTGER2', 'PTGES', 'PTGS2', 'RICTOR', 'RPTOR', 'TGFB1', 'VEGFA', 'XCL1',
        'WARS', 'XCR1', 'TNF', 'RPIA', 'CXCL2',
    ],
    groupby='celltype_lvl1',
    standard_scale="var",
    dendrogram=False,
    swap_axes=True,
)

In [None]:
# NOTE(review): this call uses the same gene list and options as the dot plot
# in the previous cell -- candidate for removal.
sc.pl.dotplot(adata, var_names=['HIF1A','CD209','C1QA','EPO','EPOR','FLT1','FOLR2','IDO1',
                               'ISG15','IL10','SPP1','ITGB1','LILRB1','LUCAT1','NLRP3','MTOR',
                               'PTGER2','PTGES','PTGS2','RICTOR','RPTOR','TGFB1','VEGFA','XCL1','WARS','XCR1',
                               'TNF','RPIA','CXCL2'],
              groupby='celltype_lvl1'
              ,standard_scale="var", dendrogram=False, swap_axes=True
             )

## Add perivascular, vascular, and endovascular masks

In [None]:
# header=2: the third line of the file holds the column names; the two lines
# before it are skipped.
perivasc = pd.read_csv("coords-perivascular-xenium.csv", header=2)

In [None]:
# Remove fully duplicated rows before the polygons are built from this table.
perivasc = perivasc.drop_duplicates()

In [None]:
# Build one polygon per named selection from its (X, Y) vertices, in row order.
regions = {
    selection: Polygon(group[['X', 'Y']].values)
    for selection, group in perivasc.groupby('Selection')
}

# Every cell starts outside the mask.
adata.obs['perivascular'] = False

# Flag a cell as perivascular when its centroid lies inside any of the polygons.
for idx, cell in adata.obs.iterrows():
    centroid = Point(cell['x_centroid'], cell['y_centroid'])
    if any(polygon.contains(centroid) for polygon in regions.values()):
        adata.obs.at[idx, 'perivascular'] = True

In [None]:
# Number of cells flagged as perivascular.
int(adata.obs['perivascular'].sum())

In [None]:
# header=2: the third line of the file holds the column names.
vasc = pd.read_csv("coords-vascular-decidua.csv", header=2)
vasc = vasc.drop_duplicates()

# Build one polygon per named selection from its (X, Y) vertices, in row order.
regions = {
    selection: Polygon(group[['X', 'Y']].values)
    for selection, group in vasc.groupby('Selection')
}

# Every cell starts outside the mask.
adata.obs['vascular'] = False

# Flag a cell as vascular when its centroid lies inside any of the polygons.
for idx, cell in adata.obs.iterrows():
    centroid = Point(cell['x_centroid'], cell['y_centroid'])
    if any(polygon.contains(centroid) for polygon in regions.values()):
        adata.obs.at[idx, 'vascular'] = True

# Number of cells flagged as vascular.
int(adata.obs['vascular'].sum())

In [None]:
# header=2: the third line of the file holds the column names.
endovasc = pd.read_csv("coords-endovascular-decidua-xenium.csv", header=2)
endovasc = endovasc.drop_duplicates()

# Build one polygon per named selection from its (X, Y) vertices, in row order.
regions = {
    selection: Polygon(group[['X', 'Y']].values)
    for selection, group in endovasc.groupby('Selection')
}

# Every cell starts outside the mask.
adata.obs['endovasc'] = False

# Flag a cell as endovascular when its centroid lies inside any of the polygons.
for idx, cell in adata.obs.iterrows():
    centroid = Point(cell['x_centroid'], cell['y_centroid'])
    if any(polygon.contains(centroid) for polygon in regions.values()):
        adata.obs.at[idx, 'endovasc'] = True

# Number of cells flagged as endovascular.
int(adata.obs['endovasc'].sum())


In [None]:
# Cell counts per level-2 cell type, split by the perivascular flag.
pd.crosstab(
    index=adata.obs['celltype_lvl2'],
    columns=adata.obs['perivascular'],
)


In [None]:
# Cell counts per level-2 cell type, split by the endovascular flag.
pd.crosstab(
    index=adata.obs['celltype_lvl2'],
    columns=adata.obs['endovasc'],
)


In [None]:
# Cell counts per annotated selection, split by the endovascular flag.
pd.crosstab(
    index=adata.obs['Selection'],
    columns=adata.obs['endovasc'],
)


In [None]:
# Keep only the cells whose 'core' value is 48.
in_core_48 = adata.obs['core'].isin([48])
adata_subset = adata[in_core_48]


In [None]:
# Display the per-cell metadata of the core subset.
adata_subset.obs


In [None]:
# Restrict the core subset to cells labelled 'EVT' at annotation level 1.
# .copy() materialises an independent AnnData instead of a view of a view, so the
# later writes to its .obs and .uns do not rely on anndata's implicit
# view-to-copy conversion (ImplicitModificationWarning).
adata_subset_EVT = adata_subset[adata_subset.obs['celltype_lvl1'].isin(['EVT'])].copy()


In [None]:
# Display a summary of the EVT-only subset.
adata_subset_EVT


In [None]:
# Store the endovascular flag as a categorical column; it is used as the
# grouping key for the differential expression test in the next cell.
adata_subset_EVT.obs['endovasc'] = adata_subset_EVT.obs['endovasc'].astype('category')


In [None]:
# Rank genes between the 'endovasc' groups within the EVT subset
# (method='t-test_overestim_var').
sc.tl.rank_genes_groups(
    adata_subset_EVT,
    groupby='endovasc',
    method='t-test_overestim_var',
)
result = adata_subset_EVT.uns['rank_genes_groups']
groups = result['names'].dtype.names

pd.set_option('display.max_columns', 100)

# One column per group ("<group>_n") listing its ranked gene names; show the top 20.
pd.DataFrame(
    {f"{group}_n": result['names'][group] for group in groups}
).head(20)

In [None]:
# Selected genes per level-1 cell type, genes on rows, each gene scaled across groups.
sc.pl.dotplot(
    adata,
    var_names=[
        'LAMA4', 'MMP12', 'GPC3', 'ENTPD1', 'B3GNT2',
        'C15orf48', 'TFPI', 'GATA2', 'PPARG',
    ],
    groupby='celltype_lvl1',
    standard_scale="var",
    dendrogram=False,
    swap_axes=True,
)

In [None]:
# Display the metadata of cells labelled 'EVT' or 'iEVT' at level 1.
# NOTE(review): the 'iEVT' label is only assigned by a renaming cell further
# down; on a top-to-bottom run this filter presumably matches 'EVT' only -- confirm.
adata[adata.obs['celltype_lvl1'].isin(['EVT','iEVT'])].obs


In [None]:
# Select EVT cells that lie inside the endovascular mask.
# Both conditions are evaluated on the full object and combined into a single
# subsetting step; .copy() yields an independent AnnData so that the relabelling
# of adata_EVT.obs in the next cell does not write into a view of a view.
is_evt = adata.obs['celltype_lvl1'].isin(['EVT'])
is_endovascular = adata.obs['endovasc'].isin([True])
adata_EVT = adata[is_evt & is_endovascular].copy()


In [None]:
new_cluster_names = {
    "EVT": "eEVT"
}

# Relabel the endovascular EVT subset ("EVT" -> "eEVT") and store it as a categorical.
evt_labels = adata_EVT.obs['celltype_lvl1'].astype("str").replace(new_cluster_names)
adata_EVT.obs['celltype_lvl1'] = evt_labels.astype("category")

# Register the new label on the full object. add_categories raises if the
# category already exists, so guard it to keep this cell safe to re-run.
if "eEVT" not in adata.obs["celltype_lvl1"].cat.categories:
    adata.obs["celltype_lvl1"] = adata.obs["celltype_lvl1"].cat.add_categories("eEVT")

# Copy the labels back to the matching cells of the full object: one masked
# assignment per distinct label instead of one .loc write per cell.
for label in evt_labels.unique():
    relabelled_cells = evt_labels.index[evt_labels == label]
    adata.obs.loc[adata.obs.index.isin(relabelled_cells), "celltype_lvl1"] = label

In [None]:
# Display the metadata of the endovascular EVT subset after relabelling.
adata_EVT.obs

In [None]:
# Display the metadata of the full object to check the updated labels.
adata.obs

In [None]:
# Level-1 relabelling: the only non-identity entry turns "EVT" into "iEVT";
# every other listed label maps to itself.
new_cluster_names = {
    "B cell": "B cell",
    "DC1": "DC1",
    "DC2": "DC2",
    "EPI": "EPI",
    "EVT": "iEVT",
    "eEVT": "eEVT",
    "Endo": "Endo",
    "Fib": "Fib",
    "Lymph": "Lymph",
    "Mast": "Mast",
    "Mono": "Mono",
    "Mural": "Mural",
    "NK_CD39+": "NK_CD39+",
    "NK_CD39-": "NK_CD39-",
    "Neutro": "Neutro",
    "Other": "Other",
    "Tcell_CD4+": "Tcell_CD4+",
    "Tcell_CD8+": "Tcell_CD8+",
    "Treg": "Treg",
    "decPAM2": "decPAM2",
    "decBAM1": "decBAM1",
    "decBAM2": "decBAM2",
    "pMac": "pMac",
    "pNK": "pNK",
    "decPAM1": "decPAM1",
}

# Rebuild 'celltype_lvl1' from its string form, apply the renaming, and store
# the result as a categorical column.
adata.obs['celltype_lvl1'] = (
    adata.obs['celltype_lvl1']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
# Level-1 label -> level-0 label (coarser grouping into Myeloid / NK / T cell / EVT ...).
new_cluster_names = {
    "B cell": "B cell",
    "DC1": "Myeloid",
    "DC2": "Myeloid",
    "EPI": "EPI",
    "iEVT": "EVT",
    "eEVT": "EVT",
    "Endo": "Endo",
    "Fib": "Fib",
    "Lymph": "Lymph",
    "Mast": "Myeloid",
    "Mono": "Myeloid",
    "Mural": "Mural",
    "NK_CD39+": "NK",
    "NK_CD39-": "NK",
    "Neutro": "Myeloid",
    "Other": "Other",
    "Tcell_CD4+": "T cell",
    "Tcell_CD8+": "T cell",
    "Treg": "T cell",
    "decPAM2": "Myeloid",
    "decBAM1": "Myeloid",
    "decBAM2": "Myeloid",
    "pMac": "Myeloid",
    "pNK": "NK",
    "decPAM1": "Myeloid",
}

# Derive 'celltype_lvl0' from the string form of 'celltype_lvl1', apply the
# mapping, and store the result as a categorical column.
adata.obs['celltype_lvl0'] = (
    adata.obs['celltype_lvl1']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
# Level-1 label -> level-0.5 label: macrophage subsets collapse to "Mac",
# NK subsets to "NK", T-cell subsets to "T cell", iEVT/eEVT to "EVT".
new_cluster_names = {
    "B cell": "B cell",
    "DC1": "DC1",
    "DC2": "DC2",
    "EPI": "EPI",
    "iEVT": "EVT",
    "eEVT": "EVT",
    "Endo": "Endo",
    "Fib": "Fib",
    "Lymph": "Lymph",
    "Mast": "Mast",
    "Mono": "Mono",
    "Mural": "Mural",
    "NK_CD39+": "NK",
    "NK_CD39-": "NK",
    "Neutro": "Neutro",
    "Other": "Other",
    "Tcell_CD4+": "T cell",
    "Tcell_CD8+": "T cell",
    "Treg": "T cell",
    "decPAM2": "Mac",
    "decBAM1": "Mac",
    "decBAM2": "Mac",
    "pMac": "Mac",
    "pNK": "NK",
    "decPAM1": "Mac",
}

# Derive 'celltype_lvl05' from the string form of 'celltype_lvl1', apply the
# mapping, and store the result as a categorical column.
adata.obs['celltype_lvl05'] = (
    adata.obs['celltype_lvl1']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
# Level-1 label -> level-00 label: the listed immune subsets collapse to
# "Immune", iEVT/eEVT to "EVT"; the remaining labels map to themselves.
new_cluster_names = {
    "B cell": "Immune",
    "DC1": "Immune",
    "DC2": "Immune",
    "EPI": "EPI",
    "iEVT": "EVT",
    "eEVT": "EVT",
    "Endo": "Endo",
    "Fib": "Fib",
    "Lymph": "Lymph",
    "Mast": "Immune",
    "Mono": "Immune",
    "Mural": "Mural",
    "NK_CD39+": "Immune",
    "NK_CD39-": "Immune",
    "Neutro": "Immune",
    "Other": "Other",
    "Tcell_CD4+": "Immune",
    "Tcell_CD8+": "Immune",
    "Treg": "Immune",
    "decPAM2": "Immune",
    "decBAM1": "Immune",
    "decBAM2": "Immune",
    "pMac": "Immune",
    "pNK": "Immune",
    "decPAM1": "Immune",
}

# Derive 'celltype_lvl00' from the string form of 'celltype_lvl1', apply the
# mapping, and store the result as a categorical column.
adata.obs['celltype_lvl00'] = (
    adata.obs['celltype_lvl1']
    .astype("str")
    .replace(new_cluster_names)
    .astype("category")
)

In [None]:
# Extended gene panel per level-1 cell type, genes on rows.
# `dendrogram` is not passed, so scanpy's default applies.
sc.pl.dotplot(
    adata,
    var_names=[
        'LAMA4', 'MMP12', 'GPC3', 'ENTPD1', 'B3GNT2', 'C15orf48', 'TFPI',
        'GATA2', 'PPARG', 'FSTL3', 'FLT1', 'FBN1', 'ERVH48-1', 'MFAP5',
    ],
    groupby='celltype_lvl1',
    standard_scale="var",
    swap_axes=True,
)

In [None]:
# Same extended gene panel, grouped by the coarser level-0 annotation.
# `dendrogram` is not passed, so scanpy's default applies.
sc.pl.dotplot(
    adata,
    var_names=[
        'LAMA4', 'MMP12', 'GPC3', 'ENTPD1', 'B3GNT2', 'C15orf48', 'TFPI',
        'GATA2', 'PPARG', 'FSTL3', 'FLT1', 'FBN1', 'ERVH48-1', 'MFAP5',
    ],
    groupby='celltype_lvl0',
    standard_scale="var",
    swap_axes=True,
)

In [None]:
# Re-run the marker ranking on the final 'celltype_lvl1' labels (method='wilcoxon').
sc.tl.rank_genes_groups(adata, groupby='celltype_lvl1', method='wilcoxon')
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names

pd.set_option('display.max_columns', 100)

# One column per group ("<group>_n") listing its ranked gene names; show the top 20.
pd.DataFrame(
    {f"{group}_n": result['names'][group] for group in groups}
).head(20)

In [None]:
# Dot plot of the top 5 ranked genes per level-1 cell type, genes on rows,
# each gene scaled across groups, without a dendrogram.
sc.pl.rank_genes_groups_dotplot(
    adata,
    groupby="celltype_lvl1",
    n_genes=5,
    standard_scale="var",
    swap_axes=True,
    dendrogram=False,
    cmap='Blues',
)

In [None]:
# Save the annotated object to an .h5ad file in the working directory.
adata.write('xenium-decidua-annotated.h5ad')