#### Manual annotation of Xenium add-on panel healthy gut data 
- **Developed by:** Anna Maguza
- **Affiliation:** Faculty of Medicine, Würzburg University
- **Created date:** 2nd August 2024
- **Last modified date:** 18th October 2024

This notebook checks the expression of known marker genes for each predicted cell state.

##### Import packages

In [3]:
import squidpy as sq
import numpy as np
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap

In [4]:
# Global scanpy configuration: logging verbosity, a dump of the package
# versions for provenance, and the default figure parameters.
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    color_map='magma_r',
    vector_friendly=True,
    format='svg',
)

-----
anndata     0.10.5.post1
scanpy      1.9.8
-----
PIL                         10.2.0
anyio                       NA
arrow                       1.3.0
asciitree                   NA
asttokens                   NA
attr                        23.2.0
attrs                       23.2.0
babel                       2.14.0
backcall                    0.2.0
beta_ufunc                  NA
binom_ufunc                 NA
brotli                      1.1.0
certifi                     2023.11.17
cffi                        1.16.0
charset_normalizer          3.3.2
cloudpickle                 3.0.0
comm                        0.2.1
cycler                      0.12.1
cython_runtime              NA
dask                        2024.8.0
dask_image                  2023.08.1
datashader                  0.16.0
datatree                    0.0.13
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
docrep                   

### Read in data

In [5]:
# Load the annotated Xenium AnnData object from the Annotation_prediction folder.
adata = sc.read_h5ad('data/10X_Xenium_adult_colon_data/Annotation_prediction/Xenium_annotated_181024.h5ad')

In [6]:
# Directory that receives every figure saved by this notebook.
# NOTE(review): the value ends with '/' and the save calls add another '/',
# so the resulting paths contain '//' — presumably harmless on POSIX; confirm
# the directory exists before running, as nothing here creates it.
fig_dir = 'data/10X_Xenium_adult_colon_data/Manual_annotation/figures/'

+ Visualize predicted annotations

In [None]:
# UMAP coloured by the predicted cell state, saved as a PNG in fig_dir.
# The rc_context restores matplotlib settings changed by set_figure_params.
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(15, 10))
    sc.pl.umap(
        adata,
        color=['Cell_State'],
        ncols=2,
        size=1,
        frameon=False,
        show=False,
    )
    plt.savefig(f"{fig_dir}/umap_predicted_annotations.png", bbox_inches="tight")

In [None]:
# Spatial scatter of the tissue coloured by the predicted cell state,
# saved as a PNG in fig_dir.
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(20, 20))
    sq.pl.spatial_scatter(
        adata,
        color=["Cell_State"],
        library_id="spatial",
        shape=None,
        size=0.2,
        alpha=1.0,
        wspace=12,
        frameon=False,
    )
    plt.savefig(f"{fig_dir}/predicted_annotations_on_tissue.png", bbox_inches="tight")

### Normalize dataset

In [14]:
# Keep the un-normalized matrix in layers["counts"] so it can be restored
# before the object is written to disk.
# The guard makes the cell safe to re-run: without it a second execution
# would store already-normalized values as "counts" and normalize X twice.
if "counts" in adata.layers:
    # Re-run (or counts already stored): start again from the stored counts.
    adata.X = adata.layers["counts"].copy()
else:
    adata.layers["counts"] = adata.X.copy()

# Library-size normalization followed by log(1 + x) transformation of adata.X.
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

normalizing counts per cell
    finished (0:00:00)


+ Create a function that visualizes a cell state and its marker genes on the UMAP and on the spatial image, and then saves the figure

In [34]:
def visualize_cell_states_and_markers(adata, cell_state, markers, filename=None):
    """
    Visualize a specific cell state and markers in UMAP and spatial images.

    The figure has one row for the cell state (cells of that state highlighted
    against all other cells) followed by one row per marker. The left column
    is the UMAP, the right column the spatial scatter. The figure is written
    to ``{fig_dir}/{filename}.png`` (``fig_dir`` is the notebook-level output
    directory) and closed afterwards.

    ``adata`` is left as it was found: the temporary ``obs`` column and the
    matching ``uns`` colour entry used for the highlight are removed (or
    restored to their previous content) even if plotting fails.

    Parameters:
    -----------
    adata : AnnData
        Annotated data matrix. Must provide ``obs['Cell_State']``.
    cell_state : str
        The cell state to highlight.
    markers : list
        List of markers to visualize. A single marker name (str) or an empty
        list is accepted as well.
    filename : str, optional
        Base name (without extension) of the saved figure. Defaults to
        ``cell_state``. Pass a distinct name when the same cell state is
        plotted with several marker sets, otherwise each call overwrites the
        previous figure.
    """
    import warnings  # local import so the block does not rely on a new top-level import

    # Accept a single marker name or any iterable of marker names.
    markers = [markers] if isinstance(markers, str) else list(markers)

    # Boolean mask of the cells belonging to the requested state.
    is_target = adata.obs['Cell_State'] == cell_state
    if not is_target.any():
        warnings.warn(
            f"No cell has Cell_State == {cell_state!r}; nothing will be highlighted.",
            stacklevel=2,
        )

    # Background colour for other cells, highlight colour for the state.
    colors = ['#E4E0E1', '#640D5F']
    color_map = ListedColormap(colors)
    colors_key = f'{cell_state}_colors'
    out_name = cell_state if filename is None else filename

    # Remember anything already stored under the temporary names so that it
    # is restored instead of being silently destroyed.
    _missing = object()
    previous_obs = adata.obs[cell_state].copy() if cell_state in adata.obs.columns else _missing
    previous_uns = adata.uns.get(colors_key, _missing)

    fig = None
    try:
        # Temporary 'True'/'False' column used to colour the first row.
        adata.obs[cell_state] = is_target.astype(str)
        adata.uns[colors_key] = colors

        # Plot parameters; squeeze=False keeps axs two-dimensional even when
        # there are no markers (single row).
        n_cols = 2
        n_rows = 1 + len(markers)
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(10*n_cols, 5*n_rows), squeeze=False)

        # UMAP with cell state
        sc.pl.umap(adata, color=cell_state, palette=colors, ax=axs[0, 0],
                   show=False, frameon=False, size=1)

        # Spatial image with cell state
        sq.pl.spatial_scatter(
            adata,
            color=cell_state,
            library_id="spatial",
            shape=None,
            ax=axs[0, 1],
            size=0.2,
            alpha=1,
            frameon=False,
            cmap=color_map
        )

        # Plots with markers: one row per marker, UMAP left and spatial right.
        for i, marker in enumerate(markers, start=1):
            # UMAP
            sc.pl.umap(adata, color=marker, ax=axs[i, 0], show=False,
                       frameon=False, size=1, cmap='magma_r')

            # Spatial
            sq.pl.spatial_scatter(
                adata,
                color=marker,
                library_id="spatial",
                shape=None,
                ax=axs[i, 1],
                size=0.2,
                alpha=1,
                frameon=False,
                cmap='magma_r'
            )

        # Save
        fig.tight_layout()
        fig.savefig(f"{fig_dir}/{out_name}.png", bbox_inches="tight", dpi=300)
    finally:
        # Always release the figure and undo the temporary annotations.
        if fig is not None:
            plt.close(fig)

        if previous_obs is _missing:
            adata.obs.drop(columns=[cell_state], inplace=True, errors='ignore')
        else:
            adata.obs[cell_state] = previous_obs

        if previous_uns is _missing:
            adata.uns.pop(colors_key, None)
        else:
            adata.uns[colors_key] = previous_uns

#### Epithelial cells

* Stem cells

In [39]:
# Stem cells: compare the 'MTRNR2L12+ASS1+_SC' state with LGR5, OLFM4, ASCL2 and RGMB expression.
cell_state = 'MTRNR2L12+ASS1+_SC'
markers = ['LGR5', 'OLFM4', 'ASCL2', 'RGMB']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [38]:
# Stem cells: compare the 'RPS10+_RPS17+_SC' state with LGR5, OLFM4, ASCL2 and RGMB expression.
cell_state = 'RPS10+_RPS17+_SC'
markers = ['LGR5', 'OLFM4', 'ASCL2', 'RGMB']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [37]:
# Stem cells: compare the 'FXYD3+_CKB+_SC' state with LGR5, OLFM4, ASCL2 and RGMB expression.
cell_state = 'FXYD3+_CKB+_SC'
markers = ['LGR5', 'OLFM4', 'ASCL2', 'RGMB']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ TA

In [36]:
# TA: compare the 'TA' state with MKI67, UBE2C and TOP2A expression.
cell_state = 'TA'
markers = ['MKI67', 'UBE2C', 'TOP2A']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ EEC cells

In [40]:
# EEC cells: compare the 'EECs' state with CHGA and NEUROD1 expression.
cell_state = 'EECs'
markers = ['CHGA', 'NEUROD1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Goblet

In [41]:
# Goblet: compare the 'Goblet cells' state with SPDEF and WFDC2 expression.
cell_state = 'Goblet cells'
markers = ['SPDEF', 'WFDC2']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Tuft

In [42]:
# Tuft: compare the 'Tuft cells' state with LRMP and TRPM5 expression.
cell_state = 'Tuft cells'
markers = ['LRMP', 'TRPM5']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Enterocytes

In [43]:
# Enterocytes: compare the 'Enterocyte' state with FABP2, CEACAM1, EPCAM and ANPEP expression.
cell_state = 'Enterocyte'
markers = ['FABP2', 'CEACAM1', 'EPCAM', 'ANPEP']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Colonocytes

In [44]:
# Colonocytes: compare the 'Colonocyte' state with CA2 and SLC26A2 expression.
cell_state = 'Colonocyte'
markers = ['CA2', 'SLC26A2']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ BEST4+ epithelial

In [45]:
# BEST4+ epithelial: compare the 'BEST4+ epithelial' state with BEST4 and OTOP2 expression.
cell_state = 'BEST4+ epithelial'
markers = ['BEST4', 'OTOP2']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Immature goblet cells

In [46]:
# Immature goblet cells: compare the 'Goblet cells' state with KLK1, RETNLB and CLCA1 expression.
# NOTE(review): the figure is saved under the cell-state name, so this call
# writes 'Goblet cells.png' again and replaces the figure produced by the
# earlier 'Goblet cells' cell (SPDEF / WFDC2).
cell_state = 'Goblet cells'
markers = ['KLK1', 'RETNLB', 'CLCA1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Microfold cells

In [47]:
# Microfold cells: compare the 'Microfold cell' state with SPIB and CCL20 expression.
cell_state = 'Microfold cell'
markers = ['SPIB', 'CCL20']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Deep crypt secretory

In [48]:
# Score every cell for REG4 and relabel high-scoring cells as
# 'Deep_crypt_secretory'.
genes = ['REG4']
sc.tl.score_genes(adata, genes, score_name = 'REG4_score')

# Register the new label only when it is missing: add_categories raises a
# ValueError for an existing category, which made this cell fail on re-run.
if 'Deep_crypt_secretory' not in adata.obs['Cell_State'].cat.categories:
    adata.obs['Cell_State'] = adata.obs['Cell_State'].cat.add_categories('Deep_crypt_secretory')

# NOTE(review): the cut-off of 14 has no recorded rationale and is applied to
# every cell regardless of its current label — confirm both are intended.
reg4_score_threshold = 14
adata.obs.loc[adata.obs['REG4_score'] > reg4_score_threshold, 'Cell_State'] = 'Deep_crypt_secretory'

computing score 'REG4_score'
    finished: added
    'REG4_score', score of gene set (adata.obs).
    16 total control genes are used. (0:00:00)


In [49]:
# Deep crypt secretory: compare the newly assigned 'Deep_crypt_secretory' state with REG4 expression.
cell_state = 'Deep_crypt_secretory'
markers = ['REG4']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


#### T cells

In [50]:
# T cells: compare the 'CD4 T' state with CD2, CD3E, IL7R and CD8A expression.
cell_state = 'CD4 T'
markers = ['CD2', 'CD3E', 'IL7R', 'CD8A']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [51]:
# T cells: compare the 'CD8 T' state with CD2, CD3E, IL7R and CD8A expression.
cell_state = 'CD8 T'
markers = ['CD2', 'CD3E', 'IL7R', 'CD8A']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ T regulatory

In [52]:
# T regulatory: compare the 'Tregs' state with FOXP3, CTLA4 and TIGIT expression.
cell_state = 'Tregs'
markers = ['FOXP3', 'CTLA4', 'TIGIT']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ ILCs

In [55]:
# ILCs: compare the 'ILCs' state with HPGDS and IL1RL1 expression.
# 'HPGDS' was listed twice, which only produced two identical rows in the
# figure; the duplicate is removed.
# NOTE(review): if the second entry was meant to be a different marker,
# add it to the list.
cell_state = 'ILCs'
markers = ['HPGDS', 'IL1RL1']
visualize_cell_states_and_markers(adata, cell_state=cell_state, markers=markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ NK

In [56]:
# NK: compare the 'NK' state with GZMA, NKG7 and PRF1 expression.
cell_state = 'NK'
markers = ['GZMA', 'NKG7', 'PRF1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


### B cells lineage

In [57]:
# B cells lineage: compare the 'B cells' state with PAX5 and MS4A1 expression.
cell_state = 'B cells'
markers = ['PAX5', 'MS4A1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Pro-B

In [58]:
# Pro-B: compare the 'B cells' state with CD79B expression.
# NOTE(review): the figure is saved under the cell-state name, so this call
# overwrites the 'B cells.png' written by the previous 'B cells' cell.
cell_state = 'B cells'
markers = ['CD79B']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Pre-B

In [59]:
# Pre-B: compare the 'B cells' state with VPREB3 expression.
# NOTE(review): the figure is saved under the cell-state name, so this call
# overwrites the 'B cells.png' written by the previous 'B cells' cell.
cell_state = 'B cells'
markers = ['VPREB3']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Memory B

In [60]:
# Memory B: compare the 'B cells' state with SELL expression.
# NOTE(review): the figure is saved under the cell-state name, so this call
# overwrites the 'B cells.png' written by the previous 'B cells' cell.
cell_state = 'B cells'
markers = ['SELL']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Cycling B

In [61]:
# Cycling B: compare the 'B cells' state with MKI67, HMGB2 and UBE2C expression.
# NOTE(review): the figure is saved under the cell-state name, so this call
# overwrites the 'B cells.png' written by the previous 'B cells' cell.
cell_state = 'B cells'
markers = ['MKI67', 'HMGB2', 'UBE2C']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


#### Endothelial lineage cells

In [63]:
# Endothelial lineage: compare the 'Mature venous EC' state with ACKR1, VWF and CPE expression.
cell_state = 'Mature venous EC'
markers = ['ACKR1', 'VWF', 'CPE']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [64]:
# Endothelial lineage: compare the 'arterial capillary' state with CA4 expression.
# NOTE(review): confirm 'arterial capillary' is a value of Cell_State at this
# point; if it is not, no cell is highlighted in the first row.
cell_state = 'arterial capillary'
markers = ['CA4']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [65]:
# Endothelial lineage: compare the 'LEC' state with PROX1 and LYVE1 expression.
cell_state = 'LEC'
markers = ['PROX1', 'LYVE1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


### Neural lineage cells

In [66]:
# Neural lineage: compare the 'Glial cells' state with ETV1 expression.
cell_state = 'Glial cells'
markers = ['ETV1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


### Mesenchymal lineage cells

In [67]:
# Mesenchymal lineage: compare the 'Fibroblasts' state with its marker panel.
cell_state = 'Fibroblasts'
markers = [
    'ADAM28',
    'CCL11',
    'CCL13',
    'PDGFRA',
    'BMP4',
    'F3',
    'MMP1',
]
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [68]:
# Mesenchymal lineage: compare the 'Myofibroblasts' state with ACTA2, TAGLN, HHIP and NPNT expression.
cell_state = 'Myofibroblasts'
markers = ['ACTA2', 'TAGLN', 'HHIP', 'NPNT']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [69]:
# Mesenchymal lineage: compare the 'Pericytes' state with its marker panel.
cell_state = 'Pericytes'
markers = [
    'NOTCH3',
    'RGS5',
    'PDGFRB',
    'PLN',
    'KCNAB1',
    'ABCC8',
]
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


#### Myeloid cells

In [70]:
# Myeloid cells: compare the 'Monocytes' state with FCN1 expression.
cell_state = 'Monocytes'
markers = ['FCN1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [71]:
# Myeloid cells: compare the 'Macrophages' state with CD163, C1QB, C1QC, LYVE1 and RNASE1 expression.
cell_state = 'Macrophages'
markers = ['CD163', 'C1QB', 'C1QC', 'LYVE1', 'RNASE1']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [72]:
# Myeloid cells: compare the 'Mast cells' state with GATA2, CPA3 and HPGDS expression.
cell_state = 'Mast cells'
markers = ['GATA2', 'CPA3', 'HPGDS']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


* Follicular DC

In [73]:
# Follicular DC: compare the 'DC' state with CXCL13 and CR2 expression.
cell_state = 'DC'
markers = ['CXCL13', 'CR2']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


In [74]:
# Compare the 'DC' state with CLEC9A expression.
# NOTE(review): the figure is saved under the cell-state name, so this call
# overwrites the 'DC.png' written by the previous 'DC' cell (CXCL13 / CR2).
cell_state = 'DC'
markers = ['CLEC9A']
visualize_cell_states_and_markers(adata, cell_state, markers)

  color_vector = pd.Categorical(values.map(color_map))
  cax = scatter(
  _cax = scatter(


+ Create final image with annotation

In [75]:
# Final relabelling: cells with the given C_scANVI label receive the given
# Cell_State label.
scanvi_to_cell_state = {
    'Adult Glia': 'Glial cells',
    'arterial capillary': 'Arterial capillary',
    'gdT': 'Gamma delta T cells',
}

# Register only the labels that are not categories yet. add_categories raises
# a ValueError for existing categories (so the cell failed on re-run), and
# assigning a label that is not a category fails as well.
missing_labels = [
    label
    for label in scanvi_to_cell_state.values()
    if label not in adata.obs['Cell_State'].cat.categories
]
if missing_labels:
    adata.obs['Cell_State'] = adata.obs['Cell_State'].cat.add_categories(missing_labels)

for scanvi_label, final_label in scanvi_to_cell_state.items():
    adata.obs.loc[adata.obs['C_scANVI'] == scanvi_label, 'Cell_State'] = final_label

In [77]:
# Drop Cell_State categories that no longer have any cell assigned to them.
adata.obs['Cell_State'] = adata.obs['Cell_State'].cat.remove_unused_categories()

In [None]:
# Put the Cell_State categories in alphabetical order (as an ordered
# categorical), then draw the final annotation on the tissue and save it.
adata.obs['Cell_State'] = adata.obs['Cell_State'].astype('category')
alphabetical_states = sorted(adata.obs['Cell_State'].cat.categories)
adata.obs['Cell_State'] = adata.obs['Cell_State'].cat.reorder_categories(
    alphabetical_states, ordered=True
)

with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(20, 20))
    sq.pl.spatial_scatter(
        adata,
        color=["Cell_State"],
        library_id="spatial",
        shape=None,
        size=0.2,
        alpha=1.0,
        wspace=12,
        frameon=False,
    )
    plt.savefig(f"{fig_dir}/predicted_annotations_on_tissue_final.png", bbox_inches="tight")

In [None]:
# Put the matrix stored in layers["counts"] back into X before saving, so the
# written file does not carry the normalized / log-transformed values in X.
adata.X = adata.layers["counts"].copy()

In [83]:
# Write the object with the final Cell_State annotation to disk.
adata.write_h5ad('data/10X_Xenium_adult_colon_data/Annotation_prediction/Xenium_final_annotation_181024.h5ad')