### Notebook for the manual annotation of cell states for all skin

- **Developed by:** Anna Maguza
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- **Date:** 27th of December 2023

### Import required modules

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import anndata as ad

### Set up working environment

In [None]:
# Session setup: logging verbosity level 3, a version report for the
# installed packages, and the default figure parameters for this notebook.
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    color_map='RdPu',
    vector_friendly=True,
    format='svg',
)

### Read in Human skin dataset

In [None]:
# Load the skin AnnData object from disk; the bare name on the last line
# displays its summary as the cell output.
adata = sc.read_h5ad('data_skin/SCC0120_1_Skin_scANVI_leiden_Anna.h5ad')
adata

### Visualize the manifold

In [None]:
# UMAP coloured by donor, condition and the QC covariates stored in obs.
sc.set_figure_params(dpi=300, figsize=(7, 7))
sc.pl.umap(
    adata,
    color=['donor', 'condition', 'n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_ribo'],
    frameon=False,
    size=10,
    legend_fontsize=8,
    ncols=3,
)

In [None]:
# UMAP with the leiden clusters and the scANVI labels written on the data.
sc.set_figure_params(dpi=300, figsize=(5, 5))
sc.pl.umap(
    adata,
    color=['leiden', 'C_scANVI'],
    frameon=False,
    size=3,
    legend_fontsize=8,
    ncols=3,
    legend_loc='on data',
)

### Preprocess dataset for marker genes visualization

* Normalize and log transform

In [None]:
# Build an independent working object for marker visualisation.
# X, var and obs are copied so that the normalisation/log transform below and
# the obs columns added later in this notebook cannot write through to `adata`.
# obsm is passed as-is because this notebook section only reads from it.
adata_log = ad.AnnData(
    X = adata.X.copy(),
    var = adata.var.copy(),
    obs = adata.obs.copy(),
    obsm = adata.obsm,
)

# Library-size normalisation to 1e6 counts per cell (highly expressed genes
# are excluded from the size factor), followed by log(1 + x).
sc.pp.normalize_total(adata_log, target_sum = 1e6, exclude_highly_expressed = True)
sc.pp.log1p(adata_log)

# NOTE(review): this layer is derived from adata_log.X *after* log1p, i.e. it is
# the square root of re-normalised log values, not of normalised raw counts.
# Confirm this is intended before using the layer downstream.
adata_log.layers["sqrt_norm"] = np.sqrt(
    sc.pp.normalize_total(adata_log, inplace = False)["X"]
)

* Identify differentially expressed genes

In [None]:
# Wilcoxon ranking of genes per leiden cluster (100 genes kept per group),
# then the results of all groups gathered into one DataFrame.
sc.tl.rank_genes_groups(
    adata_log,
    groupby="leiden",
    method="wilcoxon",
    n_genes=100,
)
df = sc.get.rank_genes_groups_df(adata_log, group=None)

* Save DEGs

In [None]:
# Write the per-cluster DEG table to CSV.
df.to_csv(
    'data_skin/SCC0120_1_Skin_leiden_DEG.csv'
)

* Create a column to store annotations

In [None]:
# Start the annotation column as a copy of the leiden cluster labels;
# clusters are renamed to cell states further down.
adata_log.obs['predicted_cell_states'] = (
    adata.obs['leiden'].copy()
)

### Visualize marker genes expression

## Epithelial Cells

* Keratinocytes

In [None]:
# Marker panel used to compute the keratinocyte score.
keratinocyte_genes = [
    'KRT1', 'KRT5', 'KRT14', 'KRT15',
    'ALDH2', 'ATP1B1', 'CD44', 'TFRC', 'DEFB1',
    'FASN', 'GJB2', 'ITGA6',
    'KRT10', 'KRT19', 'KRT8', 'KRT2',
    'UBE2C',
]

In [None]:
# Per-cell keratinocyte score, stored in obs['keratinocytes_cells_score'].
sc.tl.score_genes(
    adata_log,
    keratinocyte_genes,
    score_name='keratinocytes_cells_score',
)

In [None]:
# UMAP panels for the keratinocyte markers (plus ITGB1 and SPRR2A, which are
# plotted but not part of the score) and the score itself.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=[
        'KRT1', 'KRT5', 'KRT14', 'KRT15', 'ALDH2',
        'ATP1B1', 'CD44', 'TFRC', 'DEFB1', 'FASN',
        'GJB2', 'ITGA6', 'ITGB1', 'KRT10', 'KRT19',
        'KRT8', 'KRT2', 'SPRR2A', 'UBE2C', 'keratinocytes_cells_score',
    ],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Register the new label first: a categorical column only accepts values that
# are among its categories.
adata_log.obs['predicted_cell_states'] = adata_log.obs['predicted_cell_states'].cat.add_categories(['Keratinocytes'])

# Cells with a keratinocyte score above 1 are labelled 'Keratinocytes'.
# One .loc assignment on obs is used instead of chained indexing
# (obs[col][mask] = value), which writes through a temporary Series and is not
# guaranteed to update obs (it is a no-op under pandas Copy-on-Write).
adata_log.obs.loc[adata_log.obs['keratinocytes_cells_score'] > 1, 'predicted_cell_states'] = 'Keratinocytes'

In [None]:
# Temporary 'True'/'False' string column marking the keratinocyte-labelled
# cells, plotted next to the clusters and the score, then removed again.
adata_log.obs['keratinocytes'] = (
    adata_log.obs['predicted_cell_states'] == 'Keratinocytes'
).astype(str)

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['leiden', 'C_scANVI', 'keratinocytes', 'keratinocytes_cells_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=4,
    color_map='magma_r',
)

del adata_log.obs['keratinocytes']

* Stem cells markers

In [None]:
# Marker panel used to compute the stem cell score.
stem_cells_genes = [
    'LGR5', 'LGR6', 'LRIG1', 'ABCB5', 'PRDM1', 'MYC',
    'CD200', 'CD34', 'ITGA6', 'THY1',
    'EZH2', 'GLI1', 'LHX2', 'NFATC1', 'TP63', 'POU5F1',
    'PLET1', 'PROCR', 'SLAMF6', 'SOX2', 'SOX9', 'TCF7',
]

In [None]:
# Per-cell stem cell score, stored in obs['stem_cells_score'].
sc.tl.score_genes(
    adata_log,
    stem_cells_genes,
    score_name='stem_cells_score',
)

In [None]:
# UMAP panels for the stem cell markers (including a few genes that are
# plotted but not part of the score) and the score itself.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=[
        'LGR5', 'LGR6', 'KRT15', 'LRIG1', 'ABCB5',
        'CTNNB1', 'PRDM1', 'MYC', 'CD200', 'ITGB1',
        'CD34', 'CD44', 'ITGA6', 'THY1', 'KRT19',
        'EZH2', 'FOS', 'GLI1', 'LHX2', 'NFATC1',
        'TP63', 'POU5F1', 'PLET1', 'PROCR', 'SLAMF6',
        'SOX2', 'SOX9', 'TCF7', 'stem_cells_score',
    ],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Register the new label first: a categorical column only accepts values that
# are among its categories.
adata_log.obs['predicted_cell_states'] = adata_log.obs['predicted_cell_states'].cat.add_categories(['Stem_cells'])

# Cells with a stem cell score above 1 are labelled 'Stem_cells'.
# One .loc assignment on obs is used instead of chained indexing
# (obs[col][mask] = value), which writes through a temporary Series and is not
# guaranteed to update obs (it is a no-op under pandas Copy-on-Write).
adata_log.obs.loc[adata_log.obs['stem_cells_score'] > 1, 'predicted_cell_states'] = 'Stem_cells'

In [None]:
# Temporary 'True'/'False' string column marking the stem-cell-labelled
# cells, plotted next to the clusters and the score, then removed again.
adata_log.obs['Stem_cells'] = (
    adata_log.obs['predicted_cell_states'] == 'Stem_cells'
).astype(str)

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['leiden', 'C_scANVI', 'Stem_cells', 'stem_cells_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=4,
    color_map='magma_r',
)

del adata_log.obs['Stem_cells']

* Melanocytes markers

In [None]:
# Marker panel used to compute the melanocyte score.
# Each gene is listed once ('DCT' used to appear twice).
melanocytes_genes = [
    'MLANA', 'PMEL', 'DCT', 'MITF', 'APOD', 'ATP1B1', 'KIT',
    'ME1', 'ME2', 'PHLDA1', 'S100A1', 'SOX10', 'TYR',
    'TYRP1',
]

In [None]:
# Per-cell melanocyte score, stored in obs['melanocytes_score'].
sc.tl.score_genes(
    adata_log,
    melanocytes_genes,
    score_name='melanocytes_score',
)

In [None]:
# UMAP panels for the melanocyte markers (plus CD63, which is plotted but not
# part of the score) and the score itself. Each gene is drawn once; 'DCT' was
# previously listed twice, which produced a duplicate panel.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=[
        'MLANA', 'PMEL', 'DCT', 'MITF', 'APOD',
        'ATP1B1', 'KIT', 'ME1', 'ME2', 'CD63',
        'PHLDA1', 'S100A1', 'SOX10', 'TYR', 'TYRP1',
        'melanocytes_score',
    ],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Register 'Melanocytes' as an allowed label of the annotation column.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.add_categories(['Melanocytes'])
)

In [None]:
# Leiden cluster 20 is annotated as 'Melanocytes'; the old '20' category is
# then dropped from the annotation column.
adata_log.obs.loc[
    adata_log.obs['leiden'] == '20',
    'predicted_cell_states',
] = 'Melanocytes'

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.remove_categories(['20'])
)

In [None]:
# Rebuild the plotting helper column from scratch: only the labels listed
# below are shown, every other cell stays unset.
adata_log.obs['Cell_States'] = None
# Make column categorical
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].astype('category')
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].cat.add_categories(['Melanocytes', 'Keratinocytes'])

# Copy each label from 'predicted_cell_states' with a single .loc assignment.
# Chained indexing (obs[col][mask] = value) writes through a temporary Series
# and is not guaranteed to update obs (no-op under pandas Copy-on-Write).
for label in ['Keratinocytes', 'Melanocytes']:
    adata_log.obs.loc[adata_log.obs['predicted_cell_states'] == label, 'Cell_States'] = label

In [None]:
# Keratinocyte and melanocyte scores next to the resulting Cell_States labels.
sc.set_figure_params(dpi=500)
sc.pl.umap(
    adata_log,
    color=['keratinocytes_cells_score', 'melanocytes_score', 'Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

## Neuronal Cells

In [None]:
# Neuronal marker panel and its per-cell score (obs['neuronal_score']).
neuronal_genes = ['NRXN1', 'SCN7A', 'ANK3']
sc.tl.score_genes(
    adata_log,
    neuronal_genes,
    score_name='neuronal_score',
)

In [None]:
# Neuronal markers, their score and the leiden clusters on the UMAP.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['NRXN1', 'SCN7A', 'ANK3', 'neuronal_score', 'leiden'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Leiden cluster 16 is annotated as 'Neuronal': add the label, assign it,
# then drop the old '16' category from the annotation column.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.add_categories(['Neuronal'])
)

adata_log.obs.loc[
    adata_log.obs['leiden'] == '16',
    'predicted_cell_states',
] = 'Neuronal'

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.remove_categories(['16'])
)

In [None]:
# Rebuild the plotting helper column from scratch: only 'Neuronal' is shown,
# every other cell stays unset.
adata_log.obs['Cell_States'] = None
# Make column categorical
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].astype('category')
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].cat.add_categories(['Neuronal'])

# Copy the label from 'predicted_cell_states' with a single .loc assignment.
# Chained indexing (obs[col][mask] = value) writes through a temporary Series
# and is not guaranteed to update obs (no-op under pandas Copy-on-Write).
adata_log.obs.loc[adata_log.obs['predicted_cell_states'] == 'Neuronal', 'Cell_States'] = 'Neuronal'

In [None]:
# Neuronal markers and score next to the resulting Cell_States labels.
sc.set_figure_params(dpi=400)
sc.pl.umap(
    adata_log,
    color=['NRXN1', 'SCN7A', 'ANK3', 'neuronal_score', 'Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

## Mesenchymal Cells

* Vascular endothelium

In [None]:
# Marker panel used to compute the vascular endothelium score.
ve_genes = [
    'PECAM1', 'EMCN', 'CDH5', 'VWF',
    'KDR', 'FLT1', 'TEK', 'CLDN5',
]

In [None]:
# Per-cell vascular endothelium score, stored in obs['ve_score'].
sc.tl.score_genes(
    adata_log,
    ve_genes,
    score_name='ve_score',
)

In [None]:
# Vascular endothelium markers, their score and the leiden clusters.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['PECAM1', 'EMCN', 'CDH5', 'VWF', 'KDR', 'FLT1', 'TEK', 'CLDN5', 've_score', 'leiden'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Artery and vein marker panels, each scored into its own obs column.
artery_genes = ['GJA4', 'GJA5', 'HEY1', 'GATA2', 'SOX17', 'MECOM']
sc.tl.score_genes(
    adata_log,
    artery_genes,
    score_name='artery_score',
)

vein_genes = ['ACKR1', 'NR2F2', 'PLVAP']
sc.tl.score_genes(
    adata_log,
    vein_genes,
    score_name='vein_score',
)

In [None]:
# Artery markers (plus CXCR4, which is plotted but not part of the score)
# and the artery score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['GJA4', 'GJA5', 'HEY1', 'GATA2', 'CXCR4', 'SOX17', 'MECOM', 'artery_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Vein markers and the vein score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['ACKR1', 'NR2F2', 'PLVAP', 'vein_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Leiden cluster 9 -> 'Artery_VE', cluster 5 -> 'Vein_VE'; the old '9' and '5'
# categories are dropped from the annotation column afterwards.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.add_categories(['Artery_VE', 'Vein_VE'])
)
adata_log.obs.loc[adata_log.obs['leiden'] == '9', 'predicted_cell_states'] = 'Artery_VE'
adata_log.obs.loc[adata_log.obs['leiden'] == '5', 'predicted_cell_states'] = 'Vein_VE'

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.remove_categories(['9', '5'])
)

* Lymphatic endothelium genes

In [None]:
# Marker panel used to compute the lymphatic endothelium score.
le_genes = [
    'LYVE1',
    'PDPN',
]

In [None]:
# Per-cell lymphatic endothelium score, stored in obs['le_score'].
sc.tl.score_genes(
    adata_log,
    le_genes,
    score_name='le_score',
)

In [None]:
# Lymphatic endothelium markers and their score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['LYVE1', 'PDPN', 'le_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Leiden cluster 17 is annotated as 'LE' (lymphatic endothelium); the old '17'
# category is dropped from the annotation column afterwards.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.add_categories(['LE'])
)
adata_log.obs.loc[adata_log.obs['leiden'] == '17', 'predicted_cell_states'] = 'LE'

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.remove_categories(['17'])
)

In [None]:
# Rebuild the plotting helper column from scratch: only the endothelial labels
# listed below are shown, every other cell stays unset.
adata_log.obs['Cell_States'] = None
# Make column categorical
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].astype('category')
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].cat.add_categories(['LE', 'Artery_VE', 'Vein_VE'])

# Copy each label from 'predicted_cell_states' with a single .loc assignment.
# Chained indexing (obs[col][mask] = value) writes through a temporary Series
# and is not guaranteed to update obs (no-op under pandas Copy-on-Write).
for label in ['LE', 'Artery_VE', 'Vein_VE']:
    adata_log.obs.loc[adata_log.obs['predicted_cell_states'] == label, 'Cell_States'] = label

In [None]:
# Endothelial scores next to the resulting Cell_States labels.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['artery_score', 'vein_score', 'le_score', 'Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=2,
    color_map='magma_r',
)

* Fibroblasts

In [None]:
# Marker panel used to compute the fibroblast score.
fibroblasts_genes = [
    'MMP2',
    'COL1A1',
    'COL1A2',
    'NT5E',
    'COL6A1',
]

In [None]:
# Per-cell fibroblast score, stored in obs['fibroblasts_score'].
sc.tl.score_genes(
    adata_log,
    fibroblasts_genes,
    score_name='fibroblasts_score',
)

In [None]:
# Fibroblast markers and their score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['MMP2', 'COL1A1', 'COL1A2', 'NT5E', 'COL6A1', 'fibroblasts_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

* Mesenchymal Stem Cells

Reference 1: CD105+, CD90+, CD73+, CD45−, CD34−, CD19−, HLA-DRA−, and CD11b− (Fan, 2022)

In [None]:
# Reference 1: score the three scored markers, then plot them together with
# the score and the additional genes PTPRC, CD34, HLA-DRA and ITGAM.
MSCs_genes1 = ['ENG', 'THY1', 'NT5E']
sc.tl.score_genes(adata_log, MSCs_genes1, score_name='MSCs_score1')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=MSCs_genes1 + ['MSCs_score1', 'PTPRC', 'CD34', 'HLA-DRA', 'ITGAM'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=4,
    color_map='magma_r',
)


Reference 2: CCL13, NGFR, TFAP2A, TBX5, IGF1, TMEM176A/B (Wang, 2021)

In [None]:
# Reference 2: score the marker panel and plot every marker plus the score.
MSCs_genes2 = ['CCL13', 'NGFR', 'TFAP2A', 'TBX5', 'IGF1', 'TMEM176A', 'TMEM176B']
sc.tl.score_genes(adata_log, MSCs_genes2, score_name='MSCs_score2')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=MSCs_genes2 + ['MSCs_score2'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=4,
    color_map='magma_r',
)

Reference 3: CD44+, CD73+, CD90+, CD105+, CD166+, SSEA-4+, Vimentin+, CD34-, CD45-, HLA-DR- (Ullah, 2015)

In [None]:
# Reference 3: score the marker panel, then plot it together with the score
# and the additional genes CD34, PTPRC and HLA-DRA.
MSCs_genes3 = ['CD44', 'NT5E', 'THY1', 'ENG', 'ALCAM', 'POU5F1']
sc.tl.score_genes(adata_log, MSCs_genes3, score_name='MSCs_score3')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=MSCs_genes3 + ['MSCs_score3', 'CD34', 'PTPRC', 'HLA-DRA'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

Reference 4: SOX2, ACTA2, CSPG1 (ACAN), CD200, ITGA8 and PDGFRA, CD34, SCA1 (ATXN1), HIC1, DPP4/CD26, DPT, EN1 (recommended by Samantha Morris)

In [None]:
# Reference 4: score the marker panel and plot every marker plus the score.
MSCs_genes4 = [
    'SOX2', 'ACTA2', 'ACAN', 'CD200', 'ITGA8', 'PDGFRA',
    'CD34', 'ATXN1', 'HIC1', 'DPP4', 'DPT', 'EN1',
]
sc.tl.score_genes(adata_log, MSCs_genes4, score_name='MSCs_score4')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=MSCs_genes4 + ['MSCs_score4'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

* Label clusters 2 and 19 as fibroblasts, and the subset of cluster 2 beyond UMAP x = 25 as MSCs-like cells

In [None]:
# Pull the UMAP coordinates of the cells in leiden cluster 17 from `adata`.
# NOTE(review): the variables defined here are not read again in the visible
# part of the notebook (the next cells redefine umap_coordinates and
# leiden_clusters from adata_log) - confirm whether this cell is still needed.
umap_coordinates = adata.obsm['X_umap']
leiden_clusters = adata.obs['leiden']
cluster_17_cells = adata[leiden_clusters == '17']
umap_coordinates_cluster_17 = umap_coordinates[
    adata.obs_names.isin(cluster_17_cells.obs_names)
]


In [None]:
# Add the fibroblast / MSC-like labels, then rename leiden cluster 2 to
# 'Fibroblasts_PRR16+' and cluster 19 to 'Fibroblasts_ITGA6+'. 'MSCs_like' is
# only registered here; it is assigned in a later cell. The old '2' and '19'
# categories are dropped afterwards.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.add_categories(['Fibroblasts_PRR16+', 'Fibroblasts_ITGA6+', 'MSCs_like'])
)
adata_log.obs.loc[adata_log.obs['leiden'] == '2', 'predicted_cell_states'] = 'Fibroblasts_PRR16+'
adata_log.obs.loc[adata_log.obs['leiden'] == '19', 'predicted_cell_states'] = 'Fibroblasts_ITGA6+'

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.remove_categories(['2', '19'])
)

In [None]:
# Relabel the part of leiden cluster 2 that lies beyond x = 25 on the UMAP as
# 'MSCs_like'.

# Step 1: Access UMAP coordinates and Leiden cluster assignments
umap_coordinates = adata_log.obsm['X_umap']
leiden_clusters = adata_log.obs['leiden']

# Step 2: Identify cells in cluster 2 (positional boolean mask over all cells)
in_cluster_2 = (leiden_clusters == '2').to_numpy()
cluster_2_cells = adata_log[in_cluster_2]

# Step 3: Identify cells in cluster 2 located further than 25 on the x-coordinate
threshold = 25
cluster_2_filtered_cells_mask = umap_coordinates[in_cluster_2, 0] > threshold
cluster_2_filtered_cells = cluster_2_cells[cluster_2_filtered_cells_mask]

# Step 4: Rename the selected cells in the 'predicted_cell_states' column.
# The assignment uses a positional mask rather than a lookup by obs_names, so
# it touches exactly the selected rows even if obs_names are not unique.
msc_like_mask = in_cluster_2 & (umap_coordinates[:, 0] > threshold)
adata_log.obs.loc[msc_like_mask, 'predicted_cell_states'] = 'MSCs_like'

In [None]:
# Display the number of cells per annotation label after the relabelling above.
adata_log.obs['predicted_cell_states'].value_counts()

In [None]:
# Rebuild the plotting helper column from scratch: only the two fibroblast
# labels listed below are shown, every other cell stays unset.
adata_log.obs['Cell_States'] = None
# Make column categorical
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].astype('category')
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].cat.add_categories(['Fibroblasts_PRR16+', 'Fibroblasts_ITGA6+'])

# Copy each label from 'predicted_cell_states' with a single .loc assignment.
# Chained indexing (obs[col][mask] = value) writes through a temporary Series
# and is not guaranteed to update obs (no-op under pandas Copy-on-Write).
for label in ['Fibroblasts_PRR16+', 'Fibroblasts_ITGA6+']:
    adata_log.obs.loc[adata_log.obs['predicted_cell_states'] == label, 'Cell_States'] = label

In [None]:
# Current Cell_States labels on the UMAP.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

In [None]:
# Fibroblast score next to the leiden clusters.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['fibroblasts_score', 'leiden'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

* Adventitial fibroblasts

Reference 1: Vimentin, collagen type-1, CD29, CD44, and CD105 (Hoshino 2008)

In [None]:
# Reference 1: score the adventitial fibroblast panel and plot every marker
# plus the score.
adv_fibroblasts_genes = ['VIM', 'COL1A1', 'ITGB1', 'CD44', 'ENG']
sc.tl.score_genes(adata_log, adv_fibroblasts_genes, score_name='adv_fibroblasts_score1')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=adv_fibroblasts_genes + ['adv_fibroblasts_score1'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

Reference 2: Collagen type-1, Enolase 2, Fibroblast specific protein 1, Gli1, PDGFRα, Patched-1 and patched-2, Sca1, and Tcf21 (Kuwabara 2017)

In [None]:
# Reference 2: score the adventitial fibroblast panel and plot every marker
# plus the score.
adv_fibroblasts_genes2 = [
    'COL1A1', 'ENO2', 'AIFM2', 'S100A4', 'GLI1',
    'PTCH1', 'PTCH2', 'PDGFRA', 'ATXN1', 'TCF21',
]
sc.tl.score_genes(adata_log, adv_fibroblasts_genes2, score_name='adv_fibroblasts_score2')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=adv_fibroblasts_genes2 + ['adv_fibroblasts_score2'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Display the current number of cells per annotation label.
adata_log.obs['predicted_cell_states'].value_counts()

In [None]:
# Subset the cells annotated as 'MSCs_like' and write that subset to its own
# .h5ad file.
MSCs_like_cells = adata_log[
    adata_log.obs['predicted_cell_states'].isin(['MSCs_like'])
]
MSCs_like_cells.write_h5ad('data_skin/MSCs_like_cells.h5ad')

* Vascular Smooth Muscle Cells

In [None]:
# Vascular smooth muscle marker panel and its per-cell score (obs['vsmc_score']).
VSMC_genes = [
    'ACTA2', 'TAGLN', 'MYH11', 'PDGFRB', 'CNN1', 'TAGLN2',
    'MYL9', 'RGS5', 'MYLK', 'HHIP', 'GJA4', 'NOX4',
]
sc.tl.score_genes(
    adata_log,
    VSMC_genes,
    score_name='vsmc_score',
)

In [None]:
# ACTA2 and TAGLN next to the leiden clusters and the vSMC score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['ACTA2', 'TAGLN', 'leiden', 'vsmc_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=2,
    color_map='magma_r',
)

In [None]:
# All vSMC markers and the vSMC score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=[
        'ACTA2', 'TAGLN', 'MYH11', 'PDGFRB', 'CNN1',
        'TAGLN2', 'MYL9', 'RGS5', 'MYLK', 'HHIP',
        'GJA4', 'NOX4', 'vsmc_score',
    ],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

* Compare vSMCs in leiden 0 to vSMCs in leiden 10

In [None]:
# The two leiden clusters to contrast
cluster_0 = '0'
cluster_10 = '10'

# Keep only the cells of these two clusters, as an independent copy
adata_filtered = adata_log[
    adata_log.obs['leiden'].isin([cluster_0, cluster_10])
].copy()

# Rank genes for cluster 0 and cluster 10 within the filtered data
sc.tl.rank_genes_groups(
    adata_filtered,
    groupby='leiden',
    groups=[cluster_0, cluster_10],
)

# Plot the ranked genes of the cluster 0 / cluster 10 comparison
sc.pl.rank_genes_groups(adata_filtered, groupby='leiden', key='rank_genes_groups')

In [None]:
# Leiden clusters next to the genes NME2, GSTP1, FTX and UTRN.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['leiden', 'NME2', 'GSTP1', 'FTX', 'UTRN'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

In [None]:
# Leiden cluster 0 -> 'contractile_vSMCs', cluster 10 -> 'vSMCs'; the old '0'
# and '10' categories are dropped from the annotation column afterwards.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.add_categories(['contractile_vSMCs', 'vSMCs'])
)
adata_log.obs.loc[adata_log.obs['leiden'] == '0', 'predicted_cell_states'] = 'contractile_vSMCs'
adata_log.obs.loc[adata_log.obs['leiden'] == '10', 'predicted_cell_states'] = 'vSMCs'

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states']
    .cat.remove_categories(['0', '10'])
)

In [None]:
# Rebuild the plotting helper column from scratch: only the two vSMC labels
# listed below are shown, every other cell stays unset.
adata_log.obs['Cell_States'] = None
# Make column categorical
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].astype('category')
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].cat.add_categories(['contractile_vSMCs', 'vSMCs'])

# Copy each label from 'predicted_cell_states' with a single .loc assignment.
# Chained indexing (obs[col][mask] = value) writes through a temporary Series
# and is not guaranteed to update obs (no-op under pandas Copy-on-Write).
for label in ['contractile_vSMCs', 'vSMCs']:
    adata_log.obs.loc[adata_log.obs['predicted_cell_states'] == label, 'Cell_States'] = label

In [None]:
# Current Cell_States labels on the UMAP.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=5,
    color_map='magma_r',
)

* Pericytes

In [None]:
# Score the vSC marker panel and plot every marker plus the score.
vSC = ['ANPEP', 'NT5E', 'THY1', 'ITGB1', 'CD44', 'ENG']
sc.tl.score_genes(adata_log, vSC, score_name='vSC_score')

sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=vSC + ['vSC_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

In [None]:
# Expression of CIDEA, PRDM16, ZIC1, LHX8, EVA1A and EPSTI1 on the UMAP.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['CIDEA', 'PRDM16', 'ZIC1', 'LHX8', 'EVA1A', 'EPSTI1'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

In [None]:
# EPC marker panel and its per-cell score (obs['EPCs_score']).
EPCs = [
    'BMP2', 'BMP4', 'EFNB2', 'NRP1',
    'VEGFC', 'NOTCH1', 'PECAM1',
]

sc.tl.score_genes(
    adata_log,
    EPCs,
    score_name='EPCs_score',
)

In [None]:
# EPC markers and their score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['BMP2', 'BMP4', 'EFNB2', 'NRP1', 'VEGFC', 'NOTCH1', 'PECAM1', 'EPCs_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

+ Pericytes

In [None]:
# Pericyte marker panel and its per-cell score (obs['pericytes_score']).
pericytes_genes = [
    'KCNJ8', 'ABCC9', 'VTN', 'ANPEP', 'CD248',
]

sc.tl.score_genes(
    adata_log,
    pericytes_genes,
    score_name='pericytes_score',
)

In [None]:
# Pericyte markers and their score.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['KCNJ8', 'ABCC9', 'VTN', 'ANPEP', 'CD248', 'pericytes_score'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

In [None]:
# KCNJ8 next to the leiden clusters and the current Cell_States labels.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['KCNJ8', 'leiden', 'Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

+ Mixed mesenchymal cells

In [None]:
# The four leiden clusters to compare
cluster_1 = '1'
cluster_7 = '7'
cluster_8 = '8'
cluster_18 = '18'

# Keep only the cells of these four clusters, as an independent copy
adata_filtered = adata_log[
    adata_log.obs['leiden'].isin([cluster_1, cluster_7, cluster_8, cluster_18])
].copy()

# Rank genes for clusters 1, 7, 8 and 18 within the filtered data
sc.tl.rank_genes_groups(
    adata_filtered,
    groupby='leiden',
    groups=[cluster_1, cluster_7, cluster_8, cluster_18],
)

# Plot the ranked genes of each of the four clusters
sc.pl.rank_genes_groups(adata_filtered, groupby='leiden', key='rank_genes_groups')

In [None]:
# Expression of CLSTN2, APCDD1, RGS5 and C11orf96 on the UMAP.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['CLSTN2', 'APCDD1', 'RGS5', 'C11orf96'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=4,
    color_map='magma_r',
)

In [None]:
# Leiden cluster -> annotation label for the mixed pericyte clusters.
mixed_pericyte_labels = {
    '1': 'Mixed_pericytes_CLSTN2+',
    '7': 'Mixed_pericytes_APCDD1+',
    '8': 'Mixed_pericytes_RGS5+',
    '18': 'Mixed_pericytes_C11orf96+',
}

# Register the new labels before assigning them (categorical column).
adata_log.obs['predicted_cell_states'] = adata_log.obs['predicted_cell_states'].cat.add_categories(list(mixed_pericyte_labels.values()))

for leiden_id, label in mixed_pericyte_labels.items():
    adata_log.obs.loc[adata_log.obs['leiden'] == leiden_id, 'predicted_cell_states'] = label

# Drop the old numeric categories, as the other relabelling cells in this
# notebook do, so they no longer appear as empty entries in counts and legends.
adata_log.obs['predicted_cell_states'] = adata_log.obs['predicted_cell_states'].cat.remove_categories(list(mixed_pericyte_labels))

In [None]:
# Rebuild the plotting helper column from scratch: only the four mixed
# pericyte labels listed below are shown, every other cell stays unset.
adata_log.obs['Cell_States'] = None
# Make column categorical
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].astype('category')
adata_log.obs['Cell_States'] = adata_log.obs['Cell_States'].cat.add_categories(['Mixed_pericytes_CLSTN2+',
                                                                                'Mixed_pericytes_APCDD1+',
                                                                                'Mixed_pericytes_RGS5+',
                                                                                'Mixed_pericytes_C11orf96+'])

# Copy each label from 'predicted_cell_states' with a single .loc assignment.
# Chained indexing (obs[col][mask] = value) writes through a temporary Series
# and is not guaranteed to update obs (no-op under pandas Copy-on-Write).
for label in ['Mixed_pericytes_CLSTN2+', 'Mixed_pericytes_APCDD1+',
              'Mixed_pericytes_RGS5+', 'Mixed_pericytes_C11orf96+']:
    adata_log.obs.loc[adata_log.obs['predicted_cell_states'] == label, 'Cell_States'] = label

In [None]:
# KCNJ8 next to the current Cell_States labels.
sc.set_figure_params(dpi=300)
sc.pl.umap(
    adata_log,
    color=['KCNJ8', 'Cell_States'],
    frameon=False,
    size=5,
    legend_fontsize=5,
    ncols=3,
    color_map='magma_r',
)

+ Compare the fibroblast and mesenchymal clusters with each other

In [None]:
# The six leiden clusters to compare
cluster_1 = '1'
cluster_2 = '2'
cluster_7 = '7'
cluster_8 = '8'
cluster_18 = '18'
cluster_19 = '19'

# Keep only the cells of these six clusters, as an independent copy
adata_filtered = adata_log[
    adata_log.obs['leiden'].isin(
        [cluster_1, cluster_2, cluster_7, cluster_8, cluster_18, cluster_19]
    )
].copy()

# Rank genes per leiden cluster within the filtered data
sc.tl.rank_genes_groups(adata_filtered, groupby='leiden')

# Plot the ranked genes of each cluster in the filtered data
sc.pl.rank_genes_groups(adata_filtered, groupby='leiden', key='rank_genes_groups')

In [None]:
# Keep only the cells of clusters 1, 7, 8 and 18 (cluster_* variables are
# defined in the previous cell), as an independent copy
adata_filtered = adata_log[
    adata_log.obs['leiden'].isin([cluster_1, cluster_7, cluster_8, cluster_18])
].copy()

# Rank genes per leiden cluster within the filtered data
sc.tl.rank_genes_groups(adata_filtered, groupby='leiden')

# Plot the ranked genes of each cluster in the filtered data
sc.pl.rank_genes_groups(adata_filtered, groupby='leiden', key='rank_genes_groups')