### Notebook for the manual annotation of immune cell states in the full skin dataset

- **Developed by:** Anna Maguza
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- **Date:** 5th of March 2024

### Import required modules

In [None]:
import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

### Set up working environment

In [None]:
# Scanpy logging level 3, plus a printout of the installed package versions.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Notebook-wide figure defaults (later cells call sc.set_figure_params again).
sc.settings.set_figure_params(
    dpi = 180,
    dpi_save = 300,
    color_map = 'RdPu',
    vector_friendly = True,
    format = 'svg',
)

### Read in Human skin dataset

In [None]:
# Load the skin AnnData object from a path relative to the working directory,
# then display its summary.
adata= sc.read_h5ad('data_skin/SCC0120_1_Skin_scANVI_leiden_Anna.h5ad') 
adata

* Normalize and log transform

In [None]:
# Build the working object from independent copies: normalize_total / log1p
# below operate in place, and without copying they could also modify the
# matrix and metadata held by `adata`.
adata_log = ad.AnnData(
    X = adata.X.copy(),
    var = adata.var.copy(),
    obs = adata.obs.copy(),
    obsm = adata.obsm,
)

# Scale every cell to 1e6 total counts (highly expressed genes are excluded
# from the size factor), then log1p-transform.
sc.pp.normalize_total(adata_log, target_sum = 1e6, exclude_highly_expressed = True)
sc.pp.log1p(adata_log)

# NOTE(review): this layer is computed from the already normalised and
# log-transformed X, not from raw counts - confirm that this is intended.
adata_log.layers["sqrt_norm"] = np.sqrt(
    sc.pp.normalize_total(adata_log, inplace = False)["X"]
)

In [None]:
# Start the annotation column from the Leiden cluster labels; later cells
# overwrite these cluster ids with cell-state names.
adata_log.obs['predicted_cell_states'] = adata.obs['leiden'].copy()

### Add known annotations

In [None]:
# Labels that the following cells assign; they must exist as categories
# before they can be written into the categorical column.
known_cell_states = [
    'Keratinocytes',
    'Melanocytes',
    'Neuronal',
    'Artery_VE',
    'Vein_VE',
    'LE',
    'Fibroblasts_PRR16+',
    'Fibroblasts_ITGA6+',
    'contractile_vSMCs',
    'vSMCs',
    'Mixed_pericytes_CLSTN2+',
    'Mixed_pericytes_APCDD1+',
    'Mixed_pericytes_RGS5+',
    'Mixed_pericytes_C11orf96+',
]
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.add_categories(known_cell_states)
)

In [None]:
# Score every cell for the keratinocyte marker set.
keratinocyte_genes = ['KRT1', 'KRT5', 'KRT14', 'KRT15', 'ALDH2', 'ATP1B1', 'CD44', 'TFRC', 'DEFB1', 
                      'FASN', 'GJB2', 'ITGA6', 'KRT10', 'KRT19', 'KRT8', 
                      'KRT2', 'UBE2C']
sc.tl.score_genes(adata_log, keratinocyte_genes, score_name = 'keratinocytes_cells_score')

# Label cells whose score exceeds 1. A single .loc assignment is used instead
# of chained indexing (obs[col][mask] = ...), which may write to a temporary
# copy and is a silent no-op under pandas Copy-on-Write.
is_keratinocyte = adata_log.obs['keratinocytes_cells_score'] > 1
adata_log.obs.loc[is_keratinocyte, 'predicted_cell_states'] = 'Keratinocytes'

In [None]:
# Melanocyte marker set ('DCT' was listed twice in the original list; the
# duplicate entry is dropped here).
melanocytes_genes = ['MLANA', 'PMEL', 'DCT', 'MITF', 'APOD', 'ATP1B1', 'KIT',
                     'ME1', 'ME2', 'PHLDA1', 'S100A1', 'SOX10', 'TYR', 
                     'TYRP1']
sc.tl.score_genes(adata_log, melanocytes_genes, score_name = 'melanocytes_score')

# The whole of Leiden cluster 20 is labelled as melanocytes ...
adata_log.obs.loc[(adata_log.obs['leiden'] == '20'), 'predicted_cell_states'] = 'Melanocytes'

# ... plus the cells of cluster 15 whose melanocyte score exceeds 2.
condition = (adata_log.obs['leiden'] == '15') & (adata_log.obs['melanocytes_score'] > 2)
adata_log.obs.loc[condition, 'predicted_cell_states'] = 'Melanocytes'

In [None]:
# Leiden cluster 16 is labelled 'Neuronal'.
adata_log.obs.loc[(adata_log.obs['leiden'] == '16'), 'predicted_cell_states'] = 'Neuronal'

In [None]:
# Cluster id -> label for clusters 9, 5 and 17.
for cluster_id, label in (('9', 'Artery_VE'), ('5', 'Vein_VE'), ('17', 'LE')):
    in_cluster = adata_log.obs['leiden'] == cluster_id
    adata_log.obs.loc[in_cluster, 'predicted_cell_states'] = label

In [None]:
# Cluster id -> label for the two fibroblast clusters.
for cluster_id, label in (('2', 'Fibroblasts_PRR16+'), ('19', 'Fibroblasts_ITGA6+')):
    in_cluster = adata_log.obs['leiden'] == cluster_id
    adata_log.obs.loc[in_cluster, 'predicted_cell_states'] = label

In [None]:
# Cluster id -> label for the two vSMC clusters.
for cluster_id, label in (('0', 'contractile_vSMCs'), ('10', 'vSMCs')):
    in_cluster = adata_log.obs['leiden'] == cluster_id
    adata_log.obs.loc[in_cluster, 'predicted_cell_states'] = label

In [None]:
# Cluster id -> label for the four mixed-pericyte clusters.
pericyte_labels = (
    ('1', 'Mixed_pericytes_CLSTN2+'),
    ('7', 'Mixed_pericytes_APCDD1+'),
    ('8', 'Mixed_pericytes_RGS5+'),
    ('18', 'Mixed_pericytes_C11orf96+'),
)
for cluster_id, label in pericyte_labels:
    in_cluster = adata_log.obs['leiden'] == cluster_id
    adata_log.obs.loc[in_cluster, 'predicted_cell_states'] = label

In [None]:
# Cluster ids that were given a cell-state name in the cells above; drop them
# from the list of categories.
annotated_cluster_ids = ['8', '7', '18', '1', '0', '20',
                         '16', '9', '5', '17', '2', '19', '10']
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.remove_categories(annotated_cluster_ids)
)

In [None]:
# Number of cells per label after the first annotation pass.
adata_log.obs['predicted_cell_states'].value_counts()

In [None]:
# UMAP coloured by the current annotation.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['predicted_cell_states'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 4,
)

### Add immune cells

* Cytotoxic T cells

In [None]:
# Marker pairs used for the 'Tc' and 'Th' scores.
ct_genes = ['CD8A', 'CD8B']
th_genes = ['CD4', 'CD40LG']

sc.tl.score_genes(
    adata_log,
    gene_list = ct_genes,
    score_name = 'Tc_score',
)
sc.tl.score_genes(
    adata_log,
    gene_list = th_genes,
    score_name = 'Th_score',
)

In [None]:
# Marker expression next to the two derived scores, three panels per row.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['CD8A', 'CD8B', 'Tc_score', 'CD4', 'CD40LG', 'Th_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 3,
)

In [None]:
# Cluster 4 -> 'Tc', cluster 3 -> 'Th'; register the new labels first, then
# retire the two cluster ids from the categories.
t_cell_labels = {'4': 'Tc', '3': 'Th'}

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.add_categories(list(t_cell_labels.values()))
)
for cluster_id, label in t_cell_labels.items():
    adata_log.obs.loc[adata_log.obs['leiden'] == cluster_id, 'predicted_cell_states'] = label

adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.remove_categories(list(t_cell_labels))
)

+ Divide cluster 11 into Tc and Th

In [None]:
# UMAP coloured by the current annotation.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['predicted_cell_states'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 4,
)

In [None]:
# Extract the UMAP coordinates of the cells in Leiden cluster 14 (from the
# un-normalised `adata`). The membership test goes through obs_names.
# NOTE(review): `umap_coordinates_cluster_14` is not used by any later cell
# visible in this notebook - confirm whether this cell is still needed.
umap_coordinates = adata.obsm['X_umap']
leiden_clusters = adata.obs['leiden']
cluster_14_cells = adata[leiden_clusters == '14']
umap_coordinates_cluster_14 = umap_coordinates[adata.obs_names.isin(cluster_14_cells.obs_names)]

In [None]:
# Same extraction for Leiden cluster 6; reuses `umap_coordinates` and
# `leiden_clusters` defined in the previous cell.
# NOTE(review): `umap_coordinates_cluster_6` is not used by any later cell
# visible in this notebook - confirm whether this cell is still needed.
cluster_6_cells = adata[leiden_clusters == '6']
umap_coordinates_cluster_6 = umap_coordinates[adata.obs_names.isin(cluster_6_cells.obs_names)]

In [None]:
# Split Leiden cluster 11 into 'Tc' and 'Th' by position on the second UMAP axis.

# Step 1: UMAP coordinates and Leiden cluster assignments of the working object
umap_coordinates = adata_log.obsm['X_umap']
leiden_clusters = adata_log.obs['leiden']

# Step 2: positional boolean mask of the cells in cluster 11
in_cluster_11 = (leiden_clusters == '11').to_numpy()

# Step 3: cluster-11 cells above this value on the UMAP y-axis become 'Tc'.
# The masks are positional over all cells, so no AnnData view and no
# obs_names look-up (which would require unique cell names) is needed.
threshold = 16
cluster_11_higher_y_mask = in_cluster_11 & (umap_coordinates[:, 1] > threshold)
adata_log.obs.loc[cluster_11_higher_y_mask, 'predicted_cell_states'] = 'Tc'

# Step 4: every other cell of cluster 11 becomes 'Th'
cluster_11_remaining_mask = in_cluster_11 & ~cluster_11_higher_y_mask
adata_log.obs.loc[cluster_11_remaining_mask, 'predicted_cell_states'] = 'Th'

In [None]:
# Within cluster 11, override the positional split for cells with a score
# above 2. 'Th' is applied last, so it wins when both scores exceed 2.
in_cluster_11 = adata_log.obs['leiden'] == '11'
for score_column, label in (('Tc_score', 'Tc'), ('Th_score', 'Th')):
    condition = in_cluster_11 & (adata_log.obs[score_column] > 2)
    adata_log.obs.loc[condition, 'predicted_cell_states'] = label

In [None]:
# UMAP coloured by the current annotation.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['predicted_cell_states'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 4,
)

* Natural killer (NK) cells

In [None]:
# Marker set behind the 'nk_score' column.
nk_genes = ['KLRD1', 'GNLY', 'PRF1', 'GZMB', 'FCGR3A']
sc.tl.score_genes(
    adata_log,
    gene_list = nk_genes,
    score_name = 'nk_score',
)

In [None]:
# NK markers and the derived score.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['KLRD1', 'GNLY', 'PRF1', 'GZMB', 'FCGR3A', 'nk_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Cluster 6 -> 'NK_cells': register the label, assign it, retire the cluster id.
states = adata_log.obs['predicted_cell_states'].cat.add_categories(['NK_cells'])
adata_log.obs['predicted_cell_states'] = states

in_cluster_6 = adata_log.obs['leiden'] == '6'
adata_log.obs.loc[in_cluster_6, 'predicted_cell_states'] = 'NK_cells'

states = adata_log.obs['predicted_cell_states'].cat.remove_categories(['6'])
adata_log.obs['predicted_cell_states'] = states

In [None]:
# Cells of clusters 4 and 11 with an NK score above 4 are relabelled 'NK_cells'.
high_nk_score = adata_log.obs['nk_score'] > 4
for cluster_id in ('4', '11'):
    condition = (adata_log.obs['leiden'] == cluster_id) & high_nk_score
    adata_log.obs.loc[condition, 'predicted_cell_states'] = 'NK_cells'

* Regulatory T cells (T_reg)

In [None]:
# Marker set used for the 't_reg_score' column computed in the next cell.
t_reg = ['FOXP3', 'TIGIT', 'CTLA4']

In [None]:
# Per-cell score for the `t_reg` marker set.
sc.tl.score_genes(
    adata_log,
    gene_list = t_reg,
    score_name = 't_reg_score',
)

In [None]:
# T_reg markers and the derived score.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['FOXP3', 'TIGIT', 'CTLA4', 't_reg_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Cluster 13 -> 'T_reg': register the label, assign it, retire the cluster id.
states = adata_log.obs['predicted_cell_states'].cat.add_categories(['T_reg'])
adata_log.obs['predicted_cell_states'] = states

in_cluster_13 = adata_log.obs['leiden'] == '13'
adata_log.obs.loc[in_cluster_13, 'predicted_cell_states'] = 'T_reg'

states = adata_log.obs['predicted_cell_states'].cat.remove_categories(['13'])
adata_log.obs['predicted_cell_states'] = states

* NKT

In [None]:
# Marker set behind the 'nkt_score' column.
nkt_genes = ['NKG7', 'GNLY', 'CD8A']
sc.tl.score_genes(
    adata_log,
    gene_list = nkt_genes,
    score_name = 'nkt_score',
)

In [None]:
# NKT markers and the derived score.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['NKG7', 'GNLY', 'CD8A', 'nkt_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Cells of clusters 4 and 11 with an NKT score above 4 are relabelled 'NKT'.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.add_categories(['NKT'])
)
high_nkt_score = adata_log.obs['nkt_score'] > 4
for cluster_id in ('4', '11'):
    condition = (adata_log.obs['leiden'] == cluster_id) & high_nkt_score
    adata_log.obs.loc[condition, 'predicted_cell_states'] = 'NKT'

* Plasma cells

In [None]:
# Marker set behind the 'plasma_score' column.
plasma_genes = ['CD79A', 'JCHAIN', 'IGKC']
sc.tl.score_genes(
    adata_log,
    gene_list = plasma_genes,
    score_name = 'plasma_score',
)

In [None]:
# Plasma-cell markers, Leiden clusters and the derived score.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['CD79A', 'JCHAIN', 'IGKC', 'leiden', 'plasma_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 4,
)

In [None]:
# NOTE(review): the plasma-cell assignment below is disabled, so no cell is
# ever labelled 'Plasma_cells' in 'predicted_cell_states'. Either re-enable
# it or delete this cell.
#adata_log.obs['predicted_cell_states'] = adata_log.obs['predicted_cell_states'].cat.add_categories(['Plasma_cells'])
#condition = (adata_log.obs['leiden'] == '15') & (adata_log.obs['plasma_score'] > 2)

#adata_log.obs.loc[condition, 'predicted_cell_states'] = 'Plasma_cells'

In [None]:
# Number of cells per label at this point of the annotation.
adata_log.obs['predicted_cell_states'].value_counts()

In [None]:
# Build a helper column for plotting that keeps only the 'Plasma_cells' label
# and is missing (NaN) for every other cell.
# NOTE(review): the cell that assigns 'Plasma_cells' is commented out in this
# notebook, so this column is expected to be entirely missing.
plasma_states = ['Plasma_cells']
predicted = adata_log.obs['predicted_cell_states'].astype(object)

# Built in one step instead of chained indexing (obs[col][mask] = ...), which
# may write to a temporary copy and is a silent no-op under pandas Copy-on-Write.
adata_log.obs['Cell_States'] = pd.Categorical(
    predicted.where(predicted.isin(plasma_states)),
    categories = plasma_states,
)

In [None]:
# Plasma score next to the helper annotation column.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['plasma_score', 'Cell_States'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 4,
)

* ILC

In [None]:
# Marker set used for the 'ilc_score' column computed in the next cell.
ilc_genes = ['S100A13', 'TLE1', 'AREG']

In [None]:
# Per-cell score for the `ilc_genes` marker set.
sc.tl.score_genes(
    adata_log,
    gene_list = ilc_genes,
    score_name = 'ilc_score',
)

In [None]:
# ILC markers and the derived score.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['S100A13', 'TLE1', 'AREG', 'ilc_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

* ILC1

In [None]:
# Score the ILC1 marker set, then plot the markers followed by the score.
ilc1_genes = ['CXCR3', 'CD3D', 'IKZF3']
sc.tl.score_genes(adata_log, gene_list = ilc1_genes, score_name = 'ilc1_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ilc1_genes + ['ilc1_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

* ILC2

In [None]:
# Score the ILC2 marker set, then plot the markers followed by the score.
ilc2_genes = ['GATA3', 'KLRG1', 'HPGDS']
sc.tl.score_genes(adata_log, gene_list = ilc2_genes, score_name = 'ilc2_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ilc2_genes + ['ilc2_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

* ILC3

In [None]:
# Score the ILC3 marker set, then plot the markers followed by the score.
ilc3_genes = ['IL4I1', 'RORC', 'KIT']
sc.tl.score_genes(adata_log, gene_list = ilc3_genes, score_name = 'ilc3_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ilc3_genes + ['ilc3_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Cluster 14 -> 'ILC3': register the label, assign it, retire the cluster id.
states = adata_log.obs['predicted_cell_states'].cat.add_categories(['ILC3'])
adata_log.obs['predicted_cell_states'] = states

in_cluster_14 = adata_log.obs['leiden'] == '14'
adata_log.obs.loc[in_cluster_14, 'predicted_cell_states'] = 'ILC3'

states = adata_log.obs['predicted_cell_states'].cat.remove_categories(['14'])
adata_log.obs['predicted_cell_states'] = states

* Mast cells


In [None]:
# Score the mast-cell marker set, then plot the markers followed by the score.
mast_genes = ['TPSAB1', 'TPSB2', 'CPA3']
sc.tl.score_genes(adata_log, gene_list = mast_genes, score_name = 'mast_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = mast_genes + ['mast_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Cells of cluster 12 with a mast score above 2 are labelled 'Mast_cells'.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.add_categories(['Mast_cells'])
)
in_cluster_12 = adata_log.obs['leiden'] == '12'
high_mast_score = adata_log.obs['mast_score'] > 2
condition = in_cluster_12 & high_mast_score
adata_log.obs.loc[condition, 'predicted_cell_states'] = 'Mast_cells'

* gamma delta T cells

In [None]:
# Marker set used for the 'gamma_delta_t_cells_score' column computed in the
# next cell.
gamma_delta_t_cells = ['TRDC', 'TRGC1', 'CCL5']

In [None]:
# Per-cell score for the `gamma_delta_t_cells` marker set.
sc.tl.score_genes(
    adata_log,
    gene_list = gamma_delta_t_cells,
    score_name = 'gamma_delta_t_cells_score',
)

In [None]:
# Gamma-delta T cell markers and the derived score.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['TRDC', 'TRGC1', 'CCL5', 'gamma_delta_t_cells_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

+ B cells

In [None]:
# Score the B-cell marker set, then plot the markers followed by the score.
b_genes = ['CD79A', 'MS4A1', 'CD19']
sc.tl.score_genes(adata_log, gene_list = b_genes, score_name = 'b_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = b_genes + ['b_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Cells of cluster 15 with a B-cell score above 2 are labelled 'B_cells'.
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.add_categories(['B_cells'])
)
in_cluster_15 = adata_log.obs['leiden'] == '15'
high_b_score = adata_log.obs['b_score'] > 2
condition = in_cluster_15 & high_b_score
adata_log.obs.loc[condition, 'predicted_cell_states'] = 'B_cells'

In [None]:
# Number of cells per label at this point of the annotation.
adata_log.obs['predicted_cell_states'].value_counts()

In [None]:
# Build a helper column for plotting that keeps only the lymphoid / mast / B
# labels assigned above and is missing (NaN) for every other cell.
immune_states = ['Tc', 'Th', 'NK_cells', 'T_reg', 'NKT', 'ILC3', 'Mast_cells', 'B_cells']
predicted = adata_log.obs['predicted_cell_states'].astype(object)

# Built in one step instead of one chained-indexing assignment per label
# (obs[col][mask] = ...), which may write to a temporary copy and is a silent
# no-op under pandas Copy-on-Write.
adata_log.obs['Cell_States'] = pd.Categorical(
    predicted.where(predicted.isin(immune_states)),
    categories = immune_states,
)

In [None]:
# All immune scores computed so far, plus the helper annotation column.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = [
        'Tc_score', 'Th_score', 't_reg_score', 'nk_score',
        'nkt_score', 'ilc_score', 'ilc3_score', 'mast_score', 'b_score',
        'Cell_States',
    ],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Individual immune marker genes, six panels per row.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = [
        'CD8A', 'CD8B', 'CD4', 'CD40LG',
        'FOXP3', 'TIGIT', 'CTLA4', 'KLRD1', 'GNLY', 'PRF1', 'GZMB', 'FCGR3A',
        'NKG7', 'CD79A', 'JCHAIN', 'IGKC', 'MS4A1', 'CD19',
        'IL4I1', 'RORC', 'KIT', 'S100A13', 'TLE1', 'AREG',
    ],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 6,
)

### APCs

+ Macrophages

In [None]:
# Score the macrophage marker set, then plot the markers followed by the score.
macrophage_genes = ['C1QA', 'CD68', 'TREM2']
sc.tl.score_genes(adata_log, gene_list = macrophage_genes, score_name = 'macrophage_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = macrophage_genes + ['macrophage_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

+ Monocytes

In [None]:
# Score the monocyte marker set, then plot the markers followed by the score.
monocyte_genes = ['S100A9', 'LYZ', 'FCN1']
sc.tl.score_genes(adata_log, gene_list = monocyte_genes, score_name = 'monocyte_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = monocyte_genes + ['monocyte_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

+ Mono-mac cells

In [None]:
# Score the mono-mac marker set, then plot the markers followed by the score.
monomac = ['TYROBP', 'C1QA', 'HMOX1']
sc.tl.score_genes(adata_log, gene_list = monomac, score_name = 'monomac_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = monomac + ['monomac_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

+ cycling DC

In [None]:
# Score the cycling-DC marker set, then plot the markers followed by the score.
cycling_DC = ['MKI67', 'TOP2A', 'CLEC10A']
sc.tl.score_genes(adata_log, gene_list = cycling_DC, score_name = 'cycling_DC_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = cycling_DC + ['cycling_DC_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

+ DC

In [None]:
# Score the DC marker set, then plot the markers followed by the score.
DC = ['CD1C', 'FCER1A', 'CLEC10A']
sc.tl.score_genes(adata_log, gene_list = DC, score_name = 'DC_score')

sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = DC + ['DC_score'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Number of cells per label before the cluster-12 assignment in the next cell.
adata_log.obs['predicted_cell_states'].value_counts()   

In [None]:
# Register the APC labels assigned in this and the following cell.
adata_log.obs['predicted_cell_states'] = adata_log.obs['predicted_cell_states'].cat.add_categories(['Monocytes', 'Macrophages', 'DCs'])

# Every cell of cluster 12 that was not labelled 'Mast_cells' defaults to
# 'Monocytes'. The cluster column is read from adata_log directly rather than
# from the `leiden_clusters` variable left over from a much earlier cell.
cluster_12_remaining_mask = (adata_log.obs['leiden'] == '12') & (adata_log.obs['predicted_cell_states'] != 'Mast_cells')
adata_log.obs.loc[cluster_12_remaining_mask, 'predicted_cell_states'] = 'Monocytes'

In [None]:
# Within cluster 12, cells with a score above 4 are relabelled. 'DCs' is
# applied last, so it wins when both scores exceed 4.
in_cluster_12 = adata_log.obs['leiden'] == '12'
for score_column, label in (('macrophage_score', 'Macrophages'), ('DC_score', 'DCs')):
    condition = in_cluster_12 & (adata_log.obs[score_column] > 4)
    adata_log.obs.loc[condition, 'predicted_cell_states'] = label

In [None]:
# Clusters 12 and 11 have been relabelled above; drop their ids from the categories.
relabelled_cluster_ids = ['12', '11']
adata_log.obs['predicted_cell_states'] = (
    adata_log.obs['predicted_cell_states'].cat.remove_categories(relabelled_cluster_ids)
)

In [None]:
# Build a helper column for plotting that keeps only the APC labels assigned
# above and is missing (NaN) for every other cell.
apc_states = ['Monocytes', 'Macrophages', 'DCs']
predicted = adata_log.obs['predicted_cell_states'].astype(object)

# Built in one step instead of one chained-indexing assignment per label
# (obs[col][mask] = ...), which may write to a temporary copy and is a silent
# no-op under pandas Copy-on-Write.
adata_log.obs['Cell_States'] = pd.Categorical(
    predicted.where(predicted.isin(apc_states)),
    categories = apc_states,
)

In [None]:
# Display the per-cell metadata table, including the score and annotation
# columns added above.
adata_log.obs

In [None]:
# APC scores next to the helper annotation column.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['macrophage_score', 'monocyte_score', 'DC_score', 'Cell_States'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Individual macrophage, monocyte and DC marker genes.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = [
        'C1QA', 'CD68', 'TREM2',
        'S100A9', 'LYZ', 'FCN1',
        'CD1C', 'FCER1A', 'CLEC10A',
    ],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Promote the curated labels to the final annotation column (copied so the
# two columns do not share data).
adata_log.obs['Cell_States'] = adata_log.obs['predicted_cell_states'].copy()

# Drop the cells that still carry the Leiden label '15', i.e. cluster-15
# cells that received no cell-state name above. The subset is materialised
# with .copy() so later cells work on a real AnnData object, not a view.
adata_log = adata_log[adata_log.obs['predicted_cell_states'] != '15'].copy()

In [None]:
# UMAP coloured by the final annotation.
sc.set_figure_params(dpi = 300)
sc.pl.umap(
    adata_log,
    color = ['Cell_States'],
    color_map = 'magma_r',
    frameon = False,
    size = 5,
    legend_fontsize = 5,
    ncols = 5,
)

In [None]:
# Bar plot of the number of cells per cell state (all conditions pooled)
adata_log.obs['Cell_States'].value_counts().plot(kind='bar', color = 'purple', figsize=(10, 6))

In [None]:
# Number of cells per level of the 'condition' column.
adata_log.obs['condition'].value_counts()

In [None]:
# Composition of every cell state by the 'condition' column, as stacked
# proportions.

# Cells per (cell state, condition) pair; observed = True keeps categorical
# levels that no longer occur from producing empty rows.
data_counts = (
    adata_log.obs
    .groupby(['Cell_States', 'condition'], observed = True)
    .size()
    .unstack(fill_value = 0)
)

# Row-normalise so that each cell state sums to 1
data_proportions = data_counts.div(data_counts.sum(axis = 1), axis = 0)

# Plotting (two colours are hard-coded; adjust if 'condition' has more levels)
ax = data_proportions.plot(kind = 'bar', stacked = True, figsize = (10, 7), color = ['#1f77b4', '#ff7f0e'])
ax.set_xlabel('Cell state')
ax.set_ylabel('Proportion of cells')
plt.xticks(rotation = 90)
plt.tight_layout()
plt.show()

+ Markers visualization

In [None]:
# Marker genes for the dot plot. The variable name is kept as in the original
# notebook. Several genes ('ATP1B1', 'DCT', 'GJA4') appear more than once in
# the list; dict.fromkeys removes the repeats while preserving order, so each
# gene is drawn as a single dot-plot column.
stem_cells_markers = list(dict.fromkeys([
    'KRT1', 'KRT5', 'KRT14', 'KRT15', 'ALDH2', 'ATP1B1', 'CD44', 'TFRC', 'DEFB1',
    'FASN', 'GJB2', 'ITGA6', 'KRT10', 'KRT19', 'KRT8',
    'KRT2', 'UBE2C', 'MLANA', 'PMEL', 'DCT', 'MITF', 'APOD', 'ATP1B1', 'KIT',
    'ME1', 'ME2', 'PHLDA1', 'S100A1', 'SOX10', 'TYR',
    'TYRP1', 'DCT', 'NRXN1', 'SCN7A', 'ANK3', 'PECAM1', 'EMCN', 'CDH5', 'VWF', 'KDR', 'FLT1', 'TEK', 'CLDN5', 'GJA4', 'GJA5', 'HEY1', 'GATA2', 'SOX17', 'MECOM',
    'ACKR1', 'NR2F2', 'PLVAP', 'LYVE1', 'PDPN', 'MMP2', 'COL1A1', 'COL1A2', 'NT5E', 'COL6A1',
    'ACTA2', 'TAGLN', 'MYH11', 'PDGFRB', 'CNN1', 'TAGLN2', 'MYL9', 'RGS5', 'MYLK', 'HHIP', 'GJA4', 'NOX4',
    'KCNJ8', 'ABCC9', 'VTN', 'ANPEP', 'CD248',
]))

sc.pl.dotplot(adata_log, stem_cells_markers, groupby='Cell_States')

In [None]:
# Immune and APC marker genes for the second dot plot (the variable name is
# reused from the previous cell).
stem_cells_markers = [
    'CD8A', 'CD8B', 'CD4', 'CD40LG',
    'FOXP3', 'TIGIT', 'CTLA4',
    'KLRD1', 'GNLY', 'PRF1', 'GZMB', 'FCGR3A',
    'NKG7', 'CD79A', 'JCHAIN', 'IGKC', 'MS4A1', 'CD19',
    'IL4I1', 'RORC', 'KIT', 'S100A13', 'TLE1', 'AREG',
    'C1QA', 'CD68', 'TREM2',
    'S100A9', 'LYZ', 'FCN1',
    'CD1C', 'FCER1A', 'CLEC10A',
]

sc.pl.dotplot(
    adata_log,
    stem_cells_markers,
    groupby = 'Cell_States',
)