## Notebook for the visualization of stem cell markers in fetal gut stem cells

- **Developed by:** Anna Maguza
- **Place:** Wuerzburg Institute of Systems Immunology
- **Date:** 14th November 2023

### Load required modules

In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import scanpy as sc

import matplotlib.pyplot as plt

In [2]:
%matplotlib inline
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["ps.fonttype"] = 42

In [3]:
# Verbose scanpy logging, plus a record of the package versions used for this run.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Figure defaults shared by the plots in this notebook.
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    color_map='magma_r',
    vector_friendly=True,
    format='svg',
)

-----
anndata     0.9.2
scanpy      1.9.5
-----
PIL                         10.0.1
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.1.0
attrs                       23.1.0
babel                       2.13.0
backcall                    0.2.0
certifi                     2023.07.22
cffi                        1.16.0
charset_normalizer          3.3.0
colorama                    0.4.6
comm                        0.1.4
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
executing                   2.0.0
fastjsonschema              NA
fqdn                        NA
h5py                        3.9.0
idna                        3.4
igraph                      0.11.2
ipykernel                   6.25.2
ipywidgets                  8.1.1
isoduration                 NA
jedi   

In [4]:
def X_is_raw(adata):
    """Return True when every value stored in ``adata.X`` is a whole number.

    The check is element-wise. Comparing per-gene column sums instead (as this
    helper used to do) reports normalized data as raw whenever the fractional
    parts happen to add up to an integer, e.g. a column holding 0.5 and 0.5.

    Parameters
    ----------
    adata
        Any object exposing an ``X`` attribute that is a dense array-like or a
        scipy sparse matrix.

    Returns
    -------
    bool
        True if all stored values are integer-valued (an empty matrix counts
        as raw), False otherwise. NaN values make the check fail.
    """
    # Local import keeps the helper usable without relying on `scipy.sparse`
    # having been loaded as a side effect of another import.
    from scipy.sparse import issparse

    X = adata.X
    if issparse(X):
        # Implicit zeros are whole numbers already, so only stored entries matter.
        # Formats whose `.data` is not a flat value array are converted to COO first.
        values = X.data if X.format in ("csr", "csc", "coo") else X.tocoo().data
    else:
        values = np.asarray(X)
    return bool(np.all(np.mod(values, 1) == 0))

### Data Upload


In [5]:
# Relative path: resolved against the notebook's working directory.
input_path = 'FetalSC_data/Fetal_healthy_stem_cells_leiden.h5ad'
adata = sc.read_h5ad(input_path)
# Bare expression so the AnnData summary is shown as the cell output.
adata

AnnData object with n_obs × n_vars = 7817 × 19868
    obs: 'Sample_ID', 'Cell Type', 'Study_name', 'Donor_ID', 'Diagnosis', 'Age', 'Region code', 'Fraction', 'Sex', 'Library_Preparation_Protocol', 'batch', 'Age_group', 'Location', 'Cell States', 'Cell States GCA', 'Chem', 'Layer', 'Cell States Kong', 'dataset', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'pct_counts_ribo', 'Cell_ID', '_scvi_batch', '_scvi_labels', 'n_genes', 'n_counts', 'leiden', 'cluster'
    var: 'feature_types-0-0-0', 'gene_name-1-0-0', 'gene_id-0-0', 'GENE-1-0', 'n_counts', 'n_cells'
    uns: 'Age_colors', 'Age_group_colors', 'Donor_ID_colors', 'Library_Preparation_Protocol_colors', 'Sex_colors', '_scvi_manager_uuid', '_scvi_uuid', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'rank_genes_groups', 'umap'
    obsm: 'X_scVI', 'X_umap', '_scvi_extra_categorical_covs', '_scvi_extra_continuous_covs'
    obsp: 'connectivities', 'distances'

- Check whether `adata.X` contains raw (integer) counts

In [12]:
# Sanity check before normalization: a True result means `adata.X` passed the raw-count test.
X_is_raw(adata)

True

- Save raw counts

In [7]:
# Store the current, not yet normalized state of the object under `.raw`.
# NOTE(review): scanpy plotting/scoring functions appear to default to `.raw` when it is set —
# confirm that downstream calls pass `use_raw` explicitly where normalized values are intended.
adata.raw = adata

- Normalize and log transform data

In [13]:
# Work on a copy so that `adata` itself is not touched by the normalization below.
adata_log = adata.copy()
# Per-cell normalization to a target sum of 1e6; with exclude_highly_expressed=True the
# highly expressed genes listed in the cell output are left out of the normalization factor.
sc.pp.normalize_total(adata_log, target_sum = 1e6, exclude_highly_expressed = True)
# log1p transform; the return value is not captured, so the call is relied on to update adata_log itself.
sc.pp.log1p(adata_log)

normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation:
['AC104532.2', 'ACTA2', 'ACTB', 'ACTG2', 'ADAMDEC1', 'ADM', 'AFP', 'AGR2', 'AHSP', 'AL138963.3', 'APOA1', 'APOA4', 'APOC3', 'APOE', 'B2M', 'CAMP', 'CCK', 'CCL19', 'CCL2', 'CCL20', 'CCL21', 'CD69', 'CD74', 'CEBPD', 'CELA3A', 'CELA3B', 'CHGA', 'CLC', 'CLCA1', 'CLPS', 'COL1A1', 'COL1A2', 'COL3A1', 'COL6A2', 'CRABP1', 'CRYAB', 'CRYBA2', 'CST3', 'CSTB', 'CXCL10', 'CXCL13', 'CXCL14', 'CXCL8', 'DEFA5', 'DEFA6', 'DLK1', 'DNAJB1', 'DNASE1L3', 'EEF1A1', 'FABP1', 'FGL2', 'FOS', 'FTH1', 'FTL', 'GAL', 'GAPDH', 'GAST', 'GCG', 'GHRL', 'GIP', 'GNLY', 'GRP', 'GUCA2A', 'GUCA2B', 'HBA1', 'HBA2', 'HBB', 'HBE1', 'HBG1', 'HBG2', 'HBM', 'HBZ', 'HIST1H4C', 'HIST1H4H', 'HLA-DRA', 'HSP90AA1', 'HSP90B1', 'HSPA1A', 'HSPA1B', 'HSPA5', 'HSPA6', 'HSPB1', 'HSPH1', 'IGFBP3', 'IL7R', 'INSL5', 'ITLN1', 'JCHAIN', 'JUN', 'KLF6', 'LCN15', 'LGALS1', 'LGALS4', 'LTB', 'LYZ', 'MALAT1', 'MEG3', 'MLN', 'MR

### Check the enrichment of Fetal Stem Cells markers

In [19]:
# Number of cells per annotated cell state (a pandas Series despite the `df` name,
# which is kept in case other cells refer to it).
df = adata_log.obs['Cell States'].value_counts()
# Show the counts; previously the result was computed but never displayed.
df

In [None]:
stem_cells_markers = ['ASCL2', 'ATOH1', 'BMI1', 'CA12', 'CLU', 'GPX2', 'HMGCS2', 'LEFTY1', 'LGR5', 'LRIG1', 'MYC', 'OLFM4', 'SMOC2', 'TERT']

# use_raw=False: `adata_log.raw` still holds the unnormalized counts and scanpy plots
# `.raw` by default whenever it exists, which would bypass the log-normalized matrix.
sc.pl.dotplot(
    adata_log,
    stem_cells_markers,
    groupby='Cell Type',
    use_raw=False,
    cmap='magma_r',
    dot_max=1,
    dot_min=0.01,
)

In [11]:
# Marker panel used for the stem-cell score (each gene listed once; 'DCLK1' was duplicated before).
stem_cells_markers = ['LGR5', 'BMI1', 'GJA1', 'TACSTD2', 'SOX2', 'NANOG', 'LY6H', 'SPP1', 'DCLK1',
                      'CD44', 'TERT', 'ALCAM', 'ASCL2', 'BMPR1A', 'EPHB2']

# Score the marker set on the log-normalized values. use_raw=False is required because
# `adata_log.raw` is set and score_genes would otherwise score on the raw counts.
sc.tl.score_genes(adata_log, stem_cells_markers, score_name = "Stem_cells_markers_score", use_raw = False)

computing score 'Stem_cells_markers_score'
    finished: added
    'Stem_cells_markers_score', score of gene set (adata.obs).
    500 total control genes are used. (0:00:00)


In [None]:
# Raise the on-screen resolution for this dense panel. Every option from the initial
# configuration is passed again because set_figure_params() falls back to its own
# defaults for omitted arguments (dpi_save=150, format='pdf') instead of keeping
# the values set earlier in the notebook.
sc.set_figure_params(dpi=300, dpi_save=300, color_map='magma_r', vector_friendly=True, format='svg')

# Cluster labels, each marker once ('DCLK1' was listed twice) and the marker score.
# use_raw=False colors by the log-normalized values rather than the counts kept in `.raw`.
sc.pl.umap(
    adata_log,
    color=['cluster', 'LGR5', 'BMI1', 'GJA1', 'TACSTD2', 'SOX2', 'NANOG', 'LY6H', 'SPP1', 'DCLK1',
           'CD44', 'TERT', 'ALCAM', 'ASCL2', 'BMPR1A', 'EPHB2', 'Stem_cells_markers_score'],
    use_raw=False,
    frameon=False,
    color_map='magma_r',
    size=5,
    legend_fontsize=5,
    ncols=4,
)

In [None]:
# Violin plots of the first half of the marker panel, per cluster.
# use_raw=False: plot the log-normalized values instead of the counts stored in `adata_log.raw`.
sc.pl.violin(adata_log, ['LGR5', 'BMI1', 'GJA1', 'TACSTD2', 'SOX2', 'NANOG', 'LY6H', 'SPP1'],
             groupby='cluster', rotation=90, use_raw=False)

In [None]:
# Violin plots of the second half of the marker panel, per cluster ('DCLK1' was listed twice).
# use_raw=False: plot the log-normalized values instead of the counts stored in `adata_log.raw`.
sc.pl.violin(adata_log, ['DCLK1', 'CD44', 'TERT', 'ALCAM', 'ASCL2', 'BMPR1A', 'EPHB2'],
             groupby='cluster', rotation=90, use_raw=False)

In [None]:
# Extended marker panel. Each gene appears once: the repeated 'DCLK1', 'ASCL2' and 'EPHB2'
# entries of the earlier version were removed.
stem_cells_markers = ['LGR5', 'BMI1', 'GJA1', 'TACSTD2', 'SOX2', 'NANOG', 'LY6H', 'SPP1', 'DCLK1', 'CD44', 'TERT', 'ALCAM', 'ASCL2', 'BMPR1A', 'EPHB2', 'ACAD10',
                      'ACVR1C', 'ARSE', 'ATP10B', 'C16orf89', 'C6orf136', 'CDCA7', 'CFTR', 'CHMP4C', 'CHP2', 'CLDN15', 'CLDN18', 'CLDN2', 'CPA6', 'DAPK2', 'DDC',
                      'EFNA3', 'EVPL', 'F2RL1', 'FBLN2', 'FOXD2-AS1', 'GATA6-AS1', 'GDF15', 'GJB1', 'GJB2', 'GOLT1A', 'GPX2', 'HNF1A', 'HSD17B2', 'ITPKC',
                      'LEFTY1', 'LIPG', 'MGST1', 'MSI1', 'MYOM3', 'NOX1', 'OLFM4', 'PCSK9', 'PDZD3', 'PHLDA1', 'PKP2', 'PLAGL2', 'PLEKHH1',
                      'PPP1R1B', 'PTGDR', 'PTK7', 'RGMB', 'RNF157', 'RNF186', 'SFN', 'SLC27A2', 'SLC38A4', 'SLPI', 'SULT1B1', 'TAF4B', 'TANC1', 'TMEM171', 'TSPAN8',
                      'URB1-AS1', 'ZBED9', 'ZNF296']

# A single gene symbol that is absent from the dataset would abort the whole plot,
# so absent markers are reported and left out instead.
missing_markers = [gene for gene in stem_cells_markers if gene not in adata_log.var_names]
if missing_markers:
    print(f"Markers not found in adata_log.var_names (skipped): {missing_markers}")
markers_to_plot = [gene for gene in stem_cells_markers if gene in adata_log.var_names]

# use_raw=False: plot the log-normalized values instead of the counts stored in `adata_log.raw`.
sc.pl.dotplot(adata_log, markers_to_plot, groupby='cluster', use_raw=False)

In [None]:
# Extended marker panel without 'TSPAN8'. Each gene appears once: the repeated 'DCLK1',
# 'ASCL2' and 'EPHB2' entries of the earlier version were removed.
stem_cells_markers = ['LGR5', 'BMI1', 'GJA1', 'TACSTD2', 'SOX2', 'NANOG', 'LY6H', 'SPP1', 'DCLK1', 'CD44', 'TERT', 'ALCAM', 'ASCL2', 'BMPR1A', 'EPHB2', 'ACAD10',
                      'ACVR1C', 'ARSE', 'ATP10B', 'C16orf89', 'C6orf136', 'CDCA7', 'CFTR', 'CHMP4C', 'CHP2', 'CLDN15', 'CLDN18', 'CLDN2', 'CPA6', 'DAPK2', 'DDC',
                      'EFNA3', 'EVPL', 'F2RL1', 'FBLN2', 'FOXD2-AS1', 'GATA6-AS1', 'GDF15', 'GJB1', 'GJB2', 'GOLT1A', 'GPX2', 'HNF1A', 'HSD17B2', 'ITPKC',
                      'LEFTY1', 'LIPG', 'MGST1', 'MSI1', 'MYOM3', 'NOX1', 'OLFM4', 'PCSK9', 'PDZD3', 'PHLDA1', 'PKP2', 'PLAGL2', 'PLEKHH1',
                      'PPP1R1B', 'PTGDR', 'PTK7', 'RGMB', 'RNF157', 'RNF186', 'SFN', 'SLC27A2', 'SLC38A4', 'SLPI', 'SULT1B1', 'TAF4B', 'TANC1', 'TMEM171',
                      'URB1-AS1', 'ZBED9', 'ZNF296']

# A single gene symbol that is absent from the dataset would abort the whole plot,
# so absent markers are reported and left out instead.
missing_markers = [gene for gene in stem_cells_markers if gene not in adata_log.var_names]
if missing_markers:
    print(f"Markers not found in adata_log.var_names (skipped): {missing_markers}")
markers_to_plot = [gene for gene in stem_cells_markers if gene in adata_log.var_names]

# use_raw=False: plot the log-normalized values instead of the counts stored in `adata_log.raw`.
sc.pl.dotplot(adata_log, markers_to_plot, groupby='cluster', use_raw=False)