### Notebook for the anndata file preparation of stem cells prior to trajectory analysis
- **Developed by:** Anna Maguza
- **Place:** Wuerzburg Institute of Systems Immunology
- **Date:** 3rd November 2023

### Import packages

In [2]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Set up the notebook environment

In [3]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [4]:
# Verbosity level 3 makes scanpy emit hints on top of errors, warnings and info.
sc.settings.verbosity = 3

# Print the versions of the packages in use so the run can be reproduced.
sc.logging.print_versions()

# Global figure defaults: on-screen and saved resolution, colour map,
# vector-friendly rendering, and SVG as the save format.
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    color_map='magma_r',
    vector_friendly=True,
    format='svg',
)

-----
anndata     0.9.2
scanpy      1.9.5
-----
PIL                         10.0.1
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.1.0
attrs                       23.1.0
babel                       2.13.0
backcall                    0.2.0
certifi                     2023.07.22
cffi                        1.16.0
charset_normalizer          3.3.0
colorama                    0.4.6
comm                        0.1.4
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
executing                   2.0.0
fastjsonschema              NA
fqdn                        NA
h5py                        3.9.0
idna                        3.4
ipykernel                   6.25.2
ipywidgets                  8.1.1
isoduration                 NA
jedi                        0.19.1
jinja2 

In [5]:
def X_is_raw(adata):
    """Return True if every value stored in ``adata.X`` is a whole number.

    This is used as a heuristic for "X still holds raw counts": normalised or
    log-transformed matrices contain fractional values, raw counts do not.

    Parameters
    ----------
    adata
        Object exposing the expression matrix as ``.X``; ``.X`` may be a dense
        array-like or a SciPy sparse matrix.

    Returns
    -------
    bool
        True when no entry of ``X`` has a fractional part, False otherwise
        (NaN entries also yield False).
    """
    # Local import: scipy is not imported at notebook level.
    from scipy import sparse

    X = adata.X
    if sparse.issparse(X):
        # Implicit zeros are whole numbers, so only the explicitly stored
        # entries need to be inspected.
        values = X.data if X.format in ('csr', 'csc', 'coo') else X.tocoo().data
    else:
        values = np.asarray(X)

    # Element-wise check. Comparing per-column sums instead would wrongly
    # accept fractional values that happen to add up to an integer
    # (e.g. 0.5 + 0.5 in the same column).
    return bool(np.all(np.mod(values, 1) == 0))

### Data Upload

In [6]:
# Relative path to the input .h5ad file (judging by its name, the integrated
# healthy gut reference with all genes); resolved against the working directory.
input_path = 'Healthy_gut_reference_dataset/Integrated/Healthy_integrated_data_all_genes.h5ad'
# Load the file into memory as an AnnData object.
adata = sc.read_h5ad(input_path)

In [9]:
# Count the observations per 'Cell States' label (stored in `df`, not displayed here).
df = adata.obs['Cell States'].value_counts()

In [11]:
# Extract stem cells: keep every observation whose 'Cell States' label is one
# of the six stem-cell states listed below.
stem_cells = [
    'Stem cells OLFM4',
    'Stem cells OLFM4 GSTA1',
    'Stem cells OLFM4 LGR5',
    'Stem cells OLFM4 PCNA',
    'Stem_Cells_GCA',
    'Stem_Cells_ext',
]
# Subsetting an AnnData returns a view of the parent object; .copy() turns it
# into an independent AnnData so later steps do not operate on a view of the
# full dataset.
adata_stem_cells = adata[adata.obs['Cell States'].isin(stem_cells), :].copy()

  if not is_categorical_dtype(df_full[k]):


In [14]:
# Display the per-gene annotation table (.var) of the stem-cell subset.
adata_stem_cells.var

Unnamed: 0_level_0,feature_types-0-0-0,gene_name-1-0-0,gene_id-0-0,GENE-1-0
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A1BG,Gene Expression,A1BG,ENSG00000121410,A1BG
A1BG-AS1,Gene Expression,A1BG-AS1,ENSG00000268895,A1BG-AS1
A1CF,Gene Expression,A1CF,ENSG00000148584,A1CF
A2M,Gene Expression,A2M,ENSG00000175899,A2M
A2M-AS1,Gene Expression,A2M-AS1,ENSG00000245105,A2M-AS1
...,...,...,...,...
ZXDC,Gene Expression,ZXDC,ENSG00000070476,ZXDC
ZYG11A,Gene Expression,ZYG11A,ENSG00000203995,ZYG11A
ZYG11B,Gene Expression,ZYG11B,ENSG00000162378,ZYG11B
ZYX,Gene Expression,ZYX,ENSG00000159840,ZYX


In [15]:
# Write the stem-cell subset to an .h5ad file.
# Relative path, resolved against the working directory.
output_path = 'FetalSC_data/Healthy_stem_cells.h5ad'
adata_stem_cells.write_h5ad(output_path)

In [16]:
# Extract fetal stem cells: from the stem-cell subset, keep the observations
# whose 'Diagnosis' is one of the labels listed below.
fetal_cells = ['Fetal Healthy']
# Subsetting an AnnData returns a view of the parent object; .copy() turns it
# into an independent AnnData so later steps do not operate on a view.
adata_fetal_stem_cells = adata_stem_cells[
    adata_stem_cells.obs['Diagnosis'].isin(fetal_cells), :
].copy()

  if not is_categorical_dtype(df_full[k]):


In [18]:
# Write the fetal stem-cell subset to an .h5ad file.
# Note: this reassigns `output_path`, which previously held the stem-cell output path.
output_path = 'FetalSC_data/Fetal_healthy_stem_cells.h5ad'
adata_fetal_stem_cells.write_h5ad(output_path)