### Notebook for the anndata file preparation of stem cells prior trajectory analysis
- **Developed by:** Anna Maguza
- **Place:** Wuerzburg Institute for System Immunology
- **Date:** 16th November 2023

### Import packages

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Set up the cells

In [2]:
%matplotlib inline

In [3]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 180, color_map = 'magma_r', dpi_save = 300, vector_friendly = True, format = 'svg')

-----
anndata     0.9.2
scanpy      1.9.5
-----
PIL                         10.0.1
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.1.0
attrs                       23.1.0
babel                       2.13.0
backcall                    0.2.0
certifi                     2023.07.22
cffi                        1.16.0
charset_normalizer          3.3.0
colorama                    0.4.6
comm                        0.1.4
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
executing                   2.0.0
fastjsonschema              NA
fqdn                        NA
h5py                        3.9.0
idna                        3.4
igraph                      0.11.2
ipykernel                   6.25.2
ipywidgets                  8.1.1
isoduration                 NA
jedi   

In [4]:
def X_is_raw(adata):
    return np.array_equal(adata.X.sum(axis=0).astype(int), adata.X.sum(axis=0))

### Data Upload

In [8]:
input_path = 'Healthy_gut_reference_dataset/Integrated/Healthy_integrated_data_all_genes.h5ad'
adata = sc.read_h5ad(input_path)

In [10]:
input_path = 'FetalSC_data/Fetal_healthy_stem_cells_leiden.h5ad'
adata_fetalSC = sc.read_h5ad(input_path)

In [12]:
adata_fetalSC.obs['cluster'].value_counts()

cluster
MTRNR2L12+ASS1+_SC    3979
RPS10+_RPS17+_SC      3544
FXYD3+_CKB+_SC         294
Name: count, dtype: int64

In [13]:
# Extract the raw counts
adata = adata.raw.to_adata()
adata

AnnData object with n_obs × n_vars = 557099 × 26442
    obs: 'Sample_ID', 'Cell Type', 'Study_name', 'Donor_ID', 'Diagnosis', 'Age', 'Region code', 'Fraction', 'Sex', 'Library_Preparation_Protocol', 'batch', 'Age_group', 'Location', 'Cell States', 'Cell States GCA', 'Chem', 'Layer', 'Cell States Kong', 'dataset', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'pct_counts_ribo', 'Cell_ID', '_scvi_batch', '_scvi_labels'
    var: 'feature_types-0-0-0', 'gene_name-1-0-0', 'gene_id-0-0', 'GENE-1-0'

In [15]:
X_is_raw(adata)

True

In [16]:
X_is_raw(adata_fetalSC)

True

In [17]:
# Extract fetal cells
adata_fetal = adata[adata.obs['Diagnosis'] == 'Fetal Healthy']

  if not is_categorical_dtype(df_full[k]):


### Prepare cluster names

In [18]:
# Extract the relevant data from adata_fetalSC
clusters = adata_fetalSC.obs['cluster']

# Join this data with adata_fetal
# This will align the 'cluster' data with the cell indices of adata_fetal
joined_data = adata_fetal.obs.join(clusters, how='left', rsuffix='_new')

# For cells that are in adata_fetalSC, update their 'Cell State' with the new cluster labels
# Cells not in adata_fetalSC will retain their original 'Cell State' values
adata_fetal.obs['Cell States'] = joined_data.apply(
    lambda row: row['cluster'] if pd.notna(row['cluster']) else row['Cell States'], axis=1
)

  adata_fetal.obs['Cell States'] = joined_data.apply(


In [19]:
df = adata_fetal.obs['Cell States'].value_counts()

In [21]:
# Write the file
output_path = 'FetalSC_data/Fetal_cells.h5ad'
adata_fetal.write_h5ad(output_path)