### Notebook for preparing AnnData files for CCI analysis of fetal SC with other fetal cells
- **Developed by:** Anna Maguza
- **Place:** Wuerzburg Institute of Systems Immunology
- **Date:** 20th November 2023

### Import packages

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import anndata
import scipy as sp

### Set up the working environment

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [3]:
# Verbose scanpy logging, plus a record of the package versions used for this run
sc.settings.verbosity = 3
sc.logging.print_versions()

# Figure defaults: 180 dpi on screen, 300 dpi SVG when saved to disk
sc.settings.set_figure_params(
    dpi = 180,
    dpi_save = 300,
    format = 'svg',
    color_map = 'magma_r',
    vector_friendly = True,
)

-----
anndata     0.9.2
scanpy      1.9.5
-----
PIL                         10.0.1
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.1.0
attrs                       23.1.0
babel                       2.13.0
backcall                    0.2.0
certifi                     2023.07.22
cffi                        1.16.0
charset_normalizer          3.3.0
colorama                    0.4.6
comm                        0.1.4
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
executing                   2.0.0
fastjsonschema              NA
fqdn                        NA
h5py                        3.9.0
idna                        3.4
igraph                      0.11.2
ipykernel                   6.25.2
ipywidgets                  8.1.1
isoduration                 NA
jedi   

In [4]:
def X_is_raw(adata):
    """Return True if every value stored in ``adata.X`` is a whole number.

    The check looks at the individual matrix entries rather than at per-gene
    column sums: normalised values can add up to an integer by chance (or by
    construction), which would make a sum-based check report raw counts
    incorrectly.

    Parameters
    ----------
    adata
        Any object exposing an ``X`` attribute that is a NumPy array or a
        SciPy sparse matrix (e.g. an AnnData object).

    Returns
    -------
    bool
        True when all entries are integer-valued, False otherwise.
    """
    from scipy.sparse import issparse

    X = adata.X
    # For sparse input only the explicitly stored values need checking;
    # the implicit zeros are integers by definition.
    values = X.tocsr().data if issparse(X) else np.asarray(X)

    # Integer dtypes cannot hold fractional values, so skip the element-wise test.
    if np.issubdtype(values.dtype, np.integer):
        return True

    return bool(np.all(np.mod(values, 1) == 0))

### Data Upload

In [17]:
# Read the file with all fetal cells.
# The path variable is deliberately not called `input`, which would shadow
# the Python builtin of the same name for the rest of the kernel session.
input_path = 'FetalSC_data/Fetal_cells.h5ad'
adata = sc.read_h5ad(input_path)

In [18]:
# Sanity check before subsetting: the expression matrix should still hold raw counts
X_is_raw(adata)

True

### Prepare SC-Neuronal objects

In [7]:
# Show the dimensions and available annotations of the full object
adata

AnnData object with n_obs × n_vars = 231646 × 26442
    obs: 'Sample_ID', 'Cell Type', 'Study_name', 'Donor_ID', 'Diagnosis', 'Age', 'Region code', 'Fraction', 'Sex', 'Library_Preparation_Protocol', 'batch', 'Age_group', 'Location', 'Cell States', 'Cell States GCA', 'Chem', 'Layer', 'Cell States Kong', 'dataset', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'pct_counts_ribo', 'Cell_ID', '_scvi_batch', '_scvi_labels'
    var: 'feature_types-0-0-0', 'gene_name-1-0-0', 'gene_id-0-0', 'GENE-1-0'

In [9]:
# Number of cells per broad cell type
adata.obs['Cell Type'].value_counts()

Cell Type
Mesenchymal        146597
Epithelial          46023
Neuronal            17154
Endothelial          8163
Myeloid              7180
T cells              4140
Red blood cells      1346
B cells              1043
Name: count, dtype: int64

In [19]:
# Select the cells annotated with the broad cell type 'Neuronal'
neuronal_mask = adata.obs['Cell Type'] == 'Neuronal'
neuronal = adata[neuronal_mask]

  if not is_categorical_dtype(df_full[k]):


In [20]:
# Collect the distinct 'Cell States' labels found among the neuronal cells.
# Only the set of labels matters for the later `.isin()` filter, so take the
# unique values instead of appending one (mostly duplicated) entry per cell.
cell_states = neuronal.obs['Cell States'].unique().tolist()

In [21]:
# Add the three SC states ('FXYD3+_CKB+_SC', 'MTRNR2L12+ASS1+_SC',
# 'RPS10+_RPS17+_SC') to cell_states
cell_states.extend([
    'FXYD3+_CKB+_SC',
    'MTRNR2L12+ASS1+_SC',
    'RPS10+_RPS17+_SC',
])

In [22]:
# Convert cell_states to a list
# NOTE: cell_states is already a list at this point, so this only makes a shallow copy
cell_states = list(cell_states)

In [23]:
# Create a new object from adata that contains only cells from cell_states
# (neuronal cell states plus the SC states).
# Take an explicit copy: the object is modified later (its .raw is assigned),
# and modifying a view of `adata` would otherwise rely on an implicit copy.
neuronal = adata[adata.obs['Cell States'].isin(cell_states)].copy()

  if not is_categorical_dtype(df_full[k]):


In [24]:
# Number of cells per cell state in the selected subset
neuronal.obs['Cell States'].value_counts()

Cell States
MTRNR2L12+ASS1+_SC      3979
RPS10+_RPS17+_SC        3544
ENCC/glia Progenitor    3234
cycling ENCC/glia       2923
Branch B1 (eMN)         1923
Branch A1 (iMN)         1712
Neuroblast              1650
Glia 3 (BCAN+)          1448
Glia 1 (DHH+)            956
cycling neuroblast       689
Branch A2 (IPAN/IN)      682
Glia 2 (ELN+)            552
Branch B2 (eMN)          501
Differentiating glia     459
FXYD3+_CKB+_SC           294
Branch A3 (IPAN/IN)      247
Branch A4 (IN)           127
Branch B3 (IPAN)          51
Name: count, dtype: int64

In [25]:
# Save raw counts: keep the current, not yet normalised matrix available under neuronal.raw
neuronal.raw = neuronal

In [27]:
# Write the raw-count object (SC + neuronal cells) to disk
neuronal_raw_path = 'FetalSC_data/FetalSC_and_other_fetal_cells/FetalSC_and_neuronal_cells_raw.h5ad'
neuronal.write(neuronal_raw_path)

+ Preparing object for CellChat

In [28]:
# Build a minimal object (X, obs, var only) for CellChat.
# X is copied explicitly: the normalisation and log-transform applied to
# adata_new afterwards work in place, and the AnnData constructor is not
# guaranteed to copy its input, so without the copy the counts held by
# `neuronal` could be altered as a side effect.
adata_new = anndata.AnnData(
    X = neuronal.X.copy(),
    obs = neuronal.obs.copy(),
    var = neuronal.var.copy(),
)
adata_new

AnnData object with n_obs × n_vars = 24971 × 26442
    obs: 'Sample_ID', 'Cell Type', 'Study_name', 'Donor_ID', 'Diagnosis', 'Age', 'Region code', 'Fraction', 'Sex', 'Library_Preparation_Protocol', 'batch', 'Age_group', 'Location', 'Cell States', 'Cell States GCA', 'Chem', 'Layer', 'Cell States Kong', 'dataset', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'pct_counts_ribo', 'Cell_ID', '_scvi_batch', '_scvi_labels'
    var: 'feature_types-0-0-0', 'gene_name-1-0-0', 'gene_id-0-0', 'GENE-1-0'

In [29]:
# List the cell-state categories carried over into the CellChat object
adata_new.obs['Cell States'].cat.categories

Index(['Branch A1 (iMN)', 'Branch A2 (IPAN/IN)', 'Branch A3 (IPAN/IN)',
       'Branch A4 (IN)', 'Branch B1 (eMN)', 'Branch B2 (eMN)',
       'Branch B3 (IPAN)', 'Differentiating glia', 'ENCC/glia Progenitor',
       'FXYD3+_CKB+_SC', 'Glia 1 (DHH+)', 'Glia 2 (ELN+)', 'Glia 3 (BCAN+)',
       'MTRNR2L12+ASS1+_SC', 'Neuroblast', 'RPS10+_RPS17+_SC',
       'cycling ENCC/glia', 'cycling neuroblast'],
      dtype='object')

In [30]:
# Scale each cell to a total of 1e6 counts; highly expressed genes are left out
# when the per-cell normalisation factor is computed
sc.pp.normalize_total(
    adata_new,
    target_sum = 1e6,
    exclude_highly_expressed = True,
)

# log(1 + x) transform of the normalised values
sc.pp.log1p(adata_new)

# Store the matrix in compressed sparse column (CSC) format
adata_new.X = sp.sparse.csc_matrix(adata_new.X)

normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation:
['CHGA', 'DEFA5', 'DEFA6', 'FTL', 'HBA1', 'HBA2', 'HBB', 'HBG2', 'MALAT1', 'MT-ATP6', 'MT-CO1', 'MT-CO2', 'MT-CO3', 'MT-CYB', 'MT-ND3', 'MT-ND4', 'MT-ND4L', 'MTRNR2L12', 'NEAT1', 'SST', 'TAC1']
    finished (0:00:00)


In [31]:
# Write the normalised, log-transformed object (SC + neuronal cells) for CellChat
neuronal_cellchat_path = 'FetalSC_data/FetalSC_and_other_fetal_cells/FetalSC_and_neuronal_cells_CellChat.h5ad'
adata_new.write(neuronal_cellchat_path)

### Prepare SC-Myeloid objects

In [32]:
# Show the full object again before building the myeloid subset
adata

AnnData object with n_obs × n_vars = 231646 × 26442
    obs: 'Sample_ID', 'Cell Type', 'Study_name', 'Donor_ID', 'Diagnosis', 'Age', 'Region code', 'Fraction', 'Sex', 'Library_Preparation_Protocol', 'batch', 'Age_group', 'Location', 'Cell States', 'Cell States GCA', 'Chem', 'Layer', 'Cell States Kong', 'dataset', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'pct_counts_ribo', 'Cell_ID', '_scvi_batch', '_scvi_labels'
    var: 'feature_types-0-0-0', 'gene_name-1-0-0', 'gene_id-0-0', 'GENE-1-0'

In [33]:
# Number of cells per broad cell type
adata.obs['Cell Type'].value_counts()

Cell Type
Mesenchymal        146597
Epithelial          46023
Neuronal            17154
Endothelial          8163
Myeloid              7180
T cells              4140
Red blood cells      1346
B cells              1043
Name: count, dtype: int64

In [34]:
# Select the cells annotated with the broad cell type 'Myeloid'
myeloid_mask = adata.obs['Cell Type'] == 'Myeloid'
myeloid = adata[myeloid_mask]

  if not is_categorical_dtype(df_full[k]):


In [35]:
# Collect the distinct 'Cell States' labels found among the myeloid cells.
# Only the set of labels matters for the later `.isin()` filter, so take the
# unique values instead of appending one (mostly duplicated) entry per cell.
cell_states = myeloid.obs['Cell States'].unique().tolist()

In [36]:
# Add the three SC states ('FXYD3+_CKB+_SC', 'MTRNR2L12+ASS1+_SC',
# 'RPS10+_RPS17+_SC') to cell_states
cell_states.extend([
    'FXYD3+_CKB+_SC',
    'MTRNR2L12+ASS1+_SC',
    'RPS10+_RPS17+_SC',
])

In [37]:
# Convert cell_states to a list
# NOTE: cell_states is already a list at this point, so this only makes a shallow copy
cell_states = list(cell_states)

In [38]:
# Create a new object from adata that contains only cells from cell_states
# (myeloid cell states plus the SC states).
# Take an explicit copy: the object is modified later (its .raw is assigned),
# and modifying a view of `adata` would otherwise rely on an implicit copy.
myeloid = adata[adata.obs['Cell States'].isin(cell_states)].copy()

  if not is_categorical_dtype(df_full[k]):


In [39]:
# Number of cells per cell state in the selected subset
myeloid.obs['Cell States'].value_counts()

Cell States
MTRNR2L12+ASS1+_SC               3979
RPS10+_RPS17+_SC                 3544
Macrophages                      3105
LYVE1+ Macrophage                1757
cDC2                             1232
Monocytes                         439
FXYD3+_CKB+_SC                    294
cDC1                              210
CLC+ Mast cell                    121
MPO+ mono-neutrophil              114
pDC                               102
Megakaryocyte                      56
Mast cell                          31
Lymphoid DC                         9
MMP9+ Inflammatory macrophage       4
Name: count, dtype: int64

In [40]:
# Save raw counts: keep the current, not yet normalised matrix available under myeloid.raw
myeloid.raw = myeloid

In [41]:
# Write the raw-count object (SC + myeloid cells) to disk
myeloid_raw_path = 'FetalSC_data/FetalSC_and_other_fetal_cells/FetalSC_and_myeloid_cells_raw.h5ad'
myeloid.write(myeloid_raw_path)

+ Preparing object for CellChat

In [42]:
# Build a minimal object (X, obs, var only) for CellChat.
# X is copied explicitly: the normalisation and log-transform applied to
# adata_new afterwards work in place, and the AnnData constructor is not
# guaranteed to copy its input, so without the copy the counts held by
# `myeloid` could be altered as a side effect.
adata_new = anndata.AnnData(
    X = myeloid.X.copy(),
    obs = myeloid.obs.copy(),
    var = myeloid.var.copy(),
)
adata_new

AnnData object with n_obs × n_vars = 14997 × 26442
    obs: 'Sample_ID', 'Cell Type', 'Study_name', 'Donor_ID', 'Diagnosis', 'Age', 'Region code', 'Fraction', 'Sex', 'Library_Preparation_Protocol', 'batch', 'Age_group', 'Location', 'Cell States', 'Cell States GCA', 'Chem', 'Layer', 'Cell States Kong', 'dataset', 'n_genes_by_counts', 'total_counts', 'total_counts_mito', 'pct_counts_mito', 'total_counts_ribo', 'pct_counts_ribo', 'Cell_ID', '_scvi_batch', '_scvi_labels'
    var: 'feature_types-0-0-0', 'gene_name-1-0-0', 'gene_id-0-0', 'GENE-1-0'

In [43]:
# List the cell-state categories carried over into the CellChat object
adata_new.obs['Cell States'].cat.categories

Index(['CLC+ Mast cell', 'FXYD3+_CKB+_SC', 'LYVE1+ Macrophage', 'Lymphoid DC',
       'MMP9+ Inflammatory macrophage', 'MPO+ mono-neutrophil',
       'MTRNR2L12+ASS1+_SC', 'Macrophages', 'Mast cell', 'Megakaryocyte',
       'Monocytes', 'RPS10+_RPS17+_SC', 'cDC1', 'cDC2', 'pDC'],
      dtype='object')

In [44]:
# Scale each cell to a total of 1e6 counts; highly expressed genes are left out
# when the per-cell normalisation factor is computed
sc.pp.normalize_total(
    adata_new,
    target_sum = 1e6,
    exclude_highly_expressed = True,
)

# log(1 + x) transform of the normalised values
sc.pp.log1p(adata_new)

# Store the matrix in compressed sparse column (CSC) format
adata_new.X = sp.sparse.csc_matrix(adata_new.X)

normalizing counts per cell The following highly-expressed genes are not considered during normalization factor computation:
['ACTB', 'CD74', 'CLC', 'CST3', 'CXCL8', 'DEFA5', 'DEFA6', 'FOS', 'FTH1', 'FTL', 'HBA2', 'HBG2', 'HSP90AA1', 'HSPA1A', 'HSPA6', 'HSPB1', 'LYZ', 'MALAT1', 'MT-ATP6', 'MT-ATP8', 'MT-CO1', 'MT-CO2', 'MT-CO3', 'MT-CYB', 'MT-ND3', 'MT-ND4', 'MT-ND4L', 'MTRNR2L12', 'NEAT1', 'PF4', 'PPBP', 'PRG2', 'S100A12', 'S100A8', 'S100A9', 'SPP1', 'SST', 'TMSB4X', 'TPSAB1']
    finished (0:00:00)


In [45]:
# Write the normalised, log-transformed object (SC + myeloid cells) for CellChat
myeloid_cellchat_path = 'FetalSC_data/FetalSC_and_other_fetal_cells/FetalSC_and_myeloid_cells_CellChat.h5ad'
adata_new.write(myeloid_cellchat_path)