### Notebook to merge all cell compartment objects and format final object for production

- **Developed by:** Carlos Talavera-López Ph.D
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- v230811

### Import required modules

In [1]:
import anndata
import numpy as np
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt

### Set up working environment

In [2]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 180, color_map = 'magma_r', dpi_save = 300, vector_friendly = True, format = 'svg')

-----
anndata     0.9.2
scanpy      1.9.4
-----
PIL                 10.0.0
asttokens           NA
backcall            0.2.0
comm                0.1.4
cycler              0.10.0
cython_runtime      NA
dateutil            2.8.2
debugpy             1.6.7.post1
decorator           5.1.1
executing           1.2.0
h5py                3.9.0
importlib_resources NA
ipykernel           6.25.1
ipywidgets          8.1.0
jedi                0.19.0
joblib              1.3.2
kiwisolver          1.4.5
llvmlite            0.40.1
matplotlib          3.7.2
mpl_toolkits        NA
natsort             8.4.0
numba               0.57.1
numpy               1.24.4
packaging           23.1
pandas              2.0.3
parso               0.8.3
pexpect             4.8.0
pickleshare         0.7.5
pkg_resources       NA
platformdirs        3.10.0
prompt_toolkit      3.0.39
psutil              5.9.5
ptyprocess          0.7.0
pure_eval           0.2.2
pydev_ipython       NA
pydevconsole        NA
pydevd              2.9

In [3]:
def X_is_raw(adata):
    return np.array_equal(adata.X.sum(axis = 0).astype(int), adata.X.sum(axis = 0))

### Read in individual cell compartment objects

In [5]:
epi_mixed = sc.read_h5ad('../data/Epithelial_Mixed_states_locked_ctl230730.raw.h5ad')
del(epi_mixed.uns)
del(epi_mixed.obsm)
del(epi_mixed.obsp)
epi_mixed

AnnData object with n_obs × n_vars = 12256 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', '_scvi_batch', '_scvi_labels', 'IAV_score', 'group', 'C_scANVI', 'cell_type', 'leiden', 'leiden_states', 'seed_labels', 'Viral_score', 'C_scANVI_v2', 'mixed_states', 'cell_states'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_counts-V4', 'tota

In [6]:
epi_goblet = sc.read_h5ad('../data/Epithelial_Goblet_states_locked_ctl230811.raw.h5ad')
del(epi_goblet.uns)
del(epi_goblet.obsm)
del(epi_goblet.obsp)
epi_goblet

AnnData object with n_obs × n_vars = 22192 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', '_scvi_batch', '_scvi_labels', 'IAV_score', 'group', 'C_scANVI', 'cell_type', 'leiden', 'leiden_states', 'seed_labels', 'Viral_score', 'C_scANVI_v2', 'cell_states'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_counts-V4', 'total_counts-V4', 'n

In [7]:
epi_basal = sc.read_h5ad('../data/Epithelial_Basal_states_locked_ctl230810.raw.h5ad')
del(epi_basal.uns)
del(epi_basal.obsm)
del(epi_basal.obsp)
epi_basal

AnnData object with n_obs × n_vars = 43045 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', '_scvi_batch', '_scvi_labels', 'IAV_score', 'group', 'C_scANVI', 'cell_type', 'leiden', 'leiden_states', 'seed_labels', 'Viral_score', 'C_scANVI_v2', 'cell_states'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_counts-V4', 'total_counts-V4', 'n

In [8]:
epi_club = sc.read_h5ad('../data/Epithelial_Club_states_locked_ctl230808.raw.h5ad')
del(epi_club.uns)
del(epi_club.obsm)
del(epi_club.obsp)
epi_club

AnnData object with n_obs × n_vars = 15435 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', '_scvi_batch', '_scvi_labels', 'IAV_score', 'group', 'C_scANVI', 'cell_type', 'leiden', 'leiden_states', 'seed_labels', 'Viral_score', 'C_scANVI_v2', 'cell_states'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_counts-V4', 'total_counts-V4', 'n

In [9]:
epi_ciliated = sc.read_h5ad('../data/Epithelial_MultiC_states_locked_ctl230807.raw.h5ad')
del(epi_ciliated.uns)
del(epi_ciliated.obsm)
del(epi_ciliated.obsp)
epi_ciliated

AnnData object with n_obs × n_vars = 4645 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', '_scvi_batch', '_scvi_labels', 'IAV_score', 'group', 'C_scANVI', 'cell_type', 'leiden', 'leiden_states', 'seed_labels', 'Viral_score', 'C_scANVI_v2', 'mixed_states', 'fine_states', 'cell_states'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_cou

### Normalise annotations for individual clustering labels 

In [29]:
epi_mixed.obs['cell_states'].value_counts()

cell_states
MHCII+Club                  2474
Club_DHRS9+                 1934
iavAPC_Epi                  1498
Basal_SERPINE1+             1401
Basal_SERPINE2+             1180
RARRES1+lipGoblet            913
iavClub_lip                  880
Basal_IGFBP6+                861
SCGB1+KRT5-FOXA1+iavClub     837
SupraB_KRT16+                278
Name: count, dtype: int64

In [30]:
epi_mixed.obs['leiden'].value_counts()

leiden
0    2474
1    1934
2    1498
3    1401
4    1180
5     913
6     880
7     861
8     837
9     278
Name: count, dtype: int64

In [31]:
epi_mixed.obs['leiden'] = epi_mixed.obs['cell_states'].cat.rename_categories(['Mixed_0', 'Mixed_1', 'Mixed_2', 'Mixed_3', 'Mixed_4', 'Mixed_5', 'Mixed_6', 'Mixed_7', 'Mixed_8', 'Mixed_9'])
epi_mixed.obs['leiden'].cat.categories

Index(['Mixed_0', 'Mixed_1', 'Mixed_2', 'Mixed_3', 'Mixed_4', 'Mixed_5',
       'Mixed_6', 'Mixed_7', 'Mixed_8', 'Mixed_9'],
      dtype='object')

In [28]:
epi_goblet.obs['leiden'] = epi_goblet.obs['cell_states'].cat.rename_categories(['Goblet_0','Goblet_1','Goblet_2','Goblet_3','Goblet_4', 'Goblet_5'])
epi_goblet.obs['leiden'].cat.categories

Index(['Goblet_0', 'Goblet_1', 'Goblet_2', 'Goblet_3', 'Goblet_4', 'Goblet_5'], dtype='object')

In [25]:
epi_basal.obs['leiden'] = epi_basal.obs['cell_states'].cat.rename_categories(['Basal_0','Basal_1','Basal_2','Basal_3','Basal_4','Basal_5', 'Basal_6'])
epi_basal.obs['leiden'].cat.categories

Index(['Basal_0', 'Basal_1', 'Basal_2', 'Basal_3', 'Basal_4', 'Basal_5',
       'Basal_6'],
      dtype='object')

In [21]:
epi_club.obs['leiden'] = epi_club.obs['cell_states'].cat.rename_categories(['Club_0','Club_1','Club_2','Club_3','Club_4','Club_5', 'Club_6'])
epi_club.obs['leiden'].cat.categories

Index(['Club_0', 'Club_1', 'Club_2', 'Club_3', 'Club_4', 'Club_5', 'Club_6'], dtype='object')

In [20]:
epi_ciliated.obs['leiden'] = epi_ciliated.obs['cell_states'].cat.rename_categories(['MultiC_0','MultiC_1','MultiC_2','MultiC_3','MultiC_4','MultiC_5'])
epi_ciliated.obs['leiden'].cat.categories

Index(['MultiC_0', 'MultiC_1', 'MultiC_2', 'MultiC_3', 'MultiC_4', 'MultiC_5'], dtype='object')

### Merge all objects into a single one

In [32]:
adata = epi_ciliated.concatenate(epi_club, epi_basal, epi_goblet, epi_mixed, batch_key = 'cell_type', batch_categories = ['ciliated', 'club', 'basal', 'goblet', 'mixed'], join = 'inner') 
adata


See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html


AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', '_scvi_batch', '_scvi_labels', 'IAV_score', 'group', 'C_scANVI', 'cell_type', 'leiden', 'leiden_states', 'seed_labels', 'Viral_score', 'C_scANVI_v2', 'mixed_states', 'fine_states', 'cell_states'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_co

### Clean objects from individual features in `adata.obs` and `adata.var`

In [33]:
adata.obs = adata.obs[['sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden']]
adata

AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden'
    var: 'mt', 'ribo', 'n_cells_by_counts-V1', 'mean_counts-V1', 'pct_dropout_by_counts-V1', 'total_counts-V1', 'n_cells_by_counts-V2', 'mean_counts-V2', 'pct_dropout_by_counts-V2', 'total_counts-V2', 'n_cells_by_counts-V3', 'mean_counts-V3', 'pct_dropout_by_counts-V3', 'total_counts-V3', 'n_cells_by_counts-V4', 'mean_counts-V4', 'pct_dropout_by_counts-V4', 'total_counts-V4', 'n_cells_by_counts-V5', 'mean_counts-V5', 'pct_dropout_by_counts-V5', 'total_counts-V5', 'n_

In [34]:
adata.var = adata.var[['mt', 'ribo']]
adata

AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden'
    var: 'mt', 'ribo'

### Normalise cell states labels

In [35]:
adata.obs['cell_states'] = adata.obs['cell_states'].astype('category')
adata.obs['cell_states'].cat.categories

Index(['APOD+Ciliated', 'Basal_IGFBP6+', 'Basal_SERPINE1+', 'Basal_SERPINE2+',
       'CCDC3+Basal1', 'Club_DHRS9+', 'FB-like_Basal', 'IGFBP+Basal',
       'ImmuneClub', 'Ionocyte', 'KRT14+AQP1+Secretory', 'KRT14+Goblet',
       'KRT17+Goblet', 'MHCII+Club', 'MKI67+pBasal', 'MUC5B+Goblet',
       'NOTCH3+SupraB', 'NOTCH+Basal2', 'OASiav_Ciliated', 'OMG+Ciliated',
       'RARRES1+lipGoblet', 'S100A2+Basal', 'SCGB1+KRT5-FOXA1+iavClub',
       'SCGB1A1+Deutero', 'SCGB1A1+Goblet', 'SupraB_KRT16+', 'TCN1+Club',
       'TNC+Basal', 'iavAPC_Epi', 'iavClub_lip', 'iavGoblet', 'ifnBasal',
       'ifnGoblet', 'mixGoblet1', 'mixGoblet2', 'p53_Ciliated'],
      dtype='object')

In [36]:
adata.obs['cell_states'] = adata.obs['cell_states'].cat.rename_categories(['APOD+Ciliated', 'IGFBP6+Basal', 'SERPINE1+Basal', 'SERPINE2+Basal',
       'CCDC3+Basal1', 'DHRS9+Club', 'FB-like_Basal', 'IGFBP+Basal',
       'ImmuneClub', 'Ionocyte', 'KRT14+AQP1+Secretory', 'KRT14+Goblet',
       'KRT17+Goblet', 'MHCII+Club', 'MKI67+pBasal', 'MUC5B+Goblet',
       'NOTCH+Basal2', 'NOTCH3+SupraB', 'OASiav_Ciliated', 'OMG+Ciliated',
       'RARRES1+lip_Goblet', 'S100A2+Basal', 'SCGB1+KRT5-FOXA1+iav_Club',
       'SCGB1A1+Deutero', 'SCGB1A1+Goblet', 'KRT16+SupraB', 'TCN1+Club',
       'TNC+Basal', 'iavAPC_Epi', 'iav-lip_Club', 'iav_Goblet', 'ifn_Basal',
       'ifn_Goblet', 'mixed_Goblet1', 'mixed_Goblet2', 'p53_Ciliated'])
adata.obs['cell_states'].cat.categories

Index(['APOD+Ciliated', 'IGFBP6+Basal', 'SERPINE1+Basal', 'SERPINE2+Basal',
       'CCDC3+Basal1', 'DHRS9+Club', 'FB-like_Basal', 'IGFBP+Basal',
       'ImmuneClub', 'Ionocyte', 'KRT14+AQP1+Secretory', 'KRT14+Goblet',
       'KRT17+Goblet', 'MHCII+Club', 'MKI67+pBasal', 'MUC5B+Goblet',
       'NOTCH+Basal2', 'NOTCH3+SupraB', 'OASiav_Ciliated', 'OMG+Ciliated',
       'RARRES1+lip_Goblet', 'S100A2+Basal', 'SCGB1+KRT5-FOXA1+iav_Club',
       'SCGB1A1+Deutero', 'SCGB1A1+Goblet', 'KRT16+SupraB', 'TCN1+Club',
       'TNC+Basal', 'iavAPC_Epi', 'iav-lip_Club', 'iav_Goblet', 'ifn_Basal',
       'ifn_Goblet', 'mixed_Goblet1', 'mixed_Goblet2', 'p53_Ciliated'],
      dtype='object')

### Create a column for cell-compartment

In [37]:
trans_from = [['APOD+Ciliated','OASiav_Ciliated', 'OMG+Ciliated', 'p53_Ciliated'],
['IGFBP6+Basal', 'SERPINE1+Basal', 'SERPINE2+Basal', 'CCDC3+Basal1', 'FB-like_Basal', 'IGFBP+Basal', 'MKI67+pBasal', 'NOTCH+Basal2', 'S100A2+Basal','TNC+Basal', 'ifnBasal', 'ifn_Basal'],
['DHRS9+Club', 'ImmuneClub', 'MHCII+Club', 'SCGB1+KRT5-FOXA1+iavClub', 'TCN1+Club', 'iavClub_lip', 'SCGB1+KRT5-FOXA1+iav_Club', 'iav-lip_Club'], 
['KRT14+Goblet', 'KRT17+Goblet', 'MUC5B+Goblet', 'RARRES1+lipGoblet', 'SCGB1A1+Goblet', 'iavGoblet', 'ifnGoblet', 'mixGoblet1', 'mixGoblet2', 'RARRES1+lip_Goblet', 'iav_Goblet', 'ifn_Goblet', 'mixed_Goblet1', 'mixed_Goblet2'],
['NOTCH3+SupraB', 'KRT16+SupraB'],
['Ionocyte'],
['iavAPC_Epi'],
['SCGB1A1+Deutero'],
['KRT14+AQP1+Secretory']]

trans_to = ['Ciliated', 'Basal', 'Club', 'Goblet', 'SupraB', 'Ionocyte', 'Epi', 'Deuterosomal', 'Secretory']

adata.obs['cell_compartment'] = [str(i) for i in adata.obs['cell_states']]
for leiden,celltype in zip(trans_from, trans_to):
    for leiden_from in leiden:
        adata.obs['cell_compartment'][adata.obs['cell_compartment'] == leiden_from] = celltype

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.obs['cell_compartment'][adata.obs['cell_compartment'] == leiden_from] = celltype
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.obs['cell_compartment'][adata.obs['cell_compartment'] == leiden_from] = celltype
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.obs['cell_compartment'][adata.obs['cell_compartment'] == leiden_from] = celltype
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pa

In [38]:
adata.obs['cell_compartment'] = adata.obs['cell_compartment'].astype('category')
adata.obs['cell_compartment'].cat.categories

Index(['Basal', 'Ciliated', 'Club', 'Deuterosomal', 'Epi', 'Goblet',
       'Ionocyte', 'Secretory', 'SupraB'],
      dtype='object')

### Make labels for batch-correction

In [39]:
adata.obs['seed_labels'] = adata.obs['cell_compartment'].copy()
adata.obs['seed_labels'] = adata.obs['seed_labels'].astype('category')
adata.obs['seed_labels'].cat.categories

Index(['Basal', 'Ciliated', 'Club', 'Deuterosomal', 'Epi', 'Goblet',
       'Ionocyte', 'Secretory', 'SupraB'],
      dtype='object')

### Export object

In [40]:
X_is_raw(adata)

True

In [41]:
adata

AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden', 'cell_compartment', 'seed_labels'
    var: 'mt', 'ribo'

In [42]:
adata.write('../data/Marburg_cell_states_locked_ctl230901.raw.h5ad')