### Notebook to merge all cell compartment objects and format final object for production

- **Developed by:** Carlos Talavera-López Ph.D
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- v230811

### Import required modules

In [None]:
import anndata
import numpy as np
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt

### Set up working environment

In [None]:
# Configure scanpy logging and report the versions of the loaded packages.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Figure defaults used for every plot produced in this notebook.
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    format='svg',
    color_map='magma_r',
    vector_friendly=True,
)

In [None]:
def X_is_raw(adata):
    """Return True when every value stored in ``adata.X`` is a whole number.

    The check is element-wise. Comparing only per-column sums with their
    integer cast can report non-integer data as raw whenever the fractional
    parts of a column happen to add up to a whole number (e.g. 0.5 + 0.5).

    Parameters
    ----------
    adata
        Any object exposing an ``X`` attribute that is either a dense array
        or a scipy.sparse-style matrix (something providing ``tocsr()``).

    Returns
    -------
    bool
        True if all stored entries are integer-valued, False otherwise.
        NaN entries count as non-integer. An empty matrix returns True.
    """
    matrix = adata.X
    if hasattr(matrix, "tocsr"):
        # Sparse container: only explicitly stored entries need checking,
        # because implicit zeros are integers by definition.
        values = np.asarray(matrix.tocsr().data)
    else:
        values = np.asarray(matrix)
    return bool(np.all(np.mod(values, 1) == 0))

### Read in individual cell compartment objects

In [None]:
# Load the mixed epithelial object and drop its `uns`, `obsm` and `obsp`
# slots before merging (presumably because they are specific to this object).
epi_mixed = sc.read_h5ad('../data/Epithelial_Mixed_states_locked_ctl230730.raw.h5ad')
del epi_mixed.uns, epi_mixed.obsm, epi_mixed.obsp
epi_mixed

In [None]:
# Load the goblet object and drop its `uns`, `obsm` and `obsp` slots before
# merging (presumably because they are specific to this object).
epi_goblet = sc.read_h5ad('../data/Epithelial_Goblet_states_locked_ctl230811.raw.h5ad')
del epi_goblet.uns, epi_goblet.obsm, epi_goblet.obsp
epi_goblet

In [None]:
# Load the basal object and drop its `uns`, `obsm` and `obsp` slots before
# merging (presumably because they are specific to this object).
epi_basal = sc.read_h5ad('../data/Epithelial_Basal_states_locked_ctl230810.raw.h5ad')
del epi_basal.uns, epi_basal.obsm, epi_basal.obsp
epi_basal

In [None]:
# Load the club object and drop its `uns`, `obsm` and `obsp` slots before
# merging (presumably because they are specific to this object).
epi_club = sc.read_h5ad('../data/Epithelial_Club_states_locked_ctl230808.raw.h5ad')
del epi_club.uns, epi_club.obsm, epi_club.obsp
epi_club

In [None]:
# Load the ciliated (MultiC file) object and drop its `uns`, `obsm` and `obsp`
# slots before merging (presumably because they are specific to this object).
epi_ciliated = sc.read_h5ad('../data/Epithelial_MultiC_states_locked_ctl230807.raw.h5ad')
del epi_ciliated.uns, epi_ciliated.obsm, epi_ciliated.obsp
epi_ciliated

### Merge all objects into a single one

In [None]:
# Stack the five compartment objects into one. The labels in
# `batch_categories` are given in the same order as the objects and are
# stored in the `cell_type` column named by `batch_key`.
# NOTE(review): newer anndata releases deprecate `AnnData.concatenate` in
# favour of `anndata.concat` -- confirm before upgrading the environment.
adata = epi_ciliated.concatenate(
    epi_club,
    epi_basal,
    epi_goblet,
    epi_mixed,
    join='inner',
    batch_key='cell_type',
    batch_categories=['ciliated', 'club', 'basal', 'goblet', 'mixed'],
)
adata

### Keep only the shared metadata columns in `adata.obs` and `adata.var`

In [None]:
# Restrict the per-cell metadata to a fixed set of columns, in this order.
adata.obs = adata.obs[[
    'sex',
    'age',
    'ethnicity',
    'PaCO2',
    'donor',
    'infection',
    'disease',
    'SMK',
    'illumina_stimunr',
    'bd_rhapsody',
    'n_genes',
    'doublet_scores',
    'predicted_doublets',
    'batch',
    'n_genes_by_counts',
    'total_counts',
    'total_counts_mt',
    'pct_counts_mt',
    'total_counts_ribo',
    'pct_counts_ribo',
    'percent_mt2',
    'n_counts',
    'percent_chrY',
    'XIST-counts',
    'S_score',
    'G2M_score',
    'condition',
    'sample_group',
    'IAV_score',
    'group',
    'Viral_score',
    'cell_type',
    'cell_states',
]]
adata

In [None]:
# Keep only the `mt` and `ribo` columns of the per-gene metadata.
adata.var = adata.var.loc[:, ['mt', 'ribo']]
adata

### Normalise cell-state labels

In [None]:
# Store the cell-state labels as a categorical column, then list the
# categories so they can be inspected before renaming.
states_as_category = adata.obs['cell_states'].astype('category')
adata.obs['cell_states'] = states_as_category
adata.obs['cell_states'].cat.categories

In [None]:
# Rename every cell-state category in one call. A list passed to
# `rename_categories` is matched to the existing categories by position, so it
# must contain exactly one entry per current category, in the order shown by
# `adata.obs['cell_states'].cat.categories`.
# NOTE(review): the order below is assumed to match the categories present
# when this cell was written -- re-check it whenever an input object changes.
adata.obs['cell_states'] = adata.obs['cell_states'].cat.rename_categories(['APOD+Ciliated', 'IGFBP6+Basal', 'SERPINE1+Basal', 'SERPINE2+Basal',
       'CCDC3+Basal1', 'DHRS9+Club', 'FB-like_Basal', 'IGFBP+Basal',
       'ImmuneClub', 'Ionocyte', 'KRT14+AQP1+Secretory', 'KRT14+Goblet',
       'KRT17+Goblet', 'MHCII+Club', 'MKI67+pBasal', 'MUC5B+Goblet',
       'NOTCH+Basal2', 'NOTCH3+SupraB', 'OASiav_Ciliated', 'OMG+Ciliated',
       'RARRES1+lip_Goblet', 'S100A2+Basal', 'SCGB1+KRT5-FOXA1+iav_Club',
       'SCGB1A1+Deutero', 'SCGB1A1+Goblet', 'KRT16+SupraB', 'TCN1+Club',
       'TNC+Basal', 'iavAPC_Epi', 'iav-lip_Club', 'iav_Goblet', 'ifn_Basal',
       'ifn_Goblet', 'mixed_Goblet1', 'mixed_Goblet2', 'p53_Ciliated'])
# Show the categories after renaming.
adata.obs['cell_states'].cat.categories

### Create a column for cell-compartment

In [None]:
# Each inner list of `trans_from` holds the cell-state labels that collapse
# into the compartment at the same position in `trans_to`. Some states appear
# in two spellings (e.g. 'ifnBasal' and 'ifn_Basal'), presumably to cover
# labels from earlier versions of the input objects.
trans_from = [['APOD+Ciliated','OASiav_Ciliated', 'OMG+Ciliated', 'p53_Ciliated'],
['IGFBP6+Basal', 'SERPINE1+Basal', 'SERPINE2+Basal', 'CCDC3+Basal1', 'FB-like_Basal', 'IGFBP+Basal', 'MKI67+pBasal', 'NOTCH+Basal2', 'S100A2+Basal','TNC+Basal', 'ifnBasal', 'ifn_Basal'],
['DHRS9+Club', 'ImmuneClub', 'MHCII+Club', 'SCGB1+KRT5-FOXA1+iavClub', 'TCN1+Club', 'iavClub_lip', 'SCGB1+KRT5-FOXA1+iav_Club', 'iav-lip_Club'],
['KRT14+Goblet', 'KRT17+Goblet', 'MUC5B+Goblet', 'RARRES1+lipGoblet', 'SCGB1A1+Goblet', 'iavGoblet', 'ifnGoblet', 'mixGoblet1', 'mixGoblet2', 'RARRES1+lip_Goblet', 'iav_Goblet', 'ifn_Goblet', 'mixed_Goblet1', 'mixed_Goblet2'],
['NOTCH3+SupraB', 'KRT16+SupraB'],
['Ionocyte'],
['iavAPC_Epi'],
['SCGB1A1+Deutero'],
['KRT14+AQP1+Secretory']]

trans_to = ['Ciliated', 'Basal', 'Club', 'Goblet', 'SupraB', 'Ionocyte', 'Epi', 'Deuterosomal', 'Secretory']

# Flatten the paired lists into a single state -> compartment lookup.
state_to_compartment = {
    state: compartment
    for states, compartment in zip(trans_from, trans_to)
    for state in states
}

# Build the whole column in one pass and assign it in a single step.
# The previous `adata.obs[col][mask] = value` form is chained assignment:
# pandas warns about it and, with Copy-on-Write enabled, it leaves the
# DataFrame unchanged. States missing from the lookup keep their own label.
adata.obs['cell_compartment'] = [
    state_to_compartment.get(str(state), str(state))
    for state in adata.obs['cell_states']
]

In [None]:
# Store the compartment labels as a categorical column and list the
# resulting categories for inspection.
compartment_as_category = adata.obs['cell_compartment'].astype('category')
adata.obs['cell_compartment'] = compartment_as_category
adata.obs['cell_compartment'].cat.categories

### Make labels for batch-correction

In [None]:
# `seed_labels` is a categorical copy of the compartment labels; `astype`
# returns a new Series, so the source column is left untouched.
adata.obs['seed_labels'] = adata.obs['cell_compartment'].astype('category')
adata.obs['seed_labels'].cat.categories

### Export object

In [None]:
# Run the `X_is_raw` helper on the merged object; the boolean result is shown
# as the cell output.
X_is_raw(adata)

In [None]:
# Display the summary of the final object before it is written to disk.
adata

In [None]:
# Write the merged object to an `.h5ad` file under `../data/`.
adata.write('../data/Marburg_cell_states_locked_ctl230811.raw.h5ad')