### Notebook for the deconvolution of 10X Visium lung sample using `cell2location`

- **Developed by**: Carlos Talavera-López Ph.D
- **Institute of Computational Biology - Computational Health Centre - Helmholtz München**
- v221205

### Load required modules

In [None]:
import scvi
import anndata
import numpy as np
import scanpy as sc
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# `os` is not imported by the notebook's import cell, so bring it into scope
# here; without it the assignment below fails with a NameError.
import os

# The flags are written to the environment before `cell2location` is imported
# (import order kept from the original cell).
# NOTE(review): THEANO_FLAGS only matters for Theano-based cell2location
# releases — confirm it is still needed for the installed version.
os.environ["THEANO_FLAGS"] = 'device = cuda,floatX = float32,force_device = True'
import cell2location

### Set up working environment

In [None]:
# IPython magics configuring the inline matplotlib backend:
# 'retina' figure format and a white ("w") figure face colour.
%config InlineBackend.figure_format = 'retina'
%config InlineBackend.print_figure_kwargs = {'facecolor' : "w"}

In [None]:
# Root folder for everything this notebook writes.
results_folder = '/home/cartalop/data/carlos/single_cell/autotalker/public_datasets/meyer_lung/c2l/'

# `results_folder` ends with '/', so strip it once before appending
# sub-folders; otherwise the derived paths contain a doubled separator ('//').
_results_root = results_folder.rstrip('/')

# Output folder of the reference (signature) model and of the spatial mapping model.
ref_run_name = f'{_results_root}/reference_signatures'
run_name = f'{_results_root}/cell2location_map'

In [None]:
plt.show()

# Set scanpy's verbosity level and print the versions of the loaded packages.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Figure defaults applied through scanpy's settings object.
figure_defaults = dict(
    dpi = 160,
    color_map = 'RdPu',
    dpi_save = 300,
    vector_friendly = True,
    format = 'svg',
    fontsize = 8,
)
sc.settings.set_figure_params(**figure_defaults)


### Load Visium sample

- The sample has already been formatted into an `anndata` object by the authors

In [None]:
# Single source of truth for the sample identifier. The original cell loaded
# file '6332STDY9479172.h5ad' but labelled the spots '6332STDY10289520';
# the results file written later is also named after '6332STDY9479172', so the
# label is now derived from the same identifier as the input file.
# NOTE(review): confirm that '6332STDY9479172' is the intended sample.
sample_id = '6332STDY9479172'

adata_vis = sc.read_h5ad(f'/home/cartalop/data/carlos/single_cell/autotalker/public_datasets/meyer_lung/{sample_id}.h5ad')
# 'sample' is used further down as the batch key when setting up Cell2location.
adata_vis.obs['sample'] = sample_id
adata_vis

In [None]:
# Show the first rows of the per-spot metadata table.
adata_vis.obs.head()

In [None]:
# Keys to colour the spots by, one panel per key.
# NOTE(review): presumably 'SLT: ...' columns shipped with the authors' object — confirm.
slt_keys = [
    'SLT: Pulmonary neuroendocrine',
    'SLT: Vascular SMC 1',
    'SLT: Pericyte',
    'SLT: aDC 1',
]
sc.pl.spatial(adata_vis, color = slt_keys)

- Remove mitochondrial genes for downstream analysis

In [None]:
# Flag genes whose name starts with 'MT-' and keep the flag in `var`.
adata_vis.var['MT_gene'] = [name.startswith('MT-') for name in adata_vis.var_names]
mt_mask = adata_vis.var['MT_gene'].values

# Keep the counts of the flagged genes in `obsm['MT']` before dropping them from X.
# `.toarray()` assumes X is a sparse matrix — TODO confirm for this object.
adata_vis.obsm['MT'] = adata_vis[:, mt_mask].X.toarray()

# Retain only the genes that were not flagged.
adata_vis = adata_vis[:, ~mt_mask]
adata_vis

### Load scRNA-Seq reference

In [None]:
# Location of the scRNA-seq reference (HLCA subset, per the file name).
reference_path = '/home/cartalop/data/carlos/single_cell/lung/hlca/HLCA_raw_100K_subset.h5ad'
adata_ref = sc.read_h5ad(reference_path)
adata_ref

- Check if `anndata.X` is raw

In [None]:
def X_is_raw(adata):
    """Return True when every value stored in ``adata.X`` is integer-valued.

    The check is done on the individual entries rather than on per-gene
    column sums: non-integer values can add up to an integral column sum
    (e.g. 0.5 + 0.5), which would make normalised data look like raw counts.

    Parameters
    ----------
    adata
        Object exposing the expression matrix as ``.X``, either a dense
        array or a SciPy sparse matrix.

    Returns
    -------
    bool
        True if all entries are whole numbers (an empty matrix counts as raw),
        False otherwise (including when NaN values are present).
    """
    # Local import: scipy is not imported at notebook level.
    from scipy.sparse import issparse

    X = adata.X
    # For sparse input only the explicitly stored values need checking;
    # implicit zeros are integers by definition.
    values = X.tocsr().data if issparse(X) else np.asarray(X)
    return bool(np.all(np.mod(values, 1) == 0))

In [None]:
# Evaluate the raw-counts check on the reference object (result is displayed by the notebook).
X_is_raw(adata_ref)

### Select informative genes in reference for deconvolution

In [None]:
from cell2location.utils.filtering import filter_genes

# Gene selection on the reference with cell2location's helper; the result is
# used below to subset the genes of `adata_ref`.
selected = filter_genes(
    adata_ref,
    cell_count_cutoff = 5,
    cell_percentage_cutoff2 = 0.03,
    nonz_mean_cutoff = 1.12,
)

- Filter object using these genes

In [None]:
# Keep only the selected genes; `.copy()` turns the sliced view into an independent object.
adata_ref = adata_ref[:, selected].copy()
adata_ref

In [None]:
# Number of reference cells per 'ann_level_4' label (the label column used for the signatures below).
adata_ref.obs['ann_level_4'].value_counts()

### Estimation of reference cell type signatures (NB regression)

In [None]:

from cell2location.models import RegressionModel

# Columns of `adata_ref.obs` handed to the regression model:
# 'sample' as batch, 'ann_level_4' as cell type label and
# 'sequencing_platform' as an additional categorical covariate.
reference_setup = {
    'adata': adata_ref,
    'batch_key': 'sample',
    'labels_key': 'ann_level_4',
    'categorical_covariate_keys': ['sequencing_platform'],
}
RegressionModel.setup_anndata(**reference_setup)

# Build the model on the registered object and print the registered setup.
mod = RegressionModel(adata_ref)
mod.view_anndata_setup()

In [None]:
# Train the reference regression model.
# NOTE(review): `train_size = 1` presumably means no held-out validation split — confirm against the cell2location docs.
mod.train(max_epochs = 250, batch_size = 2500, train_size = 1, lr = 0.002)

In [None]:
# Plot the training history of the reference model.
# NOTE(review): the argument presumably skips the first 20 epochs in the plot — confirm.
mod.plot_history(20)

### Export posterior estimates of the reference cell type signatures

In [None]:
# Settings forwarded to the posterior sampling step.
reference_sampling = {
    'num_samples': 1000,
    'batch_size': 2500,
    'use_gpu': True,
}

# Export the posterior of the reference model; the returned object replaces `adata_ref`.
adata_ref = mod.export_posterior(adata_ref, sample_kwargs = reference_sampling)

### Save model

In [None]:
# Save the reference model under `ref_run_name`, overwriting an existing save.
mod.save(f"{ref_run_name}", overwrite = True)

### Save annotated scRNA-Seq reference object

In [None]:
# Write the reference object (with the exported posterior) next to the saved model.
adata_file = f"{ref_run_name}/sc.h5ad"
adata_ref.write(adata_file)
# Display the path that was written.
adata_file

### Evaluate model QC

In [None]:
# Diagnostic QC plots for the reference model.
mod.plot_QC()

### Extract reference cell types signatures as a `pandas` dataframe

In [None]:
# Names of the reference cell types and the matching signature column names.
factor_names = adata_ref.uns['mod']['factor_names']
signature_columns = [f'means_per_cluster_mu_fg_{name}' for name in factor_names]

# The signatures are read from `varm['means_per_cluster_mu_fg']` when that
# entry exists, otherwise from the columns of `var`.
if 'means_per_cluster_mu_fg' in adata_ref.varm.keys():
    signature_source = adata_ref.varm['means_per_cluster_mu_fg']
else:
    signature_source = adata_ref.var

inf_aver = signature_source[signature_columns].copy()
# Replace the prefixed column names by the plain cell type names.
inf_aver.columns = factor_names
inf_aver.iloc[0:5, 0:5]

### Spatial mapping with `cell2location`

- Find shared genes and subset both anndata and reference signatures

In [None]:
# Genes present in both the Visium object and the reference signatures
# (np.intersect1d returns the sorted unique common values).
intersect = np.intersect1d(adata_vis.var_names, inf_aver.index)
# Restrict both objects to the shared genes, in the same order.
adata_vis = adata_vis[:, intersect].copy()
inf_aver = inf_aver.loc[intersect, :].copy()

In [None]:
from cell2location.models import Cell2location
# Register the Visium object with the model, using 'sample' as batch key.
Cell2location.setup_anndata(adata = adata_vis, batch_key = "sample")

# Build the spatial mapping model from the Visium data and the reference signatures.
# NOTE: `mod` is rebound here; the reference regression model is no longer reachable under this name.
# NOTE(review): `N_cells_per_location` and `detection_alpha` are model hyperparameters —
# confirm the chosen values against the tissue and the cell2location documentation.
mod = Cell2location(
    adata_vis, cell_state_df = inf_aver,
    N_cells_per_location = 30,
    detection_alpha = 200
)
mod.view_anndata_setup()

# Train the mapping model.
# NOTE(review): `batch_size = None` presumably means full-data training (cf. the legend label used in the next cell) — confirm.
mod.train(
    max_epochs = 30000,
    batch_size = None,
    train_size = 1,
)

- Plot ELBO loss history during training, removing the first 1000 epochs from the plot

In [None]:
# Plot the training history of the mapping model and label the curve.
mod.plot_history(1000)
plt.legend(labels = ['full data training']);

### Exporting estimated posterior distributions of cell abundance and saving results

In [None]:
# Settings for posterior sampling; the batch size equals the number of
# observations held by the model, i.e. all of them in a single batch.
spatial_sampling = {
    'num_samples': 1000,
    'batch_size': mod.adata.n_obs,
    'use_gpu': True,
}

# Export the posterior of the mapping model; the returned object replaces `adata_vis`.
adata_vis = mod.export_posterior(adata_vis, sample_kwargs = spatial_sampling)

In [None]:
# Save the mapping model under `run_name`, overwriting an existing save.
mod.save(f"{run_name}", overwrite = True)

### Save `anndata` object with results

In [None]:
# Write the Visium object (with the exported posterior) into the model folder.
adata_file = f"{run_name}/6332STDY9479172_c2l_ctl221206.h5ad"
adata_vis.write(adata_file)
# Display the path that was written.
adata_file

In [None]:
# Diagnostic QC plots for the mapping model.
mod.plot_QC()

In [None]:
# Spatial QC plot across batches; the returned figure is kept in `fig`.
fig = mod.plot_spatial_QC_across_batches()

### Visualising cell abundance in spatial coordinates

- Add 5% quantile, representing confident cell abundance, 'at least this amount is present', to `adata.obs` with cell names for plotting

In [None]:
# Copy the 'q05_cell_abundance_w_sf' matrix from `obsm` into `obs`, one column
# per cell type named after `uns['mod']['factor_names']`, so the values can be
# used as plotting keys.
adata_vis.obs[adata_vis.uns['mod']['factor_names']] = adata_vis.obsm['q05_cell_abundance_w_sf']

In [None]:
# Slide selection via `select_slide` is left disabled below; the whole object
# is copied and used as the slide to plot instead.
#from cell2location.utils import select_slide
#slide = select_slide(adata_vis, 'V1_Human_Lymph_Node')

slide = adata_vis.copy()

In [None]:
# Display a summary of the object that will be plotted.
slide

In [None]:
# Cell types to show, one spatial panel each (hand-picked list).
colors = [
    'Alveolar macrophages', 'Alveolar fibroblasts', 'Classical monocytes', 'Ionocyte',
    'Basal resting', 'Multiciliated', 'Tuft', 'Suprabasal',
]

# Temporary matplotlib settings: black axes background and a fixed panel size.
panel_style = {'axes.facecolor': 'black', 'figure.figsize': [4.5, 5]}

with mpl.rc_context(panel_style):
    sc.pl.spatial(
        slide,
        color = colors,
        cmap = 'magma',
        ncols = 4,
        size = 1.3,
        img_key = 'hires',
        # colour scale runs from 0 to the 99.2% quantile of each panel
        vmin = 0,
        vmax = 'p99.2',
    )

### Plot multiple cell types in one panel

In [None]:
from cell2location.plt import plot_spatial

# Cell types drawn together in a single panel; the plotting keys are the
# labels converted to strings.
clust_labels = ['Alveolar macrophages', 'Alveolar fibroblasts', 'Classical monocytes', 'Ionocyte']
clust_col = [str(label) for label in clust_labels]

# Options for the combined plot, applied on top of the histology image.
multi_panel_options = dict(
    show_img = True,
    style = 'fast',
    max_color_quantile = 0.992,
    circle_diameter = 6,
    colorbar_position = 'right',
)

with mpl.rc_context({'figure.figsize': (10, 10)}):
    fig = plot_spatial(
        adata = slide,
        color = clust_col,
        labels = clust_labels,
        **multi_panel_options,
    )

In [None]:
# Display the final state of the plotted object.
slide