### Notebook for the label transfer to blood samples using `scANVI`

- **Developed by:** Carlos Talavera-López Ph.D
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- v230710

### Import required modules

In [None]:
import scvi
import anndata
import warnings
import numpy as np
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt

### Set up working environment

In [None]:
# Set scanpy's logging verbosity and print the versions of the loaded packages.
sc.settings.verbosity = 3
sc.logging.print_versions()

# Global figure defaults: on-screen and saved resolution, colour map and file format.
sc.settings.set_figure_params(
    dpi = 180,
    dpi_save = 300,
    color_map = 'magma_r',
    vector_friendly = True,
    format = 'svg',
)

In [None]:
# Ignore all Python warnings raised from here on.
warnings.simplefilter(action = 'ignore')
# Set the global scvi-tools seed used by the models trained below.
scvi.settings.seed = 1712
# IPython magics configuring the inline matplotlib backend:
# white figure background and 'retina' figure format.
%config InlineBackend.print_figure_kwargs = {'facecolor' : "w"}
%config InlineBackend.figure_format = 'retina'

In [None]:
# Model architecture keyword arguments.
# NOTE(review): not referenced by any of the cells visible in this notebook —
# confirm whether they were meant to be passed to the model constructor.
arches_params = {
    "use_layer_norm": "both",
    "use_batch_norm": "none",
    "encode_covariates": True,
    "dropout_rate": 0.2,
    "n_layers": 2,
}

### Read in Healthy data

In [None]:
# Load the blood AnnData object from disk; the bare name on the last line
# displays its summary in the notebook.
SCC0120_1_blood = sc.read_h5ad('../data/SCC0120_1_Blood_scANVI_states_ctl230704.h5ad')
SCC0120_1_blood

In [None]:
# List the sample names available in `obs['sample']` (used for the subsets below).
SCC0120_1_blood.obs['sample'].cat.categories

In [None]:
# Keep only the cells of sample 'pbmc_1'.
# Boolean indexing returns a view of SCC0120_1_blood; the explicit .copy()
# makes an independent object, so the later write to `.obs['seed_labels']`
# does not rely on anndata's implicit view-to-copy conversion.
SCC0120_1_PBMC = SCC0120_1_blood[SCC0120_1_blood.obs['sample'].isin(['pbmc_1'])].copy()
SCC0120_1_PBMC

In [None]:
# Keep only the cells of sample 'pbmc_2'.
# Boolean indexing returns a view of SCC0120_1_blood; the explicit .copy()
# makes an independent object, so the later write to `.obs['seed_labels']`
# does not rely on anndata's implicit view-to-copy conversion.
SCC0120_2_PBMC = SCC0120_1_blood[SCC0120_1_blood.obs['sample'].isin(['pbmc_2'])].copy()
SCC0120_2_PBMC

In [None]:
# pbmc_1 reuses its existing `C_scANVI` annotation as seed labels;
# every pbmc_2 cell is marked 'Unknown', the unlabeled category given to scANVI below.
SCC0120_1_PBMC.obs['seed_labels'] = SCC0120_1_PBMC.obs['C_scANVI'].copy()
SCC0120_2_PBMC.obs['seed_labels'] = 'Unknown'

In [None]:
# Merge the labelled pbmc_1 cells with the unlabelled pbmc_2 cells.
# The second operand must be SCC0120_2_PBMC: concatenating SCC0120_1_PBMC with
# itself would duplicate pbmc_1 under the 'pbmc_2' batch name and leave no
# 'Unknown' cells for scANVI to annotate.
# `batch_categories` are assigned in order (caller first, then the argument);
# join = 'inner' keeps only the variables shared by both objects.
adata = SCC0120_1_PBMC.concatenate(SCC0120_2_PBMC, batch_key = 'pbmc_batch', batch_categories = ['pbmc_1', 'pbmc_2'], join = 'inner')
adata

In [None]:
# Number of cells per seed label in the merged object.
adata.obs['seed_labels'].value_counts()

### Select HVGs

In [None]:
# Snapshot of the merged object taken before the gene subsetting below.
adata_raw = adata.copy()

# Store a copy of the current expression matrix in a dedicated 'counts' layer.
# NOTE(review): assumes `adata.X` holds raw counts at this point — confirm.
adata.layers['counts'] = adata.X.copy()

# Select 7000 highly variable genes per 'pbmc_batch' from the 'counts' layer
# and subset `adata` to those genes in place.
sc.pp.highly_variable_genes(adata, flavor = "seurat_v3", layer = "counts", batch_key = "pbmc_batch", n_top_genes = 7000, subset = True)

adata

### Set up and train the `scVI` model used for annotation transfer

In [None]:
# Register the fields of `adata` that the scVI/scANVI models will read:
# the 'counts' layer as input, 'pbmc_batch' as batch, 'seed_labels' as labels,
# plus the listed categorical and continuous covariates.
scvi.model.SCVI.setup_anndata(
    adata,
    layer = 'counts',
    batch_key = 'pbmc_batch',
    labels_key = 'seed_labels',
    categorical_covariate_keys = ['donor', 'batch'],
    continuous_covariate_keys = ['n_genes', 'n_counts'],
)

In [None]:
# Build the scVI model on the registered AnnData object:
# 50 latent dimensions, 3 layers, per-gene-per-batch dispersion and an 'nb' gene likelihood.
scvi_model = scvi.model.SCVI(
    adata,
    n_latent = 50,
    n_layers = 3,
    dispersion = 'gene-batch',
    gene_likelihood = 'nb',
)

In [None]:
# Train the scVI model with default settings, with GPU use disabled.
scvi_model.train(use_gpu = False)

### Label transfer with `scANVI` 

In [None]:
# Initialise scANVI from the trained scVI model; cells whose label equals
# 'Unknown' are treated as unlabeled.
scanvi_model = scvi.model.SCANVI.from_scvi_model(
    scvi_model,
    unlabeled_category = 'Unknown',
)

In [None]:
# Train the scANVI model with default settings, with GPU use disabled.
scanvi_model.train(use_gpu = False)

In [None]:
# Store the scANVI predictions for every cell in `obs['C_scANVI']`.
adata.obs["C_scANVI"] = scanvi_model.predict(adata)

- Extract latent representation

In [None]:
# Store the latent embedding in `obsm['X_scVI']` for the neighbour graph below.
# NOTE(review): the embedding is taken from `scvi_model`, not from the
# `scanvi_model` trained afterwards — confirm this is intended.
adata.obsm["X_scVI"] = scvi_model.get_latent_representation(adata)

- Visualise corrected dataset

In [None]:
# Neighbour graph on the scVI latent space.
sc.pp.neighbors(
    adata,
    use_rep = "X_scVI",
    n_neighbors = 50,
    metric = 'minkowski',
)

# UMAP embedding with a fixed random state.
sc.tl.umap(
    adata,
    min_dist = 0.3,
    spread = 1,
    random_state = 1712,
)

# One panel per metadata column, three panels per row.
sc.pl.umap(
    adata,
    color = ['pbmc_batch', 'condition', 'seed_labels', 'batch', 'C_scANVI'],
    frameon = False,
    size = 10,
    legend_fontsize = 5,
    ncols = 3,
)

### Export annotated object

In [None]:
# Rebuild an object with the pre-HVG expression matrix and gene table
# (`adata_raw`) combined with the annotated `obs` and embeddings (`obsm`) of `adata`.
# NOTE(review): the variable is named `adata_skin` although this notebook
# processes blood samples — confirm the name is intended.
adata_skin = anndata.AnnData(X = adata_raw.X, var = adata_raw.var, obs = adata.obs, obsm = adata.obsm) 
adata_skin

In [None]:
# Keep only the cells whose `obs['group']` equals 'SCC0120_1_skin'.
# NOTE(review): the data loaded above are PBMC/blood samples; this filter value
# looks carried over from a skin notebook and may select no cells — verify the
# intended group name against `adata_skin.obs['group']`.
SCC0120_1_skin_annotated = adata_skin[adata_skin.obs['group'].isin(['SCC0120_1_skin'])]
SCC0120_1_skin_annotated