ATLAS LEVEL INTEGRATION OF LUNG DATA

In [None]:
# All code and comments below are written for single-cell data.

# Import packages. Make sure the correct environment is active first (conda activate scvi-env).
import os
import tempfile
import scanpy as sc
import scvi
import seaborn as sns
import torch
from rich import print
from scib_metrics.benchmark import Benchmarker

# Fix the random seed so that model training and the embeddings computed
# below give the same results on every Restart & Run All.
scvi.settings.seed = 0

# Directory where result files produced by this notebook are written.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
output = '/Users/srivalli/Documents/GitHub/Single-cell-data-analysis/SCVI tools/output/lung data'

# Directory in which the input .h5ad file is looked up.
save_dir =  '/Users/srivalli/Desktop/Heart'


In [None]:
# Load the lung atlas.
# The file is expected inside save_dir; backup_url names the location
# scanpy can fetch it from when it is not on disk yet.
adata_path = os.path.join(save_dir, "lung_atlas.h5ad")
adata = sc.read(
    filename=adata_path,
    backup_url="https://figshare.com/ndownloader/files/24539942",
)

# Show the AnnData summary as the cell output.
adata

In [None]:
# --- Data preprocessing ---

# Store the current, full-dimension object in .raw before genes are dropped.
adata.raw = adata

# Pick the top 2000 highly variable genes (Seurat v3 flavour), computed from
# the "counts" layer with "batch" as the batch key. subset=True keeps only
# the selected genes in adata.
sc.pp.highly_variable_genes(
    adata,
    layer="counts",
    batch_key="batch",
    flavor="seurat_v3",
    n_top_genes=2000,
    subset=True,
)

In [None]:
# --- Integration with scVI ---
# Typically used when cell annotations are unavailable or not helpful.

# Register the AnnData with scVI: use the "counts" layer as input and
# "batch" as the batch covariate.
scvi.model.SCVI.setup_anndata(adata, batch_key="batch", layer="counts")

# Build the model: 2 layers, a 30-dimensional latent space,
# negative-binomial ("nb") gene likelihood.
model = scvi.model.SCVI(
    adata,
    gene_likelihood="nb",
    n_latent=30,
    n_layers=2,
)

# Fit the model with the default training settings.
model.train()

In [None]:
# Key under which the scVI latent representation is kept in adata.obsm.
SCVI_LATENT_KEY = "X_scVI"

# Store the latent representation of the cells the model was set up with.
adata.obsm[SCVI_LATENT_KEY] = model.get_latent_representation(adata)

In [7]:
# Cluster the cells in the scVI latent space.
# Build the neighbour graph on the scVI embedding rather than on PCA.
sc.pp.neighbors(adata, use_rep=SCVI_LATENT_KEY)

# Leiden clustering on that graph; labels are written to adata.obs["leiden"].
# The previous call passed `max=50`, which is not a parameter of
# sc.tl.leiden and made the call fail, so it has been removed.
# TODO(review): if a specific limit was intended (e.g. number of iterations
# via `n_iterations`, or cluster granularity via `resolution`), set it explicitly.
sc.tl.leiden(adata)

In [None]:
# --- Visualisation of the scVI latent space ---

# Key for the 2-D MDE embedding (used here instead of UMAP).
SCVI_MDE_KEY = "X_scVI_MDE"

# Compute the MDE embedding from the scVI latent representation.
adata.obsm[SCVI_MDE_KEY] = scvi.model.utils.mde(adata.obsm[SCVI_LATENT_KEY])

# One panel per colouring ("batch", then "leiden"), stacked in a single column.
sc.pl.embedding(
    adata,
    basis=SCVI_MDE_KEY,
    color=["batch", "leiden"],
    ncols=1,
    frameon=False,
)

In [None]:
# Display the per-cell metadata table stored in adata.obs.
adata.obs

In [None]:
# Same scVI MDE embedding, this time coloured by the annotated cell type.
sc.pl.embedding(
    adata,
    basis=SCVI_MDE_KEY,
    color=["cell_type"],
    ncols=1,
    frameon=False,
)

In [None]:
# --- Integration with scANVI ---
# Typically used when cell annotations are available.

# Initialise scANVI from the trained scVI model. labels_key names the obs
# column holding the annotations; unlabeled_category is the value that marks
# cells without a label.
scanvi_model = scvi.model.SCANVI.from_scvi_model(
    model,
    unlabeled_category="Unknown",
    labels_key="cell_type",
    adata=adata,
)

In [None]:
# Train scANVI for 20 epochs with n_samples_per_label set to 100.
scanvi_model.train(
    n_samples_per_label=100,
    max_epochs=20,
)

In [None]:
# --- Visualisation of the scANVI latent space ---

# Key for the scANVI latent representation in adata.obsm.
SCANVI_LATENT_KEY = "X_scANVI"
adata.obsm[SCANVI_LATENT_KEY] = scanvi_model.get_latent_representation(adata)

# Key for the 2-D MDE embedding of the scANVI latent space (used instead of
# UMAP). This key was previously used in the plot without being defined or
# computed, which raised a NameError.
SCANVI_MDE_KEY = "X_scANVI_MDE"
adata.obsm[SCANVI_MDE_KEY] = scvi.model.utils.mde(adata.obsm[SCANVI_LATENT_KEY])

# Plot the scANVI embedding coloured by annotated cell type.
sc.pl.embedding(
    adata, basis=SCANVI_MDE_KEY, color=["cell_type"], ncols=1, frameon=False
)

In [None]:
# Display the AnnData summary to review the components added so far
# (for example the entries written to adata.obsm above).
adata

In [None]:
# --- Computing integration metrics ---

# "X_pca" serves as the unintegrated baseline embedding. This notebook does
# not compute a PCA itself, so create one if the loaded data did not ship
# with it; otherwise the benchmark cannot find the embedding.
if "X_pca" not in adata.obsm:
    sc.tl.pca(adata)

# Compare the baseline with the scVI and scANVI latent spaces, using "batch"
# as the batch key and "cell_type" as the label key.
bm = Benchmarker(
    adata,
    batch_key="batch",
    label_key="cell_type",
    embedding_obsm_keys=["X_pca", SCVI_LATENT_KEY, SCANVI_LATENT_KEY],
    n_jobs=-1,
)

# Run all metrics.
bm.benchmark()

# Render the metrics as a table figure (unscaled values).
bm.plot_results_table(min_max_scale=False)

# Collect the same unscaled metrics as a DataFrame and print them.
df = bm.get_results(min_max_scale=False)
print(df)

# Save the metrics to disk. `output` is a directory, and pandas DataFrames
# have no `.write` method, so write a CSV file inside that directory
# (creating the directory first if needed).
os.makedirs(output, exist_ok=True)
df.to_csv(os.path.join(output, "integration_metrics.csv"))