In [1]:
import numpy as np
import scanpy as sc
from scvi.data import cellxgene

from scib_metrics.benchmark import Benchmarker, BioConservation

%matplotlib inline


  self.seed = seed
  self.dl_pin_memory_gpu_training = (
  dispatcher = jit(_target='npyufunc',
  dispatcher = jit(_target='npyufunc',
  dispatcher = jit(_target='npyufunc',
  dispatcher = jit(_target='npyufunc',


In [2]:
# Fetch the dataset behind this CELLxGENE explorer URL through scvi's
# `cellxgene` helper; the file name and directory are passed explicitly.
url = "https://cellxgene.cziscience.com/e/232f6a5a-a04c-4758-a6e8-88ab2e3a6e69.cxg/"
adata = cellxgene(
    url,
    save_path="data/",
    filename="luca.h5ad",
)

: 

: 

In [None]:
# Copy the dataset's `is_highly_variable` flag into a plain boolean
# `highly_variable` column, then run PCA.
# NOTE(review): presumably the column is renamed so that scanpy's PCA picks it
# up as the gene selection - confirm against the installed scanpy version.
adata.var["highly_variable"] = np.asarray(
    adata.var["is_highly_variable"].astype(bool)
)
sc.tl.pca(adata)

In [None]:
import scvi
def trainModel(adata, prior, prior_kwargs = None, max_epochs = 100, latent_key = "scVI", seed = None):
    """Train an scVI model on ``adata`` and store its latent embedding.

    Parameters
    ----------
    adata
        AnnData object to train on. It is registered with ``layer="counts"``
        and ``batch_key="batch"`` and is **modified in place**.
    prior
        Forwarded to ``scvi.model.SCVI`` as ``prior_distribution``.
    prior_kwargs
        Forwarded to ``scvi.model.SCVI`` as ``prior_kwargs``.
    max_epochs
        Maximum number of training epochs.
    latent_key
        ``adata.obsm`` key that receives the latent representation. Pass a
        distinct key per model when training several models on the same
        ``adata``; otherwise each call overwrites the previous embedding.
    seed
        If given, assigned to ``scvi.settings.seed`` before the model is
        built. ``None`` leaves the global seed untouched.

    Returns
    -------
    tuple
        ``(adata, vae)`` where ``adata`` is the very same object that was
        passed in (not a copy) and ``vae`` is the trained model.
    """
    if seed is not None:
        scvi.settings.seed = seed
    scvi.model.SCVI.setup_anndata(adata, layer="counts", batch_key="batch")
    vae = scvi.model.SCVI(
        adata,
        prior_distribution=prior,
        prior_kwargs=prior_kwargs,
        n_layers=2,
        n_latent=30,
    )
    vae.train(max_epochs=max_epochs, check_val_every_n_epoch=5)
    adata.obsm[latent_key] = vae.get_latent_representation()
    return adata, vae

In [None]:
# Train the first model variant, selecting the "sdnormal" prior.
adataSD, vaeSD = trainModel(adata, prior="sdnormal")

In [None]:
# Train the second model variant, selecting the "vamp" prior.
# NOTE: trainModel returns the AnnData it was given, so `adataVP` is the same
# object as `adata`; after this call obsm["scVI"] holds this model's embedding.
adataVP, vaeVP = trainModel(adata, prior="vamp")

In [None]:
# Collect both embeddings on one AnnData under distinct keys.
# `adataSD` and `adataVP` are the same object (trainModel returns its input),
# so obsm["scVI"] only holds the most recently trained model's embedding and
# obsm["scVIVP"] was never written. Take each embedding from its own model.
adataBoth = adataSD
adataBoth.obsm["scVIN"] = vaeSD.get_latent_representation()
adataBoth.obsm["scVIV"] = vaeVP.get_latent_representation()

In [None]:
import faiss

from scib_metrics.nearest_neighbors import NeighborsOutput


def faiss_hnsw_nn(X: np.ndarray, k: int):
    """Approximate (HNSW) nearest neighbor search using faiss.

    See https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md
    for index param details.

    Parameters
    ----------
    X
        2D array of points, one row per observation; converted to a
        C-contiguous float32 array before indexing.
    k
        Number of neighbors to retrieve per row. The rows of ``X`` are used
        both as the index contents and as the queries.

    Returns
    -------
    NeighborsOutput
        Neighbor indices and (non-squared) L2 distances as returned by the
        faiss search.
    """
    X = np.ascontiguousarray(X, dtype=np.float32)
    res = faiss.StandardGpuResources()
    M = 32  # HNSW connectivity parameter, see the link in the docstring
    index = faiss.IndexHNSWFlat(X.shape[1], M, faiss.METRIC_L2)
    # NOTE(review): confirm that the installed faiss build actually moves an
    # HNSW index to the GPU here rather than falling back or raising.
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    gpu_index.add(X)
    distances, indices = gpu_index.search(X, k)
    del index
    del gpu_index
    # distances are squared; clamp at 0 first so that a slightly negative
    # value caused by floating-point round-off cannot turn into NaN
    return NeighborsOutput(indices=indices, distances=np.sqrt(np.maximum(distances, 0)))


def faiss_brute_force_nn(X: np.ndarray, k: int):
    """Gpu brute force nearest neighbor search using faiss.

    Parameters
    ----------
    X
        2D array of points, one row per observation; converted to a
        C-contiguous float32 array before indexing.
    k
        Number of neighbors to retrieve per row. The rows of ``X`` are used
        both as the index contents and as the queries.

    Returns
    -------
    NeighborsOutput
        Neighbor indices and (non-squared) L2 distances.
    """
    X = np.ascontiguousarray(X, dtype=np.float32)
    res = faiss.StandardGpuResources()
    index = faiss.IndexFlatL2(X.shape[1])
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # device 0
    gpu_index.add(X)
    distances, indices = gpu_index.search(X, k)
    del index
    del gpu_index
    # distances are squared; clamp at 0 first so that a slightly negative
    # value caused by floating-point round-off (e.g. a point matched against
    # itself) cannot turn into NaN
    return NeighborsOutput(indices=indices, distances=np.sqrt(np.maximum(distances, 0)))

In [None]:
import time

# Use the PCA coordinates as the "no integration" baseline embedding.
adata.obsm["Unintegrated"] = adata.obsm["X_pca"]

# Fail fast, before any expensive neighbor computation, if one of the
# embeddings to benchmark has not been stored on `adata`.
# NOTE(review): nothing in this notebook writes obsm["scANVI"]; it has to come
# with the downloaded dataset - confirm the key name.
embedding_keys = ["Unintegrated", "scANVI", "scVIN", "scVIV"]
missing_keys = [key for key in embedding_keys if key not in adata.obsm]
if missing_keys:
    raise KeyError(f"Embeddings missing from adata.obsm: {missing_keys}")

biocons = BioConservation(isolated_labels=False)

# perf_counter is monotonic, so the elapsed time is not affected by system
# clock adjustments during the (long) benchmark run.
start = time.perf_counter()
# NOTE(review): the models were trained with batch_key="batch" while the
# benchmark uses batch_key="sample" - confirm this difference is intended.
bm = Benchmarker(
    adata,
    batch_key="sample",
    label_key="cell_type",
    embedding_obsm_keys=embedding_keys,
    pre_integrated_embedding_obsm_key="X_pca",
    bio_conservation_metrics=biocons,
    n_jobs=-1,
)
bm.prepare(neighbor_computer=faiss_brute_force_nn)
bm.benchmark()
end = time.perf_counter()
minutes, seconds = divmod(int(end - start), 60)
print(f"Time: {minutes} min {seconds} sec")


In [None]:
# Render the benchmark summary table with min-max scaling of the metric
# values turned off.
bm.plot_results_table(min_max_scale=False)