In [1]:
import scanpy as sc
import scanpy.external as sce
import tempfile
import anndata as ad
import os
import torch
import scvi
import ray
import matplotlib.pyplot as plt
from ray import tune
from scvi import autotune
# Use the 'high' float32 matmul precision setting (trades a little precision
# for speed on supporting GPUs; see the torch documentation for details).
torch.set_float32_matmul_precision('high')
# Fix the global seed: the gene selection samples from a binomial and the
# tuning trials train stochastically, so without a seed a fresh
# Restart & Run All does not reproduce the same results.
scvi.settings.seed = 0
# Every relative path used later in the notebook resolves against /data.
os.chdir("/data")


In [2]:
# Display the installed scvi version so the environment is recorded alongside the results.
scvi.__version__

'1.2.0'

In [3]:
# Display whether PyTorch reports a usable CUDA device in this kernel.
torch.cuda.is_available()

True

In [4]:
# Read the input AnnData object (doublet-filtered, per the file name);
# the path is relative to the current working directory.
adata = sc.read_h5ad(filename="h5ad/doublet_filtered.h5ad")

In [5]:
# Select highly variable genes with scvi's Poisson-based gene selection,
# reading from the "counts" layer and using "sample_id" as the batch key.
# The top 8000 genes are kept; with subset=True and no return value captured,
# `adata` itself is reduced to the selected genes.
scvi.data.poisson_gene_selection(
    adata,
    layer="counts",
    batch_key="sample_id",
    n_top_genes=8000,
    subset=True,
)

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

Sampling from binomial...:   0%|          | 0/10000 [00:00<?, ?it/s]

In [7]:
# Model class to tune; register `adata` with it, reading from the "counts"
# layer and using "sample_id" as the batch key.
model_cls = scvi.model.SCVI
model_cls.setup_anndata(adata, batch_key="sample_id", layer="counts")

# Hyperparameter search space handed to the autotuner.
#   model_params -> sampled constructor arguments for the model
#   train_params -> fixed epoch budget plus a log-uniformly sampled learning rate
search_space = dict(
    model_params=dict(
        n_hidden=tune.choice([64, 128, 256]),
        n_layers=tune.choice([1, 2, 3, 4]),
        n_latent=tune.choice([10, 20, 30, 40, 50]),
        gene_likelihood=tune.choice(["nb", "zinb"]),
    ),
    train_params=dict(
        max_epochs=100,
        plan_kwargs=dict(lr=tune.loguniform(1e-4, 1e-2)),
    ),
)

In [None]:
# Start the local Ray runtime without forwarding worker logs to the notebook.
# ignore_reinit_error=True makes the cell safe to re-run in a live kernel:
# a second ray.init() call would otherwise raise because Ray is already running.
ray.init(log_to_driver=False, ignore_reinit_error=True)

In [None]:
# Run the hyperparameter search: 120 sampled configurations from
# `search_space`, minimising "validation_loss". Each trial is allotted
# 5 CPUs and 1 GPU, and experiment logs are written under /opt/dlami/nvme.
results = autotune.run_autotune(
    model_cls,
    data=adata,
    search_space=search_space,
    num_samples=120,
    metrics="validation_loss",
    mode="min",
    resources=dict(cpu=5, gpu=1),
    logging_dir="/opt/dlami/nvme",
)

0,1
Current time:,2024-10-08 22:23:11
Running for:,02:16:04.45
Memory:,29.6/60.5 GiB

Trial name,status,loc,model_params/gene_li kelihood,model_params/n_hidde n,model_params/n_laten t,model_params/n_layer s,train_params/max_epo chs,train_params/plan_kw args/lr,iter,total time (s),validation_loss
_trainable_389046a1,RUNNING,12.0.0.75:12024,nb,256,40,2,100,0.00524906,11.0,72.5345,7631.6
_trainable_9ea70b1d,PENDING,,nb,256,40,2,100,0.00225791,,,
_trainable_0414f233,TERMINATED,12.0.0.75:12024,zinb,64,40,2,100,0.000823858,1.0,7.98707,8507.42
_trainable_04dc9c06,TERMINATED,12.0.0.75:12024,zinb,128,20,2,100,0.000539716,1.0,7.44745,8373.02
_trainable_0534e71d,TERMINATED,12.0.0.75:12024,nb,128,30,1,100,0.00735901,16.0,89.8206,7649.27
_trainable_05a0e40c,TERMINATED,12.0.0.75:12024,nb,256,30,1,100,0.000197739,1.0,6.34256,8659.28
_trainable_0fd454f7,TERMINATED,12.0.0.75:12024,nb,64,10,4,100,0.00276395,2.0,15.7941,8040.75
_trainable_10082111,TERMINATED,12.0.0.75:12024,nb,256,10,2,100,0.000695989,1.0,7.25631,8478.11
_trainable_117a24b7,TERMINATED,12.0.0.75:12024,nb,256,20,2,100,0.00233862,2.0,13.7472,7894.62
_trainable_11e3363e,TERMINATED,12.0.0.75:12024,nb,256,40,3,100,0.000101841,1.0,7.834,8795.93


In [17]:
# Pull the tuning results out of the experiment's result grid as a dataframe.
df = (
    results
    .result_grid
    .get_dataframe()
)

In [18]:
# Save the tuning results table as CSV (path relative to the working directory).
# Create the output directory first so the write does not fail when
# nb/scvi does not exist yet.
os.makedirs("nb/scvi", exist_ok=True)
df.to_csv("nb/scvi/autotune_results.csv")