# Step 2B (Optional): scVI Analysis
Deep generative modeling with scvi-tools.

In [None]:
import pandas as pd
import scanpy as sc
from pathlib import Path

try:
    import scvi
except ImportError as e:
    raise ImportError('Install optional dependency: pip install scvi-tools') from e

# Step 2B pipeline: train an scVI model on the processed single-cell data,
# cluster the cells in the learned latent space, and export the results.

# --- Inputs and outputs ------------------------------------------------------
adata_path = Path('data/processed/human_breast_cancer_single_cell.h5ad')
if not adata_path.exists():
    raise FileNotFoundError('Run 01_single_cell_analysis.ipynb first')

results_dir = Path('results/machine_learning')
figure_path = Path('figures/human_breast_cancer_scvi_umap.png')

# Create the output folders before training so that a missing directory
# cannot fail the run after the (slow) model fit has already completed.
for out_dir in (results_dir, figure_path.parent):
    out_dir.mkdir(parents=True, exist_ok=True)

# --- Load data ---------------------------------------------------------------
adata = sc.read_h5ad(str(adata_path))
if 'counts' not in adata.layers:
    # NOTE(review): this fallback copies whatever is in adata.X. scVI is
    # designed for raw counts; presumably step 01 leaves counts in X or in a
    # 'counts' layer -- confirm, otherwise the model is fit on transformed data.
    adata.layers['counts'] = adata.X.copy()

# --- Train scVI --------------------------------------------------------------
scvi.model.SCVI.setup_anndata(adata, layer='counts')
model = scvi.model.SCVI(adata, n_latent=20)
model.train(max_epochs=150)

# --- Embed and cluster in the latent space -----------------------------------
adata.obsm['X_scvi'] = model.get_latent_representation()
sc.pp.neighbors(adata, use_rep='X_scvi')
sc.tl.umap(adata)
sc.tl.leiden(adata, key_added='leiden_scvi', resolution=0.8)

# --- Persist data, model, and tables -----------------------------------------
adata.write('data/processed/human_breast_cancer_scvi.h5ad')
model.save(str(results_dir / 'scvi_model'), overwrite=True)
pd.DataFrame(adata.obsm['X_scvi']).to_csv(
    results_dir / 'scvi_latent_embeddings.csv', index=False
)

cluster_stats = adata.obs['leiden_scvi'].value_counts().sort_index()
pd.DataFrame(
    {'Cluster': cluster_stats.index, 'Cell_Count': cluster_stats.values}
).to_csv(results_dir / 'scvi_cluster_counts.csv', index=False)

# --- Figure ------------------------------------------------------------------
# scanpy's `save=` argument is not a file path: a string is appended to the
# default file name ("umap...") inside sc.settings.figdir. Ask for the figure
# object instead and write it to the intended location explicitly.
umap_fig = sc.pl.umap(adata, color='leiden_scvi', show=False, return_fig=True)
umap_fig.savefig(figure_path, bbox_inches='tight')

print('Step 2B complete')