In [None]:
# Restrict this kernel to GPU 0. The variable must be set in the kernel's own
# environment (and before torch initialises CUDA); a `!VAR=value` shell line
# only affects a short-lived subshell and is discarded immediately.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Before running this notebook, ensure you have run the following commands, as described in detail in the README found in the Jean_Instructions folder

## You have trained the VAE

```bash
echo "Training Autoencoder, this might take a long time"
CUDA_VISIBLE_DEVICES=0 python /path/to/VAE_train.py --data_dir '/path/where/you/saved/tabula_muris/all.h5ad' --num_genes 18996 --state_dict '/path/where/you/saved/scimilarity/pretrained/wrights/annotation_model_v1' --save_dir '/dir/where/to/save/the/trained/VAE/model/' --max_steps 200000 --max_minutes 600
echo "Training Autoencoder done"
```

## You have trained the diffusion model

```bash
echo "Training diffusion backbone"
CUDA_VISIBLE_DEVICES=0 python path/to/cell_train.py --data_dir '/path/where/you/saved/tabula_muris/all.h5ad' --vae_path '/path/where/you/saved/VAE/model.pt'   \
    --save_dir '/dir/where/to/save/the/trained/diffusion/model/' --model_name 'name_you_want_to_give' --lr_anneal_steps 80000
echo "Training diffusion backbone done"
```

## You have generated the latent space

```bash
python path/to/cell_sample.py --model_path "/path/where/you/saved/diffusion/model.pt" --sample_dir "/file/where/to/save/the/generated/latent/space/example.npz"
```

## Then, you can run the following

In [None]:
### CHANGE ACCORDING TO YOUR FILE SYSTEM ###
# .h5ad file with the real cells; read with sc.read_h5ad, and its .obs must
# contain a 'celltype' column (used to drop unlabeled cells).
path_to_anndata = '/path/where/you/saved/tabula_muris/all.h5ad'
# Checkpoint of the trained VAE; passed to torch.load inside load_VAE().
path_to_saved_VAE_model = '/path/where/you/saved/VAE/model.pt'
# .npz file produced by cell_sample.py; the generated latent vectors are read
# from its 'cell_gen' entry.
path_to_unconditional_sample = '/file/where/you/saved/the/latent/space/example.npz'

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import anndata as ad
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from scipy import stats
import torch
import sys
sys.path.append('..') ### CHANGE TO THE PATH WHERE THE FOLDER VAE IS LOCATED
from VAE.VAE_model import VAE 

In [None]:
def load_VAE():
    """Build the VAE and load its trained weights for CPU inference.

    The architecture arguments are hard-coded here and have to match the ones
    the checkpoint at ``path_to_saved_VAE_model`` was trained with (e.g.
    ``num_genes=18996``); ``load_state_dict`` raises a ``RuntimeError`` on
    missing/unexpected keys or mismatched shapes.

    Returns:
        The ``VAE`` instance with weights loaded on the CPU, switched to
        evaluation mode.
    """
    autoencoder = VAE(
        num_genes=18996,
        device='cpu',
        seed=0,
        loss_ae='mse',
        hidden_dim=128,
        decoder_activation='ReLU',
    )
    # map_location lets a checkpoint saved from a GPU run be loaded on CPU.
    state_dict = torch.load(path_to_saved_VAE_model, map_location=torch.device('cpu'))
    autoencoder.load_state_dict(state_dict)
    # The model is only used for decoding in this notebook, so put it in
    # evaluation mode: any dropout / batch-norm layers inside the VAE (its
    # definition is not visible here) then behave deterministically.
    autoencoder.eval()
    return autoencoder

In [None]:
# LOAD REAL DATA
adata = sc.read_h5ad(path_to_anndata)
# Positions of cells that carry a cell-type label; notna() works whether the
# column is categorical or a plain object column.
labeled_idx = np.flatnonzero(adata.obs['celltype'].notna().to_numpy())
# Keep every 5th labeled cell. .copy() materialises the subset so that the
# in-place preprocessing below does not run on a view of the full dataset.
adata = adata[labeled_idx[::5]].copy()
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
gene_names = adata.var_names
celltype = adata.obs['celltype']
# Densify a sparse expression matrix; also accept an X that is already dense.
real_data = adata.X.toarray() if hasattr(adata.X, 'toarray') else np.asarray(adata.X)
real_data.shape

In [None]:
# LOAD GENERATED DATA
# NOTE(review): allow_pickle=True lets a crafted file execute arbitrary code on
# load. It is kept because the exact on-disk format of the sample file is not
# visible here; only load .npz files you generated yourself.
# The context manager closes the underlying file once the array is read.
with np.load(path_to_unconditional_sample, allow_pickle=True) as npzfile:
    cell_gen_all = npzfile['cell_gen']
autoencoder = load_VAE()
# Decoding is inference only, so skip building an autograd graph.
with torch.no_grad():
    # float32 is PyTorch's default parameter dtype; casting here avoids a
    # dtype mismatch if the latents were saved as float64.
    latent = torch.tensor(cell_gen_all, dtype=torch.float32)
    gen_data = autoencoder(latent, return_decoded=True).cpu().numpy()
gen_data.shape

In [None]:
# CONCATENATE, FILTER AND COMPUTE UMAP
# Stack real cells first and generated cells after them. The matrix itself is
# cast to float32 because the `dtype=` argument of AnnData is deprecated in
# recent anndata releases.
combined = np.concatenate((real_data, gen_data), axis=0).astype(np.float32, copy=False)
adata = ad.AnnData(combined)
# Origin label per row, in the same order as the concatenation above.
adata.obs['cell_name'] = ['real'] * real_data.shape[0] + ['generated'] * gen_data.shape[0]
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Keep the full gene set reachable through .raw before subsetting.
adata.raw = adata
# .copy() turns the gene subset into a standalone object so that scaling and
# PCA do not operate on a view.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)
sc.tl.umap(adata)

In [None]:
# PLOT THE UMAP
# Colour every cell by its origin so real and generated cells can be compared
# in the shared embedding.
umap_palette = {"real": "tab:blue", "generated": "tab:orange"}
sc.pl.umap(
    adata=adata,
    color="cell_name",
    size=6,
    title='Tabula muris',
    palette=umap_palette,
)