In [1]:
import os
import sys

import numpy as np
import matplotlib.pyplot as plt

import scanpy as sc
import scvi
import torch

from pytorch_lightning.loggers import WandbLogger
import wandb
from dotenv import load_dotenv

import session_info
import warnings
from pyprojroot.here import here

# Silence all warnings for the rest of the session (no category or module filter is given).
warnings.filterwarnings("ignore")

# Make the project's `bin` directory (resolved through `here`) importable;
# index 1 places it right after the first sys.path entry.
sys.path.insert(1, str(here('bin')))

# float32 matmul precision mode for PyTorch.
torch.set_float32_matmul_precision('high')

# Figure defaults: 100 dpi on screen, `dpi_fig_save` dpi for files written to disk.
#plt.style.use(['science','nature','no-latex'])  (disabled)
dpi_fig_save = 300
sc.set_figure_params(
    dpi=100,
    dpi_save=dpi_fig_save,
    vector_friendly=True,
)

# Load environment variables from the project-level `.env` file.
# Kept as the last expression so the cell displays the boolean returned by load_dotenv.
load_dotenv(here('.env'))

True

In [2]:
class CustomWandbLogger(WandbLogger):
    """WandbLogger subclass that reports the experiment's own directory as ``save_dir``.

    NOTE(review): presumably needed so that files written via ``logger.save_dir``
    land inside the wandb run folder — confirm against the trainer that consumes it.
    """

    @property
    def save_dir(self):
        """Return the ``dir`` attribute of this logger's ``experiment`` object."""
        active_run = self.experiment
        return active_run.dir

In [3]:
# Set the scvi-tools seed to 0 (the cell output confirms "Seed set to 0").
scvi.settings.seed = 0
# Record the scvi-tools version used for this run in the notebook output.
print("Last run with scvi-tools version:", scvi.__version__)

Seed set to 0


Last run with scvi-tools version: 1.1.2


**Setting parameters** 

In [4]:
# Switches for regenerating outputs; `overwriteData` gates the save step at the end
# of the notebook.
overwriteFigures = overwriteData = True

In [5]:
# Working directory = wherever the kernel was started; every input/output path
# below is built from it, so launch the notebook from its own step folder.
workDir = os.getcwd()

# Cell lineage processed by this notebook (also used as the results sub-folder name).
cellGroup = 'NK'

In [6]:
# Annotation level = last component of the working directory; normpath is applied
# first so a trailing separator cannot produce an empty name.
annotationLevel = os.path.split(os.path.normpath(workDir))[1]

In [7]:
# Display the working directory so the run location is recorded in the notebook output.
workDir

'/home/jupyter/Inflammation-PBMCs-Atlas/02_cell_annotation/02_fromCellLineages_to_CellTypes/Step4'

**Load data**

In [8]:
# Read the `01_<cellGroup>_normalized_HVGsubset.h5ad` file from this cell group's
# results folder, then display the AnnData summary.
input_h5ad = here(f"{workDir}/{cellGroup}/results/01_{cellGroup}_normalized_HVGsubset.h5ad")
adata = sc.read_h5ad(input_h5ad)
adata

AnnData object with n_obs × n_vars = 376885 × 2437
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'technology', 'patientID', 'disease', 'timepoint_replicate', 'treatmentStatus', 'therapyResponse', 'sex', 'age', 'BMI', 'binned_age', 'diseaseStatus', 'smokingStatus', 'ethnicity', 'institute', 'diseaseGroup', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_20_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'total_counts_plt', 'log1p_total_counts_plt', 'pct_counts_plt', 'doublet_score', 'predicted_doublet', 'S_score', 'G2M_score', 'phase'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'mt', 'ribo', 'hb', 'plt', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable'
    uns: 'chemist

## scVI integration

**scVI parameters**

In [9]:
# Arguments forwarded to scvi.model.SCVI.setup_anndata.
setup_kwargs = {
    "layer": "counts",
    "batch_key": "libraryID",
    "categorical_covariate_keys": ["studyID", "chemistry", "disease", "sampleID"],
    "labels_key": "chemistry",
}

# Arguments forwarded to the scvi.model.SCVI constructor.
scvi_kwargs = {
    "n_hidden": 512,
    "n_latent": 10,  # reduced to 20 after step 00 (main) and to 10 after step 03
    "n_layers": 2,
    "gene_likelihood": "nb",
    "dispersion": "gene-label",
}

# Trainer options forwarded through model.train, see
# https://docs.scvi-tools.org/en/stable/api/reference/scvi.train.Trainer.html#scvi.train.Trainer
# The wandb logger is not part of this dict; it is passed directly to model.train.
# NOTE(review): `enable_checkpointing` is not set here — confirm whether
# `checkpointing_monitor` has any effect without it.
trainer_kwargs = {
    "checkpointing_monitor": "elbo_validation",
    "early_stopping_monitor": "reconstruction_loss_validation",
    "early_stopping_patience": 10,
    "early_stopping_min_delta": 0.1,
    "early_stopping": True,
    "max_epochs": 1000,
}

# Training-plan options (passed as `plan_kwargs` to model.train), see
# https://docs.scvi-tools.org/en/stable/api/reference/scvi.train.TrainingPlan.html#scvi.train.TrainingPlan
plan_kwargs = {
    "lr": 5e-4,
    # "reduce_lr_on_plateau": True  (disabled)
}

# Extra VAE module options, currently not applied, see
# https://docs.scvi-tools.org/en/stable/api/reference/scvi.module.VAE.html#scvi.module.VAE
#vae = dict(
#    use_layer_norm='both',
#    use_batch_norm='none',
#    encode_covariates=True,
#    deeply_inject_covariates=False
#)

# Single flat dict of every setting above; later dicts win on duplicate keys.
parameter_dict = {**setup_kwargs, **scvi_kwargs, **trainer_kwargs, **plan_kwargs}

**wandb parameters**

In [10]:
# Run name used for wandb: "<annotationLevel>_<cellGroup>".
run_name = f"{annotationLevel}_{cellGroup}"
run_name

'Step4_NK'

In [11]:
# wandb logger for this run; the full parameter dictionary is attached as the run config.
logger = CustomWandbLogger(
    name=run_name,
    project='inflammation_atlas_R1',
    config=parameter_dict,
)

**Running integration**

In [12]:
# Register `adata` with scvi-tools using the layer, batch key, covariates and labels
# listed in `setup_kwargs`.
scvi.model.SCVI.setup_anndata(adata, **setup_kwargs)

In [13]:
# Build the SCVI model on `adata` with the options collected in `scvi_kwargs`.
model = scvi.model.SCVI(adata, **scvi_kwargs)

In [14]:
# Fit the model: trainer options come from `trainer_kwargs`, training-plan options
# from `plan_kwargs`, and metrics are sent to the wandb logger.
model.train(
    logger=logger,
    plan_kwargs=plan_kwargs,
    **trainer_kwargs,
)

GPU available: True (cuda), used: True


TPU available: False, using: 0 TPU cores


IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs


[34m[1mwandb[0m: Currently logged in as: [33mdav1989[0m ([33minflammation[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: wandb version 0.16.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[34m[1mwandb[0m: Tracking run with wandb version 0.16.3


[34m[1mwandb[0m: Run data is saved locally in [35m[1m./wandb/run-20240506_165506-uohc3i4n[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.


[34m[1mwandb[0m: Syncing run [33mStep4_NK[0m


[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/inflammation/inflammation_atlas_R1[0m


[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/inflammation/inflammation_atlas_R1/runs/uohc3i4n[0m


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Training:   0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 1/1000:   0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 1/1000:   0%|          | 1/1000 [00:40<11:21:13, 40.91s/it]

Epoch 1/1000:   0%|          | 1/1000 [00:40<11:21:13, 40.91s/it, v_num=3i4n, train_loss_step=728, train_loss_epoch=766]

Epoch 2/1000:   0%|          | 1/1000 [00:40<11:21:13, 40.91s/it, v_num=3i4n, train_loss_step=728, train_loss_epoch=766]

Epoch 2/1000:   0%|          | 2/1000 [01:20<11:09:18, 40.24s/it, v_num=3i4n, train_loss_step=728, train_loss_epoch=766]

Epoch 2/1000:   0%|          | 2/1000 [01:20<11:09:18, 40.24s/it, v_num=3i4n, train_loss_step=749, train_loss_epoch=750]

Epoch 3/1000:   0%|          | 2/1000 [01:20<11:09:18, 40.24s/it, v_num=3i4n, train_loss_step=749, train_loss_epoch=750]

Epoch 3/1000:   0%|          | 3/1000 [02:00<11:04:00, 39.96s/it, v_num=3i4n, train_loss_step=749, train_loss_epoch=750]

Epoch 3/1000:   0%|          | 3/1000 [02:00<11:04:00, 39.96s/it, v_num=3i4n, train_loss_step=757, train_loss_epoch=745]

Epoch 4/1000:   0%|          | 3/1000 [02:00<11:04:00, 39.96s/it, v_num=3i4n, train_loss_step=757, train_loss_epoch=745]

Epoch 4/1000:   0%|          | 4/1000 [02:38<10:52:46, 39.32s/it, v_num=3i4n, train_loss_step=757, train_loss_epoch=745]

Epoch 4/1000:   0%|          | 4/1000 [02:38<10:52:46, 39.32s/it, v_num=3i4n, train_loss_step=742, train_loss_epoch=743]

Epoch 5/1000:   0%|          | 4/1000 [02:38<10:52:46, 39.32s/it, v_num=3i4n, train_loss_step=742, train_loss_epoch=743]

Epoch 5/1000:   0%|          | 5/1000 [03:17<10:51:47, 39.30s/it, v_num=3i4n, train_loss_step=742, train_loss_epoch=743]

Epoch 5/1000:   0%|          | 5/1000 [03:17<10:51:47, 39.30s/it, v_num=3i4n, train_loss_step=719, train_loss_epoch=742]

Epoch 6/1000:   0%|          | 5/1000 [03:17<10:51:47, 39.30s/it, v_num=3i4n, train_loss_step=719, train_loss_epoch=742]

Epoch 6/1000:   1%|          | 6/1000 [03:56<10:49:34, 39.21s/it, v_num=3i4n, train_loss_step=719, train_loss_epoch=742]

Epoch 6/1000:   1%|          | 6/1000 [03:56<10:49:34, 39.21s/it, v_num=3i4n, train_loss_step=738, train_loss_epoch=741]

Epoch 7/1000:   1%|          | 6/1000 [03:56<10:49:34, 39.21s/it, v_num=3i4n, train_loss_step=738, train_loss_epoch=741]

Epoch 7/1000:   1%|          | 7/1000 [04:35<10:43:47, 38.90s/it, v_num=3i4n, train_loss_step=738, train_loss_epoch=741]

Epoch 7/1000:   1%|          | 7/1000 [04:35<10:43:47, 38.90s/it, v_num=3i4n, train_loss_step=711, train_loss_epoch=740]

Epoch 8/1000:   1%|          | 7/1000 [04:35<10:43:47, 38.90s/it, v_num=3i4n, train_loss_step=711, train_loss_epoch=740]

Epoch 8/1000:   1%|          | 8/1000 [05:14<10:45:40, 39.05s/it, v_num=3i4n, train_loss_step=711, train_loss_epoch=740]

Epoch 8/1000:   1%|          | 8/1000 [05:14<10:45:40, 39.05s/it, v_num=3i4n, train_loss_step=759, train_loss_epoch=740]

Epoch 9/1000:   1%|          | 8/1000 [05:14<10:45:40, 39.05s/it, v_num=3i4n, train_loss_step=759, train_loss_epoch=740]

Epoch 9/1000:   1%|          | 9/1000 [05:53<10:44:09, 39.00s/it, v_num=3i4n, train_loss_step=759, train_loss_epoch=740]

Epoch 9/1000:   1%|          | 9/1000 [05:53<10:44:09, 39.00s/it, v_num=3i4n, train_loss_step=743, train_loss_epoch=739]

Epoch 10/1000:   1%|          | 9/1000 [05:53<10:44:09, 39.00s/it, v_num=3i4n, train_loss_step=743, train_loss_epoch=739]

Epoch 10/1000:   1%|          | 10/1000 [06:31<10:40:12, 38.80s/it, v_num=3i4n, train_loss_step=743, train_loss_epoch=739]

Epoch 10/1000:   1%|          | 10/1000 [06:31<10:40:12, 38.80s/it, v_num=3i4n, train_loss_step=746, train_loss_epoch=739]

Epoch 11/1000:   1%|          | 10/1000 [06:31<10:40:12, 38.80s/it, v_num=3i4n, train_loss_step=746, train_loss_epoch=739]

Epoch 11/1000:   1%|          | 11/1000 [07:10<10:40:28, 38.86s/it, v_num=3i4n, train_loss_step=746, train_loss_epoch=739]

Epoch 11/1000:   1%|          | 11/1000 [07:10<10:40:28, 38.86s/it, v_num=3i4n, train_loss_step=699, train_loss_epoch=739]

Epoch 12/1000:   1%|          | 11/1000 [07:10<10:40:28, 38.86s/it, v_num=3i4n, train_loss_step=699, train_loss_epoch=739]

Epoch 12/1000:   1%|          | 12/1000 [07:49<10:40:48, 38.91s/it, v_num=3i4n, train_loss_step=699, train_loss_epoch=739]

Epoch 12/1000:   1%|          | 12/1000 [07:49<10:40:48, 38.91s/it, v_num=3i4n, train_loss_step=735, train_loss_epoch=739]

Epoch 13/1000:   1%|          | 12/1000 [07:49<10:40:48, 38.91s/it, v_num=3i4n, train_loss_step=735, train_loss_epoch=739]

Epoch 13/1000:   1%|▏         | 13/1000 [08:28<10:37:42, 38.77s/it, v_num=3i4n, train_loss_step=735, train_loss_epoch=739]

Epoch 13/1000:   1%|▏         | 13/1000 [08:28<10:37:42, 38.77s/it, v_num=3i4n, train_loss_step=710, train_loss_epoch=738]

Epoch 14/1000:   1%|▏         | 13/1000 [08:28<10:37:42, 38.77s/it, v_num=3i4n, train_loss_step=710, train_loss_epoch=738]

Epoch 14/1000:   1%|▏         | 14/1000 [09:07<10:39:15, 38.90s/it, v_num=3i4n, train_loss_step=710, train_loss_epoch=738]

Epoch 14/1000:   1%|▏         | 14/1000 [09:07<10:39:15, 38.90s/it, v_num=3i4n, train_loss_step=718, train_loss_epoch=738]

Epoch 15/1000:   1%|▏         | 14/1000 [09:07<10:39:15, 38.90s/it, v_num=3i4n, train_loss_step=718, train_loss_epoch=738]

Epoch 15/1000:   2%|▏         | 15/1000 [09:46<10:39:04, 38.93s/it, v_num=3i4n, train_loss_step=718, train_loss_epoch=738]

Epoch 15/1000:   2%|▏         | 15/1000 [09:46<10:39:04, 38.93s/it, v_num=3i4n, train_loss_step=762, train_loss_epoch=738]

Epoch 16/1000:   2%|▏         | 15/1000 [09:46<10:39:04, 38.93s/it, v_num=3i4n, train_loss_step=762, train_loss_epoch=738]

Epoch 16/1000:   2%|▏         | 16/1000 [10:24<10:36:01, 38.78s/it, v_num=3i4n, train_loss_step=762, train_loss_epoch=738]

Epoch 16/1000:   2%|▏         | 16/1000 [10:24<10:36:01, 38.78s/it, v_num=3i4n, train_loss_step=764, train_loss_epoch=738]

Epoch 17/1000:   2%|▏         | 16/1000 [10:24<10:36:01, 38.78s/it, v_num=3i4n, train_loss_step=764, train_loss_epoch=738]

Epoch 17/1000:   2%|▏         | 17/1000 [11:04<10:36:49, 38.87s/it, v_num=3i4n, train_loss_step=764, train_loss_epoch=738]

Epoch 17/1000:   2%|▏         | 17/1000 [11:04<10:36:49, 38.87s/it, v_num=3i4n, train_loss_step=746, train_loss_epoch=738]

Epoch 18/1000:   2%|▏         | 17/1000 [11:04<10:36:49, 38.87s/it, v_num=3i4n, train_loss_step=746, train_loss_epoch=738]

Epoch 18/1000:   2%|▏         | 18/1000 [11:42<10:36:30, 38.89s/it, v_num=3i4n, train_loss_step=746, train_loss_epoch=738]

Epoch 18/1000:   2%|▏         | 18/1000 [11:42<10:36:30, 38.89s/it, v_num=3i4n, train_loss_step=710, train_loss_epoch=738]

Epoch 19/1000:   2%|▏         | 18/1000 [11:42<10:36:30, 38.89s/it, v_num=3i4n, train_loss_step=710, train_loss_epoch=738]

Epoch 19/1000:   2%|▏         | 19/1000 [12:21<10:35:46, 38.88s/it, v_num=3i4n, train_loss_step=710, train_loss_epoch=738]

Epoch 19/1000:   2%|▏         | 19/1000 [12:21<10:35:46, 38.88s/it, v_num=3i4n, train_loss_step=744, train_loss_epoch=738]

Epoch 20/1000:   2%|▏         | 19/1000 [12:21<10:35:46, 38.88s/it, v_num=3i4n, train_loss_step=744, train_loss_epoch=738]

Epoch 20/1000:   2%|▏         | 20/1000 [13:01<10:37:58, 39.06s/it, v_num=3i4n, train_loss_step=744, train_loss_epoch=738]

Epoch 20/1000:   2%|▏         | 20/1000 [13:01<10:37:58, 39.06s/it, v_num=3i4n, train_loss_step=704, train_loss_epoch=738]

Epoch 21/1000:   2%|▏         | 20/1000 [13:01<10:37:58, 39.06s/it, v_num=3i4n, train_loss_step=704, train_loss_epoch=738]

Epoch 21/1000:   2%|▏         | 21/1000 [13:39<10:32:47, 38.78s/it, v_num=3i4n, train_loss_step=704, train_loss_epoch=738]

Epoch 21/1000:   2%|▏         | 21/1000 [13:39<10:32:47, 38.78s/it, v_num=3i4n, train_loss_step=721, train_loss_epoch=738]

Epoch 22/1000:   2%|▏         | 21/1000 [13:39<10:32:47, 38.78s/it, v_num=3i4n, train_loss_step=721, train_loss_epoch=738]

Epoch 22/1000:   2%|▏         | 22/1000 [14:18<10:34:39, 38.94s/it, v_num=3i4n, train_loss_step=721, train_loss_epoch=738]

Epoch 22/1000:   2%|▏         | 22/1000 [14:18<10:34:39, 38.94s/it, v_num=3i4n, train_loss_step=744, train_loss_epoch=738]

Epoch 23/1000:   2%|▏         | 22/1000 [14:18<10:34:39, 38.94s/it, v_num=3i4n, train_loss_step=744, train_loss_epoch=738]

Epoch 23/1000:   2%|▏         | 23/1000 [14:57<10:35:39, 39.04s/it, v_num=3i4n, train_loss_step=744, train_loss_epoch=738]

Epoch 23/1000:   2%|▏         | 23/1000 [14:57<10:35:39, 39.04s/it, v_num=3i4n, train_loss_step=763, train_loss_epoch=737]

Epoch 24/1000:   2%|▏         | 23/1000 [14:57<10:35:39, 39.04s/it, v_num=3i4n, train_loss_step=763, train_loss_epoch=737]

Epoch 24/1000:   2%|▏         | 24/1000 [15:36<10:30:47, 38.78s/it, v_num=3i4n, train_loss_step=763, train_loss_epoch=737]

Epoch 24/1000:   2%|▏         | 24/1000 [15:36<10:30:47, 38.78s/it, v_num=3i4n, train_loss_step=718, train_loss_epoch=737]

Epoch 25/1000:   2%|▏         | 24/1000 [15:36<10:30:47, 38.78s/it, v_num=3i4n, train_loss_step=718, train_loss_epoch=737]

Epoch 25/1000:   2%|▎         | 25/1000 [16:15<10:32:31, 38.92s/it, v_num=3i4n, train_loss_step=718, train_loss_epoch=737]

Epoch 25/1000:   2%|▎         | 25/1000 [16:15<10:32:31, 38.92s/it, v_num=3i4n, train_loss_step=766, train_loss_epoch=737]

Epoch 26/1000:   2%|▎         | 25/1000 [16:15<10:32:31, 38.92s/it, v_num=3i4n, train_loss_step=766, train_loss_epoch=737]

Epoch 26/1000:   3%|▎         | 26/1000 [16:54<10:33:19, 39.01s/it, v_num=3i4n, train_loss_step=766, train_loss_epoch=737]

Epoch 26/1000:   3%|▎         | 26/1000 [16:54<10:33:19, 39.01s/it, v_num=3i4n, train_loss_step=720, train_loss_epoch=737]

Epoch 26/1000:   3%|▎         | 26/1000 [16:54<10:33:30, 39.03s/it, v_num=3i4n, train_loss_step=720, train_loss_epoch=737]


Monitored metric reconstruction_loss_validation did not improve in the last 10 records. Best score: 739.499. Signaling Trainer to stop.


In [15]:
# Close the active wandb run (the cell output shows the final upload and run summary).
wandb.finish()

[34m[1mwandb[0m: - 0.003 MB of 0.003 MB uploaded

[34m[1mwandb[0m: \ 0.003 MB of 0.009 MB uploaded

[34m[1mwandb[0m: | 0.003 MB of 0.009 MB uploaded

[34m[1mwandb[0m: / 0.009 MB of 0.009 MB uploaded

[34m[1mwandb[0m:                                                                                


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:                     elbo_train █▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:                elbo_validation █▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:                          epoch ▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
[34m[1mwandb[0m:                kl_global_train ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:           kl_global_validation ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:                 kl_local_train ██▇▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
[34m[1mwandb[0m:            kl_local_validation █▇▆▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
[34m[1mwandb[0m:                      kl_weight ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
[34m[1mwandb[0m:      reconstruction_loss_train █▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: reconstruction_loss_validation █▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:               train_loss_epoch █▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:                train_loss_step █▇▆▆▃█▅▁▄

[34m[1mwandb[0m: 🚀 View run [33mStep4_NK[0m at: [34m[4mhttps://wandb.ai/inflammation/inflammation_atlas_R1/runs/uohc3i4n[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)


[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20240506_165506-uohc3i4n/logs[0m


### Extracting embedding space

In [16]:
# Latent representation from the trained model, stored on the AnnData under 'X_scVI'.
latent_embedding = model.get_latent_representation()
adata.obsm['X_scVI'] = latent_embedding

## Save the results

In [17]:
if overwriteData:
    # AnnData (including the obsm entries added above) -> gzip-compressed h5ad.
    adata_out = here(f"{workDir}/{cellGroup}/results/02_{annotationLevel}_{cellGroup}_HVGsubset_scVI.h5ad")
    adata.write(adata_out, compression="gzip")

    # Trained model -> its own directory, replacing any previous copy;
    # the AnnData is not stored alongside the model.
    model_out = here(f"{workDir}/{cellGroup}/results/02_{annotationLevel}_{cellGroup}_HVGsubset_scVI_MODEL/")
    model.save(model_out, overwrite=True, save_anndata=False)

In [18]:
# To load the model do (after loading the corresponding adata):
# model = scvi.model.SCVI.load(here(f"{workDir}/{cellGroup}/results/02_{annotationLevel}_{cellGroup}_HVGsubset_scVI_MODEL/"),
#                              adata,
#                              accelerator="gpu")