# Benchmark the runtime and peak memory usage of Smoother models on different datasets

In [1]:
import os
import time

import numpy as np
import pandas as pd
import scanpy as sc
import scipy
import torch

from smoother import SpatialWeightMatrix, SpatialLoss
from smoother.models.impute import ImputeTorch, ImputeConvex
from smoother.models.deconv import NNLS, DWLS
from smoother.models.reduction import SpatialVAE
from smoother.simulation.utils import grouped_obs_mean

%load_ext memory_profiler

In [2]:
def sp_loss_construction(coords, y_expr):
    """Build the spatial loss that the benchmark cells time and profile.

    A fresh `SpatialWeightMatrix` is filled by `calc_weights_knn` (k=6) from
    `coords`, rescaled by `scale_by_expr(y_expr)`, and wrapped in an 'icar'
    `SpatialLoss` with rho=0.99 and `standardize_cov=False`.

    Args:
        coords: spatial coordinates handed to `calc_weights_knn`.
        y_expr: expression values handed to `scale_by_expr`.

    Returns:
        The constructed `SpatialLoss` object.
    """
    knn_weights = SpatialWeightMatrix()
    knn_weights.calc_weights_knn(coords, k=6)
    knn_weights.scale_by_expr(y_expr)

    # Turn the expression-scaled weights into a loss object.
    return SpatialLoss('icar', knn_weights, rho=0.99, standardize_cov=False)

def sp_loss_execution_ten_epoch(sp_loss):
    """Evaluate `sp_loss` ten times on one random single-column input.

    The input has as many rows as `sp_loss.inv_cov` has columns and is drawn
    once with `torch.randn`; the same tensor is reused for every evaluation.
    The loss values are discarded, so the function returns None.
    """
    n_rows = sp_loss.inv_cov.shape[1]
    x = torch.randn(n_rows, 1)

    epoch = 0
    while epoch < 10:
        sp_loss(x)
        epoch += 1

In [3]:
from memory_profiler import memory_usage

## DLPFC

### Load data and preprocessing

In [4]:
# Root folder of the benchmark data. Set the SMOOTHER_DATA_DIR environment
# variable to run the notebook on another machine; the default is the
# location the benchmark was originally run from.
data_root = os.environ.get(
    "SMOOTHER_DATA_DIR", "/Users/jysumac/Projects/Smoother_paper/data"
)
# NOTE: despite its name, `data_dir` is the path of the .h5ad file itself.
data_dir = os.path.join(
    data_root, "sodb_samples", "visium_normal",
    "10X_Visium_maynard2021trans_151673_data.h5ad",
)

data = sc.read_h5ad(data_dir)
# Snapshot of X as loaded, taken before any normalization.
data.layers['raw'] = data.X.copy()
coords = data.obsm['spatial']

# Library-size normalization followed by log1p; the result is kept as a layer.
sc.pp.normalize_total(data, target_sum=1e4)
sc.pp.log1p(data)
data.layers['log1p'] = data.X.copy()
# subset=True restricts `data` in place to the selected genes.
sc.pp.highly_variable_genes(data, n_top_genes=2000, subset=True)

# After the in-place subset this mask should select every remaining column;
# it is kept so the selection stays explicit.
y = data.layers['log1p'][:, data.var['highly_variable']].toarray()
y = torch.tensor(y).T # n_genes x n_spots





In [5]:
# Display the AnnData summary after preprocessing together with the shape of y.
data, y.shape

(AnnData object with n_obs × n_vars = 3639 × 2000
     obs: 'in_tissue', 'array_row', 'array_col', 'Region', 'leiden'
     var: 'gene_ids', 'feature_types', 'genome', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
     uns: 'hvg', 'leiden', 'leiden_colors', 'log1p', 'moranI', 'neighbors', 'pca', 'spatial', 'spatial_neighbors', 'umap'
     obsm: 'X_pca', 'X_umap', 'spatial'
     varm: 'PCs'
     layers: 'raw', 'log1p'
     obsp: 'connectivities', 'distances', 'spatial_connectivities', 'spatial_distances',
 torch.Size([2000, 3639]))

### Spatial loss

In [6]:
torch.manual_seed(100)
start_time = time.time()
sp_loss = sp_loss_construction(coords, y)
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")
%memit sp_loss_construction(coords, y)

Number of spots: 3639. Average number of neighbors per spot:  5.85.
Number of spots: 3639. Average number of neighbors per spot:  5.86.
Execution time: 0.1545250415802002 seconds
Number of spots: 3639. Average number of neighbors per spot:  5.85.
Number of spots: 3639. Average number of neighbors per spot:  5.86.
peak memory: 1016.70 MiB, increment: 0.20 MiB


In [7]:
torch.manual_seed(100)
start_time = time.time()
sp_loss_execution_ten_epoch(sp_loss)
end_time = time.time()
print(f"Execution time: {(end_time - start_time)/10} seconds")
%memit sp_loss_execution_ten_epoch(sp_loss)

Execution time: 0.0011655092239379883 seconds
peak memory: 1016.88 MiB, increment: 0.00 MiB


### Imputation

In [8]:
sp_loss.standardize_cov = True
y_obs = y.clone().T[:,:1]
start_time = time.time()
m = ImputeConvex(y_obs, sp_loss, fixed_obs = False, nonneg=False, lambda_spatial_loss = 1)
y_imp = m.get_results()
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")

%memit ImputeConvex(y_obs, sp_loss, fixed_obs = False, nonneg=False, lambda_spatial_loss = 1)

=== Time  0.06s. Loss: (total)  0.006, (recon)  0.005, (spatial)  0.001
Execution time: 0.0745999813079834 seconds
=== Time  0.04s. Loss: (total)  0.006, (recon)  0.005, (spatial)  0.001
peak memory: 1038.72 MiB, increment: 11.25 MiB


### Deconvolution

#### Prepare inputs

In [9]:
# Marker selection: rank genes between the annotated regions, filter by
# p-value and log2 fold change, then keep at most 50 genes per region with
# the largest log fold changes.
sc.tl.rank_genes_groups(data, 'Region', method='wilcoxon')
markers_df = sc.get.rank_genes_groups_df(
    data, group=None, pval_cutoff=0.01, log2fc_min=1
)
markers_sorted = markers_df.sort_values(
    ['group', 'logfoldchanges'], ascending=False
)
markers_nm50 = markers_sorted.groupby('group').head(50)
gene_selected = markers_nm50['names'].unique()



In [10]:
# Reference profiles: grouped_obs_mean of the 'raw' layer per Region,
# restricted to the selected markers and log1p-transformed (genes x regions).
region_means = grouped_obs_mean(data, group_key='Region', layer='raw')
ref_exp = torch.tensor(
    np.log1p(region_means.loc[gene_selected, :]).to_numpy()
).float()

# Column position in `data` of every selected marker (first match).
gene_idx = [np.flatnonzero(data.var_names == gene)[0] for gene in gene_selected]

# Spot-level log1p expression of the same markers (genes x spots).
# This rebinds `y`, replacing the HVG tensor built in the loading cell.
marker_expr = data.layers['log1p'][:, gene_idx].toarray()
y = torch.tensor(marker_expr.T).float()

print(ref_exp.shape, y.shape)

torch.Size([83, 7]) torch.Size([83, 3639])




#### Run deconvolution

In [11]:
# deconvolution with NNLS
def _deconv():
    model = NNLS()
    model.deconv(
        ref_exp, y, spatial_loss=sp_loss, lambda_spatial_loss=1, 
        lr = 0.1, max_epochs = -1, verbose = False
    )

%memit _deconv()

=== Time  0.47s. Total epoch 150. Final loss: (total) 0.320. (spatial) 0.008.
peak memory: 1193.88 MiB, increment: 6.28 MiB


In [12]:
# deconvolution with DWLS
def _deconv():
    model = DWLS()
    model.deconv(
        ref_exp, y, spatial_loss=sp_loss, lambda_spatial_loss=1, 
        lr = 0.1, max_epochs = -1, verbose = False
    )

%memit _deconv()

=== Time  0.61s. Total epoch 147. Final loss: (total) 1.565. (spatial) 0.018.
peak memory: 1195.59 MiB, increment: 1.72 MiB


### Dimension reduction

In [13]:
# Point SpatialVAE at the 'raw' layer, build the model around the current
# spatial loss, and train on CPU for 400 epochs.
SpatialVAE.setup_anndata(data, layer="raw")
m = SpatialVAE(st_adata=data, spatial_loss=sp_loss)
m.train(
    max_epochs=400,
    lr=0.01,
    accelerator='cpu',
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Epoch 400/400: 100%|█| 400/400 [01:55<00:00,  3.42it/s, v_num=1, train_loss_step

`Trainer.fit` stopped: `max_epochs=400` reached.


Epoch 400/400: 100%|█| 400/400 [01:55<00:00,  3.47it/s, v_num=1, train_loss_step


## CRC Stereo-seq

### Load data and preprocessing

In [14]:
# Root folder of the benchmark data. Set the SMOOTHER_DATA_DIR environment
# variable to run the notebook on another machine; the default is the
# location the benchmark was originally run from.
data_root = os.environ.get(
    "SMOOTHER_DATA_DIR", "/Users/jysumac/Projects/Smoother_paper/data"
)
# NOTE: despite its name, `data_dir` is the path of the .h5ad file itself.
data_dir = os.path.join(data_root, "crc_stereo", "P19_T.h5ad")

data = sc.read_h5ad(data_dir)
# Snapshot of X as loaded, taken before any normalization.
data.layers['raw'] = data.X.copy()
# Coordinates are stored as obs columns for this dataset.
coords = data.obs[['x', 'y']]

# Library-size normalization followed by log1p; the result is kept as a layer.
sc.pp.normalize_total(data, target_sum=1e4)
sc.pp.log1p(data)
data.layers['log1p'] = data.X.copy()
# subset=True restricts `data` in place to the selected genes.
sc.pp.highly_variable_genes(data, n_top_genes=2000, subset=True)

# After the in-place subset this mask should select every remaining column;
# it is kept so the selection stays explicit.
y = data.layers['log1p'][:, data.var['highly_variable']].toarray()
y = torch.tensor(y).T # n_genes x n_spots



In [15]:
# Display the AnnData summary after preprocessing together with the shape of y.
data, y.shape

(AnnData object with n_obs × n_vars = 18125 × 2000
     obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'bayes_clusters', 'x', 'y'
     var: 'features', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
     uns: 'log1p', 'hvg'
     layers: 'raw', 'log1p',
 torch.Size([2000, 18125]))

### Spatial loss

In [16]:
torch.manual_seed(100)
start_time = time.time()
sp_loss = sp_loss_construction(coords, y)
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")
%memit sp_loss_construction(coords, y)

Number of spots: 18125. Average number of neighbors per spot:  4.94.
Number of spots: 18125. Average number of neighbors per spot:  4.94.
Execution time: 0.8040728569030762 seconds
Number of spots: 18125. Average number of neighbors per spot:  4.94.
Number of spots: 18125. Average number of neighbors per spot:  4.94.
peak memory: 2838.34 MiB, increment: 278.12 MiB


In [17]:
torch.manual_seed(100)
start_time = time.time()
sp_loss_execution_ten_epoch(sp_loss)
end_time = time.time()
print(f"Execution time: {(end_time - start_time)/10} seconds")
%memit sp_loss_execution_ten_epoch(sp_loss)

Execution time: 0.0012638092041015625 seconds
peak memory: 2577.91 MiB, increment: 0.00 MiB


### Imputation

In [18]:
sp_loss.standardize_cov = True
y_obs = y.clone().T[:,:1]
start_time = time.time()
m = ImputeConvex(y_obs, sp_loss, fixed_obs = False, nonneg=False, lambda_spatial_loss = 1)
y_imp = m.get_results()
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")

%memit ImputeConvex(y_obs, sp_loss, fixed_obs = False, nonneg=False, lambda_spatial_loss = 1)

=== Time  0.18s. Loss: (total)  0.196, (recon)  0.123, (spatial)  0.073
Execution time: 0.1871318817138672 seconds
=== Time  0.15s. Loss: (total)  0.196, (recon)  0.123, (spatial)  0.073
peak memory: 2920.62 MiB, increment: 57.16 MiB


### Deconvolution

#### Prepare inputs

In [19]:
# Marker selection: rank genes between the 'bayes_clusters' groups on the
# log1p layer and keep every gene passing the p-value / log2 fold-change cutoffs.
cluster_labels = pd.Categorical(data.obs['bayes_clusters'])
data.obs['cluster'] = cluster_labels
sc.tl.rank_genes_groups(
    data, 'cluster', method='wilcoxon', use_raw=False, layer='log1p'
)
markers_df = sc.get.rank_genes_groups_df(
    data, group=None, pval_cutoff=0.01, log2fc_min=1
)
gene_selected = markers_df['names'].unique()



In [20]:
# Reference profiles: grouped_obs_mean of the 'raw' layer per cluster,
# restricted to the selected markers and log1p-transformed (genes x clusters).
cluster_means = grouped_obs_mean(data, group_key='cluster', layer='raw')
ref_exp = torch.tensor(
    np.log1p(cluster_means.loc[gene_selected, :]).to_numpy()
).float()

# Column position in `data` of every selected marker (first match).
gene_idx = [np.flatnonzero(data.var_names == gene)[0] for gene in gene_selected]

# Spot-level log1p expression of the same markers (genes x spots).
# This rebinds `y`, replacing the HVG tensor built in the loading cell.
marker_expr = data.layers['log1p'][:, gene_idx].toarray()
y = torch.tensor(marker_expr.T).float()

print(ref_exp.shape, y.shape)

torch.Size([90, 12]) torch.Size([90, 18125])




#### Run deconvolution

In [21]:
# deconvolution with NNLS
def _deconv():
    model = NNLS()
    model.deconv(
        ref_exp, y, spatial_loss=sp_loss, lambda_spatial_loss=1, 
        lr = 0.1, max_epochs = -1, verbose = False
    )

%memit _deconv()

=== Time  8.64s. Total epoch 586. Final loss: (total) 0.485. (spatial) 0.007.
peak memory: 2392.78 MiB, increment: 37.70 MiB


In [22]:
# deconvolution with DWLS
def _deconv():
    model = DWLS()
    model.deconv(
        ref_exp, y, spatial_loss=sp_loss, lambda_spatial_loss=1, 
        lr = 0.1, max_epochs = -1, verbose = False
    )

%memit _deconv()

=== Time  12.59s. Total epoch 638. Final loss: (total) 2.052. (spatial) 0.015.
peak memory: 2399.69 MiB, increment: 6.91 MiB


### Dimension reduction

In [23]:
# Point SpatialVAE at the 'raw' layer, build the model around the current
# spatial loss, and train on CPU for 400 epochs.
SpatialVAE.setup_anndata(data, layer="raw")
m = SpatialVAE(st_adata=data, spatial_loss=sp_loss)
m.train(
    max_epochs=400,
    lr=0.01,
    accelerator='cpu',
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Epoch 400/400: 100%|█| 400/400 [10:32<00:00,  1.61s/it, v_num=1, train_loss_step

`Trainer.fit` stopped: `max_epochs=400` reached.


Epoch 400/400: 100%|█| 400/400 [10:32<00:00,  1.58s/it, v_num=1, train_loss_step


## Prostate Slide-seqV2

### Load data and preprocessing

In [24]:
# Root folder of the benchmark data. Set the SMOOTHER_DATA_DIR environment
# variable to run the notebook on another machine; the default is the
# location the benchmark was originally run from.
data_root = os.environ.get(
    "SMOOTHER_DATA_DIR", "/Users/jysumac/Projects/Smoother_paper/data"
)
# NOTE: despite its name, `data_dir` is the path of the .h5ad file itself.
data_dir = os.path.join(data_root, "prostate_ref_mapping", "ST_HP1.h5ad")

data = sc.read_h5ad(data_dir)
# Snapshot of X as loaded, taken before any normalization.
data.layers['raw'] = data.X.copy()
# Coordinates are stored as obs columns for this dataset.
coords = data.obs[['xcoord', 'ycoord']]

# Library-size normalization followed by log1p; the result is kept as a layer.
sc.pp.normalize_total(data, target_sum=1e4)
sc.pp.log1p(data)
data.layers['log1p'] = data.X.copy()
# subset=True restricts `data` in place to the selected genes.
sc.pp.highly_variable_genes(data, n_top_genes=2000, subset=True)

# After the in-place subset this mask should select every remaining column;
# it is kept so the selection stays explicit.
y = data.layers['log1p'][:, data.var['highly_variable']].toarray()
y = torch.tensor(y).T # n_genes x n_spots



In [25]:
# Display the AnnData summary after preprocessing together with the shape of y.
data, y.shape

(AnnData object with n_obs × n_vars = 29278 × 2000
     obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'cell1', 'cell2', 'xcoord', 'ycoord', 'batch'
     var: 'features', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
     uns: 'log1p', 'hvg'
     layers: 'raw', 'log1p',
 torch.Size([2000, 29278]))

In [26]:
# List the distinct labels stored in obs['cell1'] (used below as the grouping key).
data.obs['cell1'].unique()

array(['Epithelial', 'Fibroblasts', 'Pericytes', 'Endothelial',
       'Macrophage', 'mDC', 'plasma cells', 'Tcell', 'Mast cells', 'PDC',
       'B cells', 'Monocytes'], dtype=object)

### Spatial loss

In [27]:
torch.manual_seed(100)
start_time = time.time()
sp_loss = sp_loss_construction(coords, y)
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")
%memit sp_loss_construction(coords, y)

Number of spots: 29278. Average number of neighbors per spot:  5.13.
Number of spots: 29278. Average number of neighbors per spot:  5.14.
Execution time: 1.3070640563964844 seconds
Number of spots: 29278. Average number of neighbors per spot:  5.13.
Number of spots: 29278. Average number of neighbors per spot:  5.14.
peak memory: 3902.94 MiB, increment: 784.94 MiB


In [28]:
torch.manual_seed(100)
start_time = time.time()
sp_loss_execution_ten_epoch(sp_loss)
end_time = time.time()
print(f"Execution time: {(end_time - start_time)/10} seconds")
%memit sp_loss_execution_ten_epoch(sp_loss)

Execution time: 0.004995584487915039 seconds
peak memory: 3136.48 MiB, increment: 0.00 MiB


### Imputation

In [29]:
sp_loss.standardize_cov = True
y_obs = y.clone().T[:,:1]
start_time = time.time()
m = ImputeConvex(y_obs, sp_loss, fixed_obs = False, nonneg=False, lambda_spatial_loss = 1)
y_imp = m.get_results()
end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")

%memit ImputeConvex(y_obs, sp_loss, fixed_obs = False, nonneg=False, lambda_spatial_loss = 1)

=== Time  0.24s. Loss: (total)  0.907, (recon)  0.537, (spatial)  0.369
Execution time: 0.25188302993774414 seconds
=== Time  0.22s. Loss: (total)  0.907, (recon)  0.537, (spatial)  0.369
peak memory: 3478.31 MiB, increment: 92.14 MiB


### Deconvolution

#### Prepare inputs

In [30]:
# Marker selection: rank genes between the 'cell1' groups on the log1p layer
# and keep every gene passing the p-value / log2 fold-change cutoffs.
cluster_labels = pd.Categorical(data.obs['cell1'])
data.obs['cluster'] = cluster_labels
sc.tl.rank_genes_groups(
    data, 'cluster', method='wilcoxon', use_raw=False, layer='log1p'
)
markers_df = sc.get.rank_genes_groups_df(
    data, group=None, pval_cutoff=0.01, log2fc_min=1
)
gene_selected = markers_df['names'].unique()



In [31]:
# Reference profiles: grouped_obs_mean of the 'raw' layer per cluster,
# restricted to the selected markers and log1p-transformed (genes x clusters).
cluster_means = grouped_obs_mean(data, group_key='cluster', layer='raw')
ref_exp = torch.tensor(
    np.log1p(cluster_means.loc[gene_selected, :]).to_numpy()
).float()

# Column position in `data` of every selected marker (first match).
gene_idx = [np.flatnonzero(data.var_names == gene)[0] for gene in gene_selected]

# Spot-level log1p expression of the same markers (genes x spots).
# This rebinds `y`, replacing the HVG tensor built in the loading cell.
marker_expr = data.layers['log1p'][:, gene_idx].toarray()
y = torch.tensor(marker_expr.T).float()

print(ref_exp.shape, y.shape)

torch.Size([145, 12]) torch.Size([145, 29278])




#### Run deconvolution

In [32]:
# deconvolution with NNLS
def _deconv():
    model = NNLS()
    model.deconv(
        ref_exp, y, spatial_loss=sp_loss, lambda_spatial_loss=1, 
        lr = 0.1, max_epochs = -1, verbose = False
    )

%memit _deconv()

=== Time  11.12s. Total epoch 348. Final loss: (total) 0.683. (spatial) 0.015.
peak memory: 3080.80 MiB, increment: 6.70 MiB


In [33]:
# deconvolution with DWLS
def _deconv():
    model = DWLS()
    model.deconv(
        ref_exp, y, spatial_loss=sp_loss, lambda_spatial_loss=1, 
        lr = 0.1, max_epochs = -1, verbose = False
    )

%memit _deconv()

=== Time  15.50s. Total epoch 315. Final loss: (total) 1.823. (spatial) 0.022.
peak memory: 3035.33 MiB, increment: 16.20 MiB


### Dimension reduction

In [34]:
# Point SpatialVAE at the 'raw' layer, build the model around the current
# spatial loss, and train on CPU for 400 epochs.
SpatialVAE.setup_anndata(data, layer="raw")
m = SpatialVAE(st_adata=data, spatial_loss=sp_loss)
m.train(
    max_epochs=400,
    lr=0.01,
    accelerator='cpu',
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Epoch 400/400: 100%|█| 400/400 [15:33<00:00,  2.33s/it, v_num=1, train_loss_step

`Trainer.fit` stopped: `max_epochs=400` reached.


Epoch 400/400: 100%|█| 400/400 [15:33<00:00,  2.33s/it, v_num=1, train_loss_step
