# Benchmark of all merged data using [scIB](https://github.com/theislab/scib/tree/main)

In [1]:
# Load packages
import numpy as np
import pandas as pd
import scanpy as sc
import scib
import warnings
from benchmarks.benchmark import benchmark as benchmark

# Ignore these warning categories for the rest of the session.
# The tuple order matches the order in which the filters are registered.
for _ignored_category in (
    FutureWarning,
    UserWarning,
    PendingDeprecationWarning,
    DeprecationWarning,
):
    warnings.filterwarnings("ignore", category=_ignored_category)

  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes


In [2]:
# Relative path to the merged .h5ad dataset that every method below is run on.
data_path = '../../data/processed/merged/merged_all.h5ad'

# Shared benchmark object; all later cells call methods on it.
# NOTE(review): HVG/HVGs presumably select 4000 highly variable genes and
# Scaled=False presumably skips scaling — confirm in benchmarks.benchmark.
benchmark_env = benchmark(
    data_path=data_path,
    batch_key="patientID",
    HVG=True,
    HVGs=4000,
    Scaled=False,
    seed=42,
)

### Unintegrated

In [1]:
# Run the benchmark's `unintegrated` step with figure saving enabled.
# NOTE(review): presumably the no-integration baseline — confirm in benchmarks.benchmark.
benchmark_env.unintegrated(save_figure=True)

### PCA

In [2]:
# Run the benchmark's `pca` step with figure saving enabled.
benchmark_env.pca(save_figure=True)

### [Scanorama](https://github.com/brianhie/scanorama)

In [3]:
# Run the benchmark's `scanorama` step with figure saving enabled.
benchmark_env.scanorama(save_figure=True)

### [Harmony](https://github.com/lilab-bcb/harmony-pytorch)

In [4]:
# Run the benchmark's `harmony` step with figure saving enabled.
benchmark_env.harmony(save_figure=True)

### [scVI](https://github.com/scverse/scvi-tools)

In [5]:
# Run the benchmark's `scvi` step with figure saving enabled.
# Keep the returned object: it is passed as `vae=` to benchmark_env.scanvi(...) later.
vae = benchmark_env.scvi(save_figure=True)

### [scANVI](https://github.com/scverse/scvi-tools)

In [6]:
# Run the benchmark's `scanvi` step, reusing the `vae` object returned by
# benchmark_env.scvi(...) earlier in this notebook.
benchmark_env.scanvi(vae=vae, save_figure=True)

### [scGen](https://github.com/theislab/scgen)

In [7]:
# Run the benchmark's `scgen` step with figure saving enabled.
benchmark_env.scgen(save_figure=True)

### [SAUCIE](https://github.com/KrishnaswamyLab/SAUCIE)

In [9]:
# Note: Paused for now. It requires an old version of TensorFlow
# benchmark_env.saucie()

### [ComBat](https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.combat.html)

In [8]:
# Run the benchmark's `combat` step with figure saving enabled.
benchmark_env.combat(save_figure=True)

### [DESC](https://github.com/eleozzr/desc)

In [9]:
# Run the benchmark's `desc` step with figure saving enabled.
benchmark_env.desc(save_figure=True)

### [trVAE](https://github.com/theislab/trvaep)

In [12]:
# Takes too much memory to run
# benchmark_env.trvae()

### [FastMNN](https://github.com/chriscainx/mnnpy)

In [10]:
# Run the benchmark's `fastmnn` step with figure saving enabled.
# NOTE: FastMNN does not work well here; the reason is not yet understood.
benchmark_env.fastmnn(save_figure=True)

### [BBKNN](https://github.com/Teichlab/bbknn)

In [4]:
# Skipped: unclear how to calculate the metrics for this one, since it doesn't produce a latent space like the other methods
#benchmark_env.bbknn()

### [TOSICA](https://github.com/JackieHanLab/TOSICA/tree/main)

In [15]:
# Takes too much memory to run
# benchmark_env.tosica()

### In-house model

#### Model using just an Encoder on HVGs

In [11]:
# Run the in-house encoder model with train=True and figure saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_encoder(
    save_path='trained_models/All_merged/Encoder/',
    train=True,
    save_figure=True,
)

#### Transformer model using pathway mask on HVGs

In [12]:
# Run the in-house pathways model with train=True and figure saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_pathways(
    save_path='trained_models/All_merged/Pathways/',
    train=True,
    save_figure=True,
)

#### Model using Encoder directly on HVGs + Transformer on pathway mask applied to HVGs

In [13]:
# Run the in-house encoder + pathways model with train=True and figure saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_encoder_pathways(
    save_path='trained_models/All_merged/Encoder_with_Pathways/',
    train=True,
    save_figure=True,
)

#### Model implementing a transformer mechanism on HVGs

In [14]:
# Run the in-house transformer-encoder model with train=True and figure saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_transformer_encoder(
    save_path='trained_models/All_merged/Transformer_Encoder/',
    train=True,
    save_figure=True,
)

#### Model implementing a transformer mechanism on HVGs + transformer mechanism on pathway masks applied to HVGs

In [4]:
# Run the in-house transformer-encoder + pathways model with train=True and
# figure saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_transformer_encoder_pathways(
    save_path='trained_models/All_merged/Transformer_Encoder_with_Pathways/',
    train=True,
    save_figure=True,
)

#### Model implementing a transformer mechanism on tokenized HVGs + Gene2vec representations

In [None]:
# Run the in-house tokenized-HVG transformer model with train=True and figure
# saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_tokenized_HVG_transformer(
    save_path='trained_models/All_merged/Tokenized_HVG_Transformer/',
    train=True,
    save_figure=True,
)

#### Model implementing a transformer mechanism on tokenized HVGs + Gene2vec representations + transformer mechanism on pathways/gene sets

In [None]:
# Run the in-house tokenized-HVG transformer + pathways model with train=True
# and figure saving enabled.
# NOTE(review): save_path is presumably where the trained model is stored — confirm.
benchmark_env.in_house_model_tokenized_HVG_transformer_with_pathways(
    save_path='trained_models/All_merged/Tokenized_HVG_Transformer_with_Pathways/',
    train=True,
    save_figure=True,
)

### Make dataframe and visualize

In [4]:
# Optional: load a previously saved results CSV if we want to merge old runs
# with the new one (saved in benchmark_env.metrics).
# Result names used so far: New_loss_Benchmark_results, Benchmark_results
benchmark_env.read_csv(name="benchmarks/results/All_merged/Benchmark_results")

In [5]:
# Build the results dataframe with min_max_normalize=True.
# NOTE(review): the method name is spelled "benchamrk"; it presumably matches
# the definition in benchmarks.benchmark — rename there first if it is fixed.
benchmark_env.make_benchamrk_results_dataframe(min_max_normalize=True)

In [17]:
# Visualize the benchmark results held by benchmark_env.
benchmark_env.visualize_results() 

In [17]:
# Save the results under the same name that benchmark_env.read_csv(...) loads
# earlier in this notebook.
benchmark_env.save_results_as_csv(name="benchmarks/results/All_merged/Benchmark_results")