In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before
# each cell runs, so edits to local source files take effect without a restart.
%load_ext autoreload
%autoreload 2

## Data loading

In [2]:
# Standard library
import time
import warnings
from pathlib import Path

# Third-party
import concord as ccd
import scanpy as sc
import torch

# Silence every Python warning raised from this point on.
warnings.filterwarnings('ignore')

# Use GPU index 3 when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:3') if torch.cuda.is_available() else torch.device('cpu')

# Random seed; forwarded to the CONCORD job arguments further down.
seed = 0

In [None]:
# Read the input AnnData object from its .h5ad file.
adata_path = Path('../data/CBCEcombineN2/') / 'adata_celsub_Jun26-1610.h5ad'
adata = sc.read_h5ad(adata_path)

In [5]:
# Project identifiers used to build directory names and output file names.
proj_name = "cel_packerN2"
file_name = "cel_packerN2"
# Timestamp of the form <abbreviated month><day>-<hour><minute>.
file_suffix = time.strftime('%b%d-%H%M')
seed = 0

# Per-project output locations, relative to the current working directory.
save_dir = Path(f"../save/{proj_name}")
data_dir = Path(f"../data/{proj_name}")
for _out_dir in (save_dir, data_dir):
    # Create missing parents too; an already-existing directory is not an error.
    _out_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Flag the 10,000 most variable genes in adata.var; subset=False keeps all genes for now.
# NOTE(review): scanpy documents flavor='seurat_v3' as expecting raw counts —
# confirm that adata.X holds counts at this point.
sc.pp.highly_variable_genes(
    adata,
    flavor='seurat_v3',
    n_top_genes=10000,
    subset=False,
)

# 300-component PCA restricted to the flagged genes, run before the matrix is subset.
sc.tl.pca(
    adata,
    n_comps=300,
    svd_solver='arpack',
    use_highly_variable=True,
)

# Keep only the flagged genes; .copy() turns the view into an independent object.
hvg_mask = adata.var.highly_variable
adata = adata[:, hvg_mask].copy()

In [None]:
# Persist the gene-subset AnnData. The path is built once so that the file
# actually written and the path reported below can never drift apart.
preprocessed_path = data_dir / f"{file_name}_preprocessed.h5ad"
adata.write_h5ad(preprocessed_path)
print(f"✅ Preprocessed data saved to {preprocessed_path}")

✅ Preprocessed data saved to ../data/cel_packerN2/cel_packerN2_preprocessed.h5ad


### Create jobs

In [None]:
# Hyperparameters for the CONCORD-based methods; serialised to JSON and handed
# to the job generator via --concord_kwargs.
concord_args = {
    'batch_size': 64,  # Training batch size; adjust as needed
    'encoder_dims': [1000],  # Encoder layer widths; recommended to be larger than latent_dim
    'augmentation_mask_prob': 0.3,  # Feature-masking probability; recommended range 0.2-0.5
    'clr_temperature': 0.3,  # Contrastive-loss temperature; recommended range 0.1-0.5
    'sampler_knn': 1000,  # Neighborhood size for intra-neighborhood sampling
    'n_epochs': 15,  # Number of training epochs; adjust as needed
    'verbose': True,  # Emit detailed progress output
    'seed': seed,  # Random seed for reproducibility
    # Model/result directory. The extra '../' is presumably needed because the
    # generated jobs run one directory level deeper — TODO confirm.
    'save_dir': '../' + str(save_dir),
}

In [None]:
import json
import subprocess

# Integration methods to benchmark; these are passed to the generator via --methods.
methods = ["scvi", "harmony", "scanorama", "liger", "unintegrated", "concord_hcl", "concord_knn", "contrastive"]
output_dir = '../jobs'
# NOTE: this rebinds `device` (a torch.device in the setup cell) to a plain
# string that is only forwarded to the job generator.
device = 'auto'
conda_env = 'cellpath'
batch_key = 'batch'
state_key = 'None'  # forwarded as the literal string 'None'
# Latent dimensionality passed to the generator via --latent_dim; same value
# as n_comps used for the PCA above.
latent_dim = 300

job_cmd = [
    "python", "./generate_py_sh_jobs.py",
    "--proj_name", proj_name,
    "--adata_filename", f"{file_name}_preprocessed.h5ad",
    "--methods", *methods,
    "--batch_key", batch_key,
    "--state_key", state_key,
    "--latent_dim", latent_dim,
    "--output_dir", output_dir,
    "--device", device,
    "--conda_env", conda_env,
    "--runtime", "02:00:00",
    "--concord_kwargs", json.dumps(concord_args),
]

# subprocess accepts only str/bytes/PathLike argv items; the integer latent_dim
# would raise TypeError, so every argument is stringified first.
# check=True raises CalledProcessError instead of silently returning a
# non-zero exit status when the generator fails.
subprocess.run([str(arg) for arg in job_cmd], check=True)


✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_scvi.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_scvi.sh

✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_harmony.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_harmony.sh

✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_scanorama.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_scanorama.sh

✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_liger.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_liger.sh

✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_unintegrated.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_unintegrated.sh

✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_concord_hcl.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_concord_hcl.sh

✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_concord_knn.py
✅ Generated: benchmark_cel_packerN2/benchmark_cel_packerN2_concord_k

CompletedProcess(args=['python', './generate_py_sh_jobs.py', '--proj_name', 'cel_packerN2', '--adata_filename', 'cel_packerN2_preprocessed.h5ad', '--methods', 'scvi', 'harmony', 'scanorama', 'liger', 'unintegrated', 'concord_hcl', 'concord_knn', 'contrastive', '--batch_key', 'batch', '--state_key', 'None', '--latent_dim', '300', '--output_dir', '../jobs', '--device', 'auto', '--conda_env', 'cellpath', '--runtime', '02:00:00', '--concord_kwargs', '{"batch_size": 64, "latent_dim": 300, "encoder_dims": [1000], "augmentation_mask_prob": 0.3, "clr_temperature": 0.3, "sampler_knn": 1000, "n_epochs": 15, "verbose": true, "seed": 0, "save_dir": "../../save/cel_packerN2"}'], returncode=0)