# Slurm Job Submission

- **Creator:** Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Affiliation:** Helmholtz Munich, Institute of Computational Biology (ICB), Talavera-López Lab
- **Date of Creation:** 20.03.2023
- **Date of Last Modification:** 13.06.2023

## 1. Setup

### 1.1 Import Libraries

In [1]:
import os

### 1.2 Define Functions

In [2]:
def submit_python_script(
        job_name_prefix,
        job_id,
        job_folder_path,
        conda_env_name,
        script_folder_path,
        script_name,
        script_args,
        nice=10000):
    """
    Write a Slurm batch file that runs a Python script and submit it via
    ``sbatch``.

    The batch file is named ``job_<job_name_prefix>_<job_id>.cmd``. Stdout and
    stderr of the job are directed to ``<job_folder_path>/logs``. The resource
    request is fixed in the generated header (``-t 48:00:00``, ``-p gpu_p``,
    ``-c 6``, ``--gres=gpu:1``, ``--qos=gpu``, ``--mem=64GB``); only the
    ``--nice`` value is configurable.

    Parameters
    ----------
    job_name_prefix:
        Prefix of the Slurm job name; also used in the job and log file names.
    job_id:
        Identifier appended to ``job_name_prefix`` (separated by ``_``).
    job_folder_path:
        Folder for the job file and the ``logs`` subfolder. Any ``/aih``
        substring is stripped for the job file location only; that stripped
        folder is not created here and must already exist if it differs.
    conda_env_name:
        Conda environment activated inside the job before running the script.
    script_folder_path:
        Folder the job changes into before launching the script.
    script_name:
        File name of the script; it is invoked as ``../<script_name>``
        relative to ``script_folder_path``.
    script_args:
        Command-line arguments appended verbatim to the ``python`` call, so
        the string has to start with a space.
    nice:
        Value written to the ``#SBATCH --nice`` directive.
    """
    # Local import: only needed to quote the path handed to the shell.
    import shlex

    job_name = f"{job_name_prefix}_{job_id}"
    # Account for fact that submit node has different home path than compute node
    job_file_path = f"{job_folder_path.replace('/aih', '')}/job_{job_name}.cmd"
    out_file_path = f"{job_folder_path}/logs/out_{job_name}.txt"
    err_file_path = f"{job_folder_path}/logs/err_{job_name}.txt"

    os.makedirs(job_folder_path + "/logs", exist_ok=True)

    job_lines = [
        "#!/bin/bash",
        f"#SBATCH -J {job_name}",
        f"#SBATCH -o {out_file_path}",
        f"#SBATCH -e {err_file_path}",
        "#SBATCH -t 48:00:00",
        "#SBATCH -p gpu_p",
        "#SBATCH -c 6",
        "#SBATCH --gres=gpu:1",
        "#SBATCH --qos=gpu",
        "#SBATCH --mem=64GB",
        f"#SBATCH --nice={nice}",
        "source $HOME/.bashrc",
        f"conda activate {conda_env_name}",
        "cd /",
        f"cd {script_folder_path}",
        f"python ../{script_name}{script_args}",
    ]
    with open(job_file_path, "w") as handle:
        handle.write("\n".join(job_lines) + "\n")

    # Quote the path so folders containing spaces or shell metacharacters
    # reach sbatch as a single argument.
    os.system(f"sbatch {shlex.quote(job_file_path)}")

## 2. NicheCompass Reference Model Training

### 2.1 seqFISH Mouse Organogenesis Imputed

In [32]:
# seqFISH mouse organogenesis (imputed): reference model training with
# one-hop-attention node labels.
job_id = 1
job_name_prefix = "seqfish_mouse_organogenesis_imputed_nichecompass_one-hop-attention_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset seqfish_mouse_organogenesis_imputed",
    " --reference_batches batch1 batch2 batch3 batch4 batch5 batch6",
    " --n_neighbors 8",
    " --filter_genes",
    " --n_hvg 4000",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species mouse",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-attention_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 3",
    " --log_variational",
    " --node_label_method one-hop-attention",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 1024",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12069840


In [9]:
# seqFISH mouse organogenesis (imputed): reference model training with
# one-hop-norm node labels.
job_id = 2
job_name_prefix = "seqfish_mouse_organogenesis_imputed_nichecompass_one-hop-norm_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset seqfish_mouse_organogenesis_imputed",
    " --reference_batches batch1 batch2 batch3 batch4 batch5 batch6",
    " --n_neighbors 8",
    " --filter_genes",
    " --n_hvg 4000",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species mouse",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-norm_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 3",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 1024",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12074555


### 2.2 STARmap PLUS Mouse Central Nervous System

In [None]:
# STARmap PLUS mouse CNS: reference model training with one-hop-attention
# node labels (gene filtering disabled via --no-filter_genes).
job_id = 1
job_name_prefix = "starmap_plus_mouse_cns_nichecompass_one-hop-attention_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset starmap_plus_mouse_cns",
    " --reference_batches batch1 batch2 batch3",
    " --n_neighbors 4",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species mouse",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-attention_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.1",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 3",
    " --log_variational",
    " --node_label_method one-hop-attention",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 1024",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [10]:
# STARmap PLUS mouse CNS: reference model training with one-hop-norm node
# labels on 20 reference batches and a non-zero contrastive loss weight.
job_id = 5
job_name_prefix = "starmap_plus_mouse_cns_nichecompass_one-hop-norm_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string; the batch list is split over three fragments.
script_args = "".join((
    " --dataset starmap_plus_mouse_cns",
    " --reference_batches batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8",
    " batch9 batch10 batch11 batch12 batch13 batch14 batch15 batch16 batch17 batch18",
    " batch19 batch20",
    " --n_neighbors 8",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species mouse",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-norm_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 20",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 10000.",
    " --contrastive_logits_ratio 0.03125",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 4096",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12078658


In [5]:
# Scratch arithmetic: doubles 0.015625.
# NOTE(review): the result equals the --contrastive_logits_ratio used by the
# STARmap PLUS one-hop-norm job in the preceding cell; presumably this is how
# that value was derived. Confirm before relying on it.
0.015625 * 2

0.03125

### 2.3 nanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

In [34]:
# nanoString CosMx human NSCLC: reference model training with
# one-hop-attention node labels (human MEBOCOST gene programs).
job_id = 1
job_name_prefix = "nanostring_cosmx_human_nsclc_nichecompass_one-hop-attention_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset nanostring_cosmx_human_nsclc",
    " --reference_batches batch1 batch2 batch3",
    " --n_neighbors 4",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species human",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-attention_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.1",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 3",
    " --log_variational",
    " --node_label_method one-hop-attention",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 1024",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12071419


In [13]:
# nanoString CosMx human NSCLC: reference model training with one-hop-norm
# node labels on 8 reference batches and larger reconstruction loss weights.
job_id = 4
job_name_prefix = "nanostring_cosmx_human_nsclc_nichecompass_one-hop-norm_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset nanostring_cosmx_human_nsclc",
    " --reference_batches batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8",
    " --n_neighbors 12",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species human",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-norm_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 8",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 1024",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12076877


### 2.4 Vizgen MERFISH Human Ovarian Cancer Dataset

In [32]:
# Vizgen MERFISH human ovarian cancer: reference model training with
# one-hop-attention node labels.
job_id = 4
job_name_prefix = "vizgen_merfish_human_ovarian_cancer_nichecompass_one-hop-attention_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset vizgen_merfish_human_ovarian_cancer",
    " --reference_batches batch1 batch2 batch3 batch4",
    " --n_neighbors 8",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species human",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-attention_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 4",
    " --log_variational",
    " --node_label_method one-hop-attention",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 1024",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12078279


In [6]:
# Vizgen MERFISH human ovarian cancer: reference model training with
# one-hop-norm node labels.
job_id = 2
job_name_prefix = "vizgen_merfish_human_ovarian_cancer_nichecompass_one-hop-norm_reference_model"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
job_folder_path = "../scripts/reference/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset vizgen_merfish_human_ovarian_cancer",
    " --reference_batches batch1 batch2 batch3 batch4",
    " --n_neighbors 8",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.01",
    " --nichenet_max_n_target_genes_per_gp 1000",
    " --include_mebocost_gps",
    " --mebocost_species human",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    " --model_label one-hop-norm_reference",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    " --cond_embed_injection gene_expr_decoder",
    " --n_cond_embed 4",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cond_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_cond_contrastive 0.",
    " --contrastive_logits_ratio 0.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 5.",
    " --edge_batch_size 2048",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

Submitted batch job 12074383


### 2.5 Spatial ATAC-RNA-Seq Mouse Embryo & Brain

In [None]:
# Spatial ATAC-RNA-seq mouse brain (batch 2): reference model training with
# one-hop-norm node labels and the ATAC modality enabled.
# The job name prefix is derived from this cell's dataset so that the job file
# and log files are not labelled (or mixed up) with the Vizgen MERFISH runs.
job_name_prefix = "spatial_atac_rna_seq_mouse_brain_batch2_nichecompass_one-hop-norm_reference_model"
job_id = 1
job_folder_path = "../scripts/reference/slurm_jobs"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/reference"
conda_env_name = "nichecompass"
script_name = "train_nichecompass_reference_model.py"
# Every fragment starts with a space because the string is appended directly
# after the script name in the generated job file.
script_args = " --dataset spatial_atac_rna_seq_mouse_brain_batch2" \
              " --reference_batches None" \
              " --n_neighbors 8" \
              " --filter_genes" \
              " --n_hvg 3000" \
              " --nichenet_keep_target_genes_ratio 0.01" \
              " --nichenet_max_n_target_genes_per_gp 1000" \
              " --include_mebocost_gps" \
              " --mebocost_species mouse" \
              " --gp_filter_mode subset" \
              " --combine_overlap_gps" \
              " --overlap_thresh_source_genes 0.9" \
              " --overlap_thresh_target_genes 0.9" \
              " --overlap_thresh_genes 0.9" \
              " --counts_key counts" \
              " --condition_key batch" \
              " --spatial_key spatial" \
              " --adj_key spatial_connectivities" \
              " --mapping_entity_key mapping_entity" \
              " --gp_targets_mask_key nichecompass_gp_targets" \
              " --gp_sources_mask_key nichecompass_gp_sources" \
              " --gp_names_key nichecompass_gp_names" \
              " --include_atac_modality" \
              " --filter_peaks" \
              " --min_cell_peak_thresh_ratio 0.0005" \
              " --model_label one-hop-norm_reference" \
              " --active_gp_names_key nichecompass_active_gp_names" \
              " --latent_key nichecompass_latent" \
              " --active_gp_thresh_ratio 0.05" \
              " --gene_expr_recon_dist nb" \
              " --cond_embed_injection gene_expr_decoder chrom_access_decoder" \
              " --n_cond_embed None" \
              " --log_variational" \
              " --node_label_method one-hop-norm" \
              " --n_layers_encoder 1" \
              " --n_hidden_encoder None" \
              " --conv_layer_encoder gcnconv" \
              " --n_epochs 100" \
              " --n_epochs_all_gps 25" \
              " --n_epochs_no_cond_contrastive 0" \
              " --lr 0.001" \
              " --lambda_edge_recon 500000." \
              " --lambda_gene_expr_recon 300." \
              " --lambda_chrom_access_recon 100." \
              " --lambda_cond_contrastive 0." \
              " --contrastive_logits_ratio 0." \
              " --lambda_group_lasso 0." \
              " --lambda_l1_masked 5." \
              " --edge_batch_size 4096" \
              " --node_batch_size None" \
              f" --timestamp_suffix _{job_id}"

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

## 3. NicheCompass Query Mapping on Reference Model

In [None]:
# nanoString CosMx human liver: map the query batch (sample2) onto the
# reference model loaded via --load_timestamp.
job_id = 1
job_name_prefix = "nichecompass_nanostring_cosmx_human_liver_query"
conda_env_name = "nichecompass_hpc"
script_name = "map_query_on_nichecompass_reference_model.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-repro-new/scripts"
job_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-repro-new/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset nanostring_cosmx_human_liver",
    " --query_batches sample2",
    " --reference_batch sample1",
    " --load_timestamp 10032023_145839",
    " --nichenet_max_n_target_genes_per_gp=20000",
    " --n_epochs=40",
    " --n_epochs_all_gps=0",
    " --lambda_group_lasso=0.",
    " --lambda_l1_masked=0.",
    " --edge_batch_size=256",
    " --node_batch_size=32",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

## 4. NicheCompass Individual Sample Benchmark Models Training

In [None]:
# Vizgen MERFISH mouse liver: train the individual-sample benchmark models for
# five runs (seeds 5-9).
job_name_prefix = "nichecompass_vizgen_merfish_mouse_liver_benchmarking"
job_id = 1
job_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/slurm_jobs"
conda_env_name = "nichecompass_hpc"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts"
script_name = "train_nichecompass_benchmark_models.py"
# All per-run lists must have the same length (five entries here). The run
# index is seed + 1, matching the other benchmark cells in this notebook
# (e.g. seed 9 -> run 10), so seeds 5-9 correspond to runs 6-10.
script_args = " --dataset vizgen_merfish_mouse_liver" \
              " --adata_new_name vizgen_merfish_mouse_liver_nichecompass" \
              " --n_neighbors_list 12 16 16 20 20" \
              " --edge_batch_size_list 256 128 128 128 128" \
              " --node_batch_size_list 32 16 16 16 16" \
              " --seeds 5 6 7 8 9" \
              " --run_index 6 7 8 9 10"

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

In [None]:
# STARmap PLUS mouse CNS: train a single individual-sample benchmark model
# (seed 9, run index 10).
job_id = 1
job_name_prefix = "nichecompass_starmap_plus_mouse_cns_benchmarking"
conda_env_name = "nichecompass_hpc"
script_name = "train_nichecompass_benchmark_models.py"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts"
job_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/slurm_jobs"
# Each fragment carries its own leading space, so plain concatenation yields
# the final argument string.
script_args = "".join((
    " --dataset starmap_plus_mouse_cns",
    " --adata_new_name starmap_plus_mouse_cns_nichecompass",
    " --n_neighbors_list 20",
    " --edge_batch_size_list 128",
    " --node_batch_size_list 16",
    " --seeds 9",
    " --run_index 10",
))

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

## 5. NicheCompass Sample Integration Benchmarking Models Training

In [None]:
# Submit the NicheCompass sample-integration method benchmarking runs for the
# seqFISH mouse organogenesis dataset (seeds 0-9, run indices 1-10).
job_name_prefix = "nichecompass_seqfish_mouse_organogenesis_sample_integration_method_benchmarking"
job_id = 1
job_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/slurm_jobs"
# NOTE(review): the other benchmarking submissions in this notebook use the
# "nichecompass_hpc" environment and "train_nichecompass_benchmark_models.py"
# -- confirm that the environment and script name below are intended.
conda_env_name = "nichecompass"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts"
script_name = "train_nichecompass_benchmarking_models.py"

# The fragments are joined by implicit string concatenation inside
# parentheses. This replaces backslash continuations, the last of which was
# dangling and only parsed because a blank line followed it. The resulting
# string is unchanged; it starts with a space because `submit_python_script`
# writes it directly after the script name in the generated job file.
script_args = (
    # Per-run settings (one entry per run)
    " --adata_new_name None "
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20 "
    " --edge_batch_size_list 512 512 256 256 256 256 128 128 128 128 "
    " --node_batch_size_list 64 64 32 32 32 32 16 16 16 16 "
    " --seeds 0 1 2 3 4 5 6 7 8 9 "
    " --run_index 1 2 3 4 5 6 7 8 9 10 "
    " --cell_type_key celltype_mapped_refined "
    # Gene program options
    " --nichenet_keep_target_genes_ratio 0.01 "
    " --nichenet_max_n_target_genes_per_gp 25344 "
    " --include_mebocost_gps "
    " --mebocost_species mouse "
    " --gp_filter_mode subset "
    " --combine_overlap_gps "
    " --overlap_thresh_source_genes 0.9 "
    " --overlap_thresh_target_genes 0.9 "
    " --overlap_thresh_genes 0.9 "
    # Dataset and AnnData keys
    " --dataset seqfish_mouse_organogenesis "
    " --reference_batches batch1 batch2 batch3 batch4 batch5 batch6 "
    " --counts_key counts "
    " --condition_key batch "
    " --spatial_key spatial "
    " --adj_key spatial_connectivities "
    " --mapping_entity_key mapping_entity "
    " --no-filter_genes "
    " --gp_targets_mask_key nichecompass_gp_targets "
    " --gp_sources_mask_key nichecompass_gp_sources "
    " --gp_names_key nichecompass_gp_names "
    # Model options
    " --model_label sample_integration_method_benchmarking "
    " --active_gp_names_key nichecompass_active_gp_names "
    " --latent_key nichecompass_latent "
    " --active_gp_thresh_ratio 0.03 "
    " --gene_expr_recon_dist nb "
    " --cond_embed_injection gene_expr_decoder "
    " --log_variational "
    " --n_layers_encoder 1 "
    " --conv_layer_encoder gcnconv "
    # Training options
    " --n_epochs 40 "
    " --n_epochs_all_gps 20 "
    " --lr 0.001 "
    " --lambda_edge_recon 10. "
    " --lambda_gene_expr_recon 0.01 "
    " --lambda_cond_contrastive 10. "
    " --contrastive_logits_ratio 0.1 "
    " --lambda_group_lasso 0. "
    " --lambda_l1_masked 0. "
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# Submit the STARmap PLUS mouse CNS benchmarking run (seed 9, run index 10)
# from the "nichecompass-repro-new" project checkout.
# NOTE(review): this cell sits under the sample-integration section but
# submits the single-sample benchmarking script -- confirm its placement.
job_name_prefix = "nichecompass_starmap_plus_mouse_cns_benchmarking"
job_id = 1
job_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-repro-new/slurm_jobs"
conda_env_name = "nichecompass_hpc"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-repro-new/scripts"
script_name = "train_nichecompass_benchmark_models.py"

# One fragment per command-line option. Each starts with a space because
# `submit_python_script` writes the argument string directly after the script
# name in the generated job file.
script_args = "".join([
    " --dataset starmap_plus_mouse_cns",
    " --adata_new_name starmap_plus_mouse_cns_nichecompass",
    " --n_neighbors_list 20",
    " --edge_batch_size_list 128",
    " --node_batch_size_list 16",
    " --seeds 9",
    " --run_index 10",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)