# Slurm Job Submission

- **Creator:** Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Affiliation:** Helmholtz Munich, Institute of Computational Biology (ICB), Talavera-López Lab
- **Date of Creation:** 20.03.2023
- **Date of Last Modification:** 05.09.2023

## 1. Setup

### 1.1 Import Libraries

In [1]:
import os
import subprocess

import numpy as np

### 1.2 Define Parameters

In [2]:
# Name of the conda environment that the generated job script activates
# (passed as `conda_env_name` to `submit_python_script` by the submission cells).
conda_env_name = "nichecompass"
# Second environment name; not referenced by any cell visible in this notebook
# excerpt — NOTE(review): confirm whether it is still needed.
reproducibility_conda_env_name = "nichecompass-reproducibility"

### 1.3 Define Functions

In [3]:
def submit_python_script(
        job_name_prefix,
        job_id,
        job_folder_path,
        conda_env_name,
        script_folder_path,
        script_name,
        script_args,
        t="24:00:00",
        p="gpu_p",
        gres="gpu:1",
        qos="gpu_normal",
        nice=10000):
    """
    Write a Slurm batch file for a Python script and submit it with ``sbatch``.

    The batch file is written to
    ``{script_folder_path}/slurm_jobs/job_{job_id}_{job_name_prefix}.cmd``.
    It activates ``conda_env_name``, changes into ``script_folder_path`` and
    runs ``python ../{script_name}{script_args}``.

    Parameters
    ----------
    job_name_prefix:
        Suffix of the Slurm job name; the job name is
        ``{job_id}_{job_name_prefix}``.
    job_id:
        Identifier prepended to the job name.
    job_folder_path:
        Folder in which a ``logs`` subfolder is created.
    conda_env_name:
        Conda environment activated inside the job.
    script_folder_path:
        Folder the job changes into before running the script; also the parent
        of the ``slurm_jobs`` folder that receives the batch file.
    script_name:
        File name of the script, resolved relative to the parent of
        ``script_folder_path`` (it is invoked as ``../{script_name}``).
    script_args:
        Command line arguments appended verbatim to the ``python`` call; must
        start with a space.
    t:
        Value for ``#SBATCH -t`` (time limit).
    p:
        Value for ``#SBATCH -p`` (partition). If it contains ``"gpu"``, the
        ``--gres`` and ``--qos`` directives are emitted as well.
    gres:
        Value for ``#SBATCH --gres`` (only used for GPU partitions).
    qos:
        Value for ``#SBATCH --qos`` (only used for GPU partitions).
    nice:
        Value for ``#SBATCH --nice``.

    Returns
    ----------
    None. If ``sbatch`` is not on ``PATH``, a message is printed and the batch
    file is left in place without being submitted.
    """
    job_name = f"{job_id}_{job_name_prefix}"
    job_file_path = f"{script_folder_path}/slurm_jobs/job_{job_name}.cmd"
    # NOTE(review): these are relative paths. Per the sbatch documentation they
    # are resolved against the job's working directory, which is not
    # necessarily `job_folder_path` — confirm the logs land where expected.
    out_file_path = f"logs/out_{job_name}.txt"
    err_file_path = f"logs/err_{job_name}.txt"

    os.makedirs(job_folder_path + "/logs", exist_ok=True)
    # The batch file lives below `script_folder_path`, which may differ from
    # `job_folder_path`; make sure its folder exists before opening the file.
    os.makedirs(os.path.dirname(job_file_path), exist_ok=True)

    job_lines = [
        "#!/bin/bash",
        f"#SBATCH -J {job_name}",
        f"#SBATCH -o {out_file_path}",
        f"#SBATCH -e {err_file_path}",
        f"#SBATCH -t {t}",
        f"#SBATCH -p {p}",
        "#SBATCH -c 6",
        # To skip specific nodes, add e.g.
        # "#SBATCH --exclude=supergpu02,supergpu03" here.
    ]
    if "gpu" in p:
        job_lines += [
            f"#SBATCH --gres={gres}",
            f"#SBATCH --qos={qos}",
        ]
    job_lines += [
        "#SBATCH --mem=128G",
        f"#SBATCH --nice={nice}",
        "source $HOME/.bashrc",
        f"conda activate {conda_env_name}",
        f"cd {script_folder_path}",
        f"python ../{script_name}{script_args}",
    ]

    with open(job_file_path, "w") as handle:
        handle.write("\n".join(job_lines) + "\n")

    # Argument list instead of a shell string, so that paths containing spaces
    # or shell metacharacters are passed to sbatch unchanged.
    try:
        subprocess.run(["sbatch", job_file_path], check=False)
    except FileNotFoundError:
        print(f"sbatch not found on PATH; job file written to "
              f"{job_file_path} but not submitted.")

## 5. NicheCompass Reference Model

### 5.3 nanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

In [None]:
# Job 1 settings, read by the submission cell at the end of section 5.3.
# Contrastive ratio and weight are both 0 and lambda_l1_masked is 0.
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 1
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 3 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 1 cell only in lambda_l1_masked (100. instead of 0.).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 3
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 5 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 1 cell in the contrastive settings: positive ratio
# 0.0078125 and contrastive weight 1000000.
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 5
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.0078125 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 1000000.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 7 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 5 cell only in lambda_l1_masked (100. instead of 0.).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 7
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.0078125 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 1000000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 9 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 7 cell only in contrastive_logits_pos_ratio
# (0.015625 instead of 0.0078125).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 9
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.015625 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 1000000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 11 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 9 cell only in lambda_cat_covariates_contrastive
# (500000. instead of 1000000.).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 11
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.015625 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 500000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 14 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 11 cell only in contrastive_logits_pos_ratio
# (0.03125 instead of 0.015625).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 14
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.03125 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 500000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 16 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 9 cell only in lambda_cat_covariates_contrastive
# (100000. instead of 1000000.).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 16
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.015625 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 100000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 18 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 9 cell only in cat_covariates_embeds_nums
# ("3 30 15" instead of "3 30 5").
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 18
cat_covariates_embeds_nums = "3 30 15" # "3 30 15"
contrastive_logits_pos_ratio = 0.015625 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 1000000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01

In [None]:
# Job 20 settings, read by the submission cell at the end of section 5.3.
# Differs from the job 9 cell only in active_gp_thresh_ratio
# (0.1 instead of 0.03).
# NOTE(review): reference_batches, lambda_edge_recon and lambda_gene_expr_recon
# are not set here although the submission cell reads them.
job_id = 20
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0.015625 # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 1000000.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.1 # 0.01

In [4]:
# Job 21: no contrastive, higher covar embedding node
# (last entry of cat_covariates_embeds_nums is 15 rather than 5).
# All eight batches are used as reference batches.
job_id = 21
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 15" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Reconstruction loss weights passed as --lambda_edge_recon and
# --lambda_gene_expr_recon by the submission cell.
lambda_edge_recon = 5000000.
lambda_gene_expr_recon = 3000.

In [6]:
# Job 22: no contrastive, higher covar embedding node
# (last entry of cat_covariates_embeds_nums is 30; otherwise identical to the
# job 21 cell).
job_id = 22
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 30" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Reconstruction loss weights passed as --lambda_edge_recon and
# --lambda_gene_expr_recon by the submission cell.
lambda_edge_recon = 5000000.
lambda_gene_expr_recon = 3000.

In [8]:
# Job 23: no contrastive, decrease kl.
job_id = 23
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Decrease the weight of the KL divergence in the loss: both reconstruction
# weights are 5x the job 21 values (5000000. / 3000.), which presumably lowers
# the relative KL weight — TODO confirm against the training script.
lambda_edge_recon = 25000000.
lambda_gene_expr_recon = 15000.

In [10]:
# Job 24: no contrastive, decrease kl.
# Differs from the job 23 cell only in lambda_l1_masked (100. instead of 0.).
job_id = 24
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Decrease the weight of the KL divergence in the loss: both reconstruction
# weights are 5x the job 21 values (5000000. / 3000.), which presumably lowers
# the relative KL weight — TODO confirm against the training script.
lambda_edge_recon = 25000000.
lambda_gene_expr_recon = 15000.

In [12]:
# Job 25: no contrastive, decrease kl.
job_id = 25
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Decrease the weight of the KL divergence in the loss: both reconstruction
# weights are 10x the job 21 values (5000000. / 3000.), which presumably lowers
# the relative KL weight — TODO confirm against the training script.
lambda_edge_recon = 50000000.
lambda_gene_expr_recon = 30000.

In [14]:
# Job 26: no contrastive, decrease kl.
# Differs from the job 25 cell only in lambda_l1_masked (100. instead of 0.).
job_id = 26
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 100. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Decrease the weight of the KL divergence in the loss: both reconstruction
# weights are 10x the job 21 values (5000000. / 3000.), which presumably lowers
# the relative KL weight — TODO confirm against the training script.
lambda_edge_recon = 50000000.
lambda_gene_expr_recon = 30000.

In [16]:
# Job 27: no contrastive, decrease kl, -5r3 - 6
# Same as the job 25 cell except that reference_batches omits batch3 and
# batch4.
# NOTE(review): the meaning of "-5r3 - 6" is not evident from this notebook.
job_id = 27
reference_batches = "batch1 batch2 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Decrease the weight of the KL divergence in the loss: both reconstruction
# weights are 10x the job 21 values (5000000. / 3000.).
lambda_edge_recon = 50000000.
lambda_gene_expr_recon = 30000.

In [18]:
# Job 28: no contrastive, -5r3 - 13, decrease kl
# Same as the job 25 cell except that reference_batches omits batch3 and
# batch8.
# NOTE(review): the meaning of "-5r3 - 13" is not evident from this notebook.
job_id = 28
reference_batches = "batch1 batch2 batch4 batch5 batch6 batch7" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# Decrease the weight of the KL divergence in the loss: both reconstruction
# weights are 10x the job 21 values (5000000. / 3000.).
lambda_edge_recon = 50000000.
lambda_gene_expr_recon = 30000.

In [None]:
# Job 29: no contrastive, -5r3 - 6
# reference_batches omits batch3 and batch4.
# NOTE(review): apart from job_id, every value in this cell equals the job 27
# cell, and the title above does not mention "decrease kl" although the 10x
# reconstruction weights are still set below. If this job was meant to use the
# job 21 weights (5000000. / 3000.), the last two assignments need changing —
# confirm the intent.
job_id = 29
reference_batches = "batch1 batch2 batch5 batch6 batch7 batch8" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# decrease weight of KL divergence in the loss
lambda_edge_recon = 50000000.
lambda_gene_expr_recon = 30000.

In [None]:
# Job 30: no contrastive, -5r3 - 13
# reference_batches omits batch3 and batch8 (same selection as the job 28
# cell).
job_id = 30
reference_batches = "batch1 batch2 batch4 batch5 batch6 batch7" 
cat_covariates_embeds_nums = "3 30 5" # "3 30 15"
contrastive_logits_pos_ratio = 0. # 0 0.0078125 (4) 0.015625 (8) 0.03125 (16)
lambda_cat_covariates_contrastive = 0.
lambda_l1_masked = 0. # 0 100
lambda_l1_addon = 1000. # 0 100 1000
active_gp_thresh_ratio = 0.03 # 0.01
# NOTE(review): this spot used to carry the comment "decrease weight of KL
# divergence in the loss", but the values below equal the job 21 weights
# (no 5x / 10x increase as in jobs 23-28), so that comment looks stale here.
lambda_edge_recon = 5000000.
lambda_gene_expr_recon = 3000.

In [23]:
# Submit one NicheCompass reference-model training job for the CosMx NSCLC
# dataset.
#
# Run exactly one job configuration cell of this section first. This cell reads
# the following names from it: job_id, reference_batches,
# cat_covariates_embeds_nums, contrastive_logits_pos_ratio,
# lambda_cat_covariates_contrastive, lambda_l1_masked, lambda_l1_addon,
# active_gp_thresh_ratio, lambda_edge_recon and lambda_gene_expr_recon.
dataset = "nanostring_cosmx_human_nsclc"
task = "reference"
job_name_prefix = f"{dataset}_nichecompass_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/lustre/groups/imm01/workspace/irene.bonafonte/Projects/2023May_nichecompass/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"


# Hyperparameters shared by all jobs submitted from this cell.
# `reference_batches` is taken from the job configuration cell; the full set is
# "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8".
cat_covariates_keys = "batch fov patient"
species = "human"
node_label_method = "one-hop-norm"
n_neighbors = 4
n_sampled_neighbors = 4
edge_batch_size = 512
cat_covariates_embeds_injection = "gene_expr_decoder" # "encoder gene_expr_decoder"
cat_covariates_no_edges = "True False True"
contrastive_logits_neg_ratio = 0.

# Command line for the training script. Every fragment starts with a space
# because `submit_python_script` appends the string directly after the script
# name.
script_args = (
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 100"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder 960"
    " --conv_layer_encoder gatv2conv"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    # Pass the add-on weight itself; re-using lambda_l1_masked here would
    # silently ignore the lambda_l1_addon value set in the configuration cell.
    f" --lambda_l1_addon {lambda_l1_addon}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size 256"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    # The job id becomes the suffix of the run's timestamp label.
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

Submitted batch job 13795722


## 6. NicheCompass Reference Query Mapping

### 6.1 nanoString CosMx Human NSCLC

In [None]:
# Submit one query-mapping job that maps the query batch onto a previously
# trained NicheCompass reference model.
#
# NOTE(review): `lambda_edge_recon` and `lambda_gene_expr_recon` are read below
# but not defined in this cell; they must still be in scope from a job
# configuration cell of section 5.3. Make sure they match the reference model
# identified by `load_timestamp`.
task = "reference_query"
dataset = "nanostring_cosmx_human_nsclc"
job_id = 11
query_batches = "batch8"
n_neighbors = 4
n_sampled_neighbors = 4
# Timestamp label of the reference model to load; presumably the reference run
# submitted with job_id 3 (timestamp suffix "_3") — TODO confirm.
load_timestamp = "05092023_101353_3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{task}_query_mapping"
job_folder_path = f"../scripts/{task}/slurm_jobs"
# NOTE(review): this points into a different user's workspace than the
# `script_folder_path` of the reference-model submission cell — confirm.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"
# Command line for the mapping script. Every fragment starts with a space
# because `submit_python_script` appends the string directly after the script
# name. Parentheses are used instead of backslash continuations so that the
# expression does not depend on a trailing blank line.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    " --reference_query_model_label reference_query_mapping"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
)

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)