# Slurm Job Submission

- **Creator:** Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Date of Creation:** 20.03.2023
- **Date of Last Modification:** 09.10.2024

## 1. Setup

### 1.1 Import Libraries

In [None]:
import numpy as np
import os

### 1.2 Define Parameters

In [None]:
# Name of the conda environment; it is passed to `submit_python_script`, which
# writes a `conda activate <name>` line into every generated Slurm job file.
conda_env_name = "nichecompass-reproducibility"

### 1.3 Define Functions

In [None]:
def submit_python_script(
        job_name_prefix,
        job_id,
        job_folder_path,
        conda_env_name,
        script_folder_path,
        script_name,
        script_args,
        t="48:00:00",
        p="gpu_p",
        gres="gpu:1",
        qos="gpu_normal",
        mem="156G",
        nice=10000):
    """
    Write a Slurm batch file for a Python script and submit it with ``sbatch``.

    The batch file is called ``job_<job_name_prefix>_<job_id>.cmd``. It sources
    ``$HOME/.bashrc``, activates the conda environment, changes into
    ``script_folder_path`` and runs ``python ../<script_name><script_args>``.
    Stdout and stderr of the job are directed to ``<job_folder_path>/logs``.

    Parameters
    ----------
    job_name_prefix:
        Prefix of the job name; the job name is ``<job_name_prefix>_<job_id>``.
    job_id:
        Identifier appended to the prefix to form the job name.
    job_folder_path:
        Folder for the job file and the ``logs`` subfolder. Every occurrence of
        ``'/aih'`` is removed from it to obtain the location of the job file.
    conda_env_name:
        Conda environment activated inside the job.
    script_folder_path:
        Folder the job changes into before running the script.
    script_name:
        Script path, interpreted relative to the parent of
        ``script_folder_path`` (it is invoked as ``python ../<script_name>``).
    script_args:
        Command line arguments appended verbatim directly after the script
        name, so the string has to start with a space.
    t:
        Value of ``#SBATCH -t``.
    p:
        Value of ``#SBATCH -p``. The ``--gres`` directive is only written if
        this contains the substring ``'gpu'``.
    gres:
        Value of ``#SBATCH --gres`` (see ``p``).
    qos:
        Value of ``#SBATCH --qos``.
    mem:
        Value of ``#SBATCH --mem``.
    nice:
        Value of ``#SBATCH --nice``.

    Returns
    ----------
    None. The exit status of ``sbatch`` is not checked.
    """
    # Local import so that the block does not depend on additional
    # notebook-level imports.
    import shlex

    job_name = f"{job_name_prefix}_{job_id}"
    # account for fact that submit node has different home path than compute node
    job_file_path = f"{job_folder_path.replace('/aih', '')}/job_{job_name}.cmd"
    out_file_path = f"{job_folder_path}/logs/out_{job_name}.txt"
    err_file_path = f"{job_folder_path}/logs/err_{job_name}.txt"

    os.makedirs(job_folder_path + "/logs", exist_ok=True)
    # The job file lives under the '/aih'-stripped path, which is a different
    # folder than the one created above whenever the path contains '/aih'.
    # Create it as well so that opening the job file cannot fail.
    os.makedirs(os.path.dirname(job_file_path), exist_ok=True)

    job_lines = [
        "#!/bin/bash",
        f"#SBATCH -J {job_name}",
        f"#SBATCH -o {out_file_path}",
        f"#SBATCH -e {err_file_path}",
        f"#SBATCH -t {t}",
        f"#SBATCH -p {p}",
        "#SBATCH -c 6",
        # To exclude specific nodes, add a line such as:
        # "#SBATCH --exclude=supergpu02,supergpu03"
    ]
    if "gpu" in p:
        job_lines.append(f"#SBATCH --gres={gres}")
    job_lines += [
        f"#SBATCH --qos={qos}",
        f"#SBATCH --mem={mem}",
        f"#SBATCH --nice={nice}",
        "source $HOME/.bashrc",
        f"conda activate {conda_env_name}",
        "cd /",
        f"cd {script_folder_path}",
        f"python ../{script_name}{script_args}",
    ]

    with open(job_file_path, "w") as handle:
        handle.write("\n".join(job_lines) + "\n")

    # Quote the path so that whitespace or shell metacharacters in it cannot
    # break (or alter) the submission command.
    os.system(f"sbatch {shlex.quote(job_file_path)}")

## 2. NicheCompass Ablation

### 2.1 Loss Ablation

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "loss"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
n_neighbors = 6
active_gp_thresh_ratio = 0.01
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_epochs = 400

lambda_l1_masked = 0.
lambda_l1_addon = 0.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256
n_addon_gp = 100

# One job per (edge recon weight, gene expr recon weight) pair and seed;
# jobs are numbered consecutively starting at 1.
job_id = 1
for lambda_edge_recon, lambda_gene_expr_recon in [
        (500000., 300.),
        (0., 300.),
        (500000., 0.),
        (50000., 300.),
        (500000., 30)]:
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

### 2.2 Loss Ablation Extended

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "loss_extended"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
n_neighbors = 6
active_gp_thresh_ratio = 0.01
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_epochs = 400

lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256
n_addon_gp = 100

# One job per (masked L1 weight, add-on L1 weight) pair and seed;
# jobs are numbered consecutively starting at 1.
job_id = 1
for lambda_l1_masked, lambda_l1_addon in [
        (0., 0.),
        (0., 3.),
        (0., 30.),
        (0., 300.),
        (3., 3.),
        (30., 30.),
        (300., 300.)]:
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

### 2.3 Encoder Ablation

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "encoder"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
n_neighbors = 6
active_gp_thresh_ratio = 0.01
node_label_method = "one-hop-norm"
n_epochs = 400

lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.
lambda_l1_masked = 0.
lambda_l1_addon = 0.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256
n_addon_gp = 100

# One job per encoder conv layer and seed;
# jobs are numbered consecutively starting at 1.
job_id = 1
for conv_layer_encoder in ("gatv2conv", "gcnconv"):
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

### 2.4 Neighbor Ablation

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "neighbor"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
active_gp_thresh_ratio = 0.01
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_epochs = 400

lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.
lambda_l1_masked = 0.
lambda_l1_addon = 0.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256
n_addon_gp = 100

# One job per number of neighbors and seed;
# jobs are numbered consecutively starting at 1.
job_id = 1
for n_neighbors in (4, 8, 12, 16, 20):
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

### 2.5 De Novo GP Ablation

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "denovogp"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
n_neighbors = 6
active_gp_thresh_ratio = 0.01
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_epochs = 400

lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.
lambda_l1_masked = 0.
lambda_l1_addon = 0.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256

# One job per number of add-on gene programs and seed;
# jobs are numbered consecutively starting at 1.
job_id = 1
for n_addon_gp in (0, 10, 30, 100, 500):
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

### 2.6 GP Selection Ablation

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "gpselection"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
n_neighbors = 6
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_epochs = 400

lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.
lambda_l1_masked = 0.
lambda_l1_addon = 0.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256
n_addon_gp = 100

# One job per active gene program threshold ratio and seed;
# jobs are numbered consecutively starting at 1.
job_id = 1
for active_gp_thresh_ratio in (0., 0.01, 0.03, 0.1, 0.3, 0.5, 1):
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

### 2.7 No Prior GP Ablation

In [None]:
# Identification of this ablation run.
task = "ablation"
ablation_task = "nopriorgp"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"

# Job naming, job folder and location of the training script.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"./{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk_new/workspace/projects/nichecompass-reproducibility/analysis/{task}"
script_name = "./data_analysis/train_nichecompass_reference_model.py"

# Settings that stay fixed for every job of this ablation.
n_neighbors = 6
active_gp_thresh_ratio = 0.01
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_epochs = 400

lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.
lambda_l1_masked = 0.
lambda_l1_addon = 0.
l1_targets_categories = "target_gene"
l1_sources_categories = None  # rendered as the literal text "None" on the command line
edge_batch_size = 256
n_addon_gp = 100

# One job per flag setting and seed; jobs are numbered consecutively starting
# at 1. The two settings only differ in whether the
# `--add_fc_gps_instead_of_gp_dict_gps` flag is passed to the script.
job_id = 1
for add_fc_gps_instead_of_gp_dict_gps in (True, False):
    for seed in range(8):
        # Every argument is preceded by a single space, including the first.
        script_args = " " + " ".join([
            # data & gene program mask
            f"--dataset {dataset}",
            f"--reference_batches {reference_batches}",
            f"--n_neighbors {n_neighbors}",
            "--no-filter_genes",
            "--nichenet_keep_target_genes_ratio 1.",
            "--nichenet_max_n_target_genes_per_gp 250",
            "--include_mebocost_gps",
            f"--species {species}",
            "--gp_filter_mode subset",
            "--combine_overlap_gps",
            "--overlap_thresh_source_genes 0.9",
            "--overlap_thresh_target_genes 0.9",
            "--overlap_thresh_genes 0.9",
            # ablated flag: present only when the loop variable is True
            *(["--add_fc_gps_instead_of_gp_dict_gps"]
              if add_fc_gps_instead_of_gp_dict_gps else []),
            # keys
            "--counts_key counts",
            "--spatial_key spatial",
            "--adj_key spatial_connectivities",
            "--mapping_entity_key mapping_entity",
            "--gp_targets_mask_key nichecompass_gp_targets",
            "--gp_sources_mask_key nichecompass_gp_sources",
            "--gp_names_key nichecompass_gp_names",
            f"--model_label {ablation_task}_{task}",
            "--active_gp_names_key nichecompass_active_gp_names",
            "--latent_key nichecompass_latent",
            # model
            f"--n_addon_gp {n_addon_gp}",
            f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
            "--gene_expr_recon_dist nb",
            "--log_variational",
            f"--node_label_method {node_label_method}",
            "--n_layers_encoder 1",
            "--n_hidden_encoder None",
            f"--conv_layer_encoder {conv_layer_encoder}",
            # training
            f"--n_epochs {n_epochs}",
            "--n_epochs_all_gps 25",
            "--lr 0.001",
            f"--lambda_edge_recon {lambda_edge_recon}",
            f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
            "--lambda_group_lasso 0.",
            f"--lambda_l1_masked {lambda_l1_masked}",
            f"--lambda_l1_addon {lambda_l1_addon}",
            f"--l1_targets_categories {l1_targets_categories}",
            f"--l1_sources_categories {l1_sources_categories}",
            f"--edge_batch_size {edge_batch_size}",
            "--node_batch_size None",
            "--n_sampled_neighbors 4",
            f"--seed {seed}",
            "--use_new_gp_mask",
            f"--timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        job_id += 1

## 3. Single Sample Method Benchmarking

### 3.1 seqFISH Mouse Organogenesis

#### 3.1.1 Spatial Transcriptomics Data

In [None]:
# GCN encoder
# Job configuration for benchmarking NicheCompass with a GCN encoder on the
# seqFISH mouse organogenesis sample (embryo 2).
task = "single_sample_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_embryo2"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
# Eight identical entries, the same length as the other per-run lists passed
# below (author's note on this value: full dataset).
edge_batch_size_str = " ".join(["131072"] * 8)
conv_layer_encoder = "gcnconv"

job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line options for the training script. The resulting string starts
# with a single space and separates the options by single spaces.
script_args = " " + " ".join([
    "--adata_new_name None",
    "--n_neighbors_list 16 16 12 12 8 8 4 4",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None None None None None",
    "--seeds 7 6 5 4 3 2 1 0",
    "--run_index 8 7 6 5 4 3 2 1",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.0",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {conv_layer_encoder}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--n_addon_gp 10",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    "--node_label_method one-hop-norm",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    f"--conv_layer_encoder {conv_layer_encoder}",
    "--n_epochs 400",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 5000000.",
    "--lambda_gene_expr_recon 3000.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    "--lambda_l1_addon 0.",
    "--n_sampled_neighbors 4",
    f"--timestamp_suffix _{job_id}",
])

# Hand the configuration to the Slurm helper defined in section 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder
# Job configuration for benchmarking NicheCompass with a GATv2 encoder on the
# seqFISH mouse organogenesis sample (embryo 2).
task = "single_sample_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_embryo2"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
# Eight identical entries, the same length as the other per-run lists passed
# below (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["2048"] * 8)
conv_layer_encoder = "gatv2conv"

job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line options for the training script. The resulting string starts
# with a single space and separates the options by single spaces.
script_args = " " + " ".join([
    "--adata_new_name None",
    "--n_neighbors_list 16 16 12 12 8 8 4 4",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None None None None None",
    "--seeds 7 6 5 4 3 2 1 0",
    "--run_index 8 7 6 5 4 3 2 1",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.0",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {conv_layer_encoder}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--n_addon_gp 10",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    "--node_label_method one-hop-norm",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    f"--conv_layer_encoder {conv_layer_encoder}",
    "--n_epochs 400",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 5000000.",
    "--lambda_gene_expr_recon 3000.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    "--lambda_l1_addon 0.",
    "--n_sampled_neighbors 4",
    f"--timestamp_suffix _{job_id}",
])

# Hand the configuration to the Slurm helper defined in section 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.1.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder
# Submit one benchmarking job per subsampled version of the seqFISH mouse
# organogenesis sample (embryo 2). Settings that do not depend on the
# subsample percentage are defined once before the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
# The edge batch size list needs the same number of entries (8) as
# --n_neighbors_list, --node_batch_size_list, --seeds and --run_index below
# (author's note on this value: full dataset).
edge_batch_size_str = " ".join(["131072"] * 8)
conv_layer_encoder = "gcnconv"

# The job id is the same for every subsample; job names stay distinct because
# the dataset name (which contains the percentage) is part of the prefix.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct_embryo2"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command-line options for the training script. The resulting string
    # starts with a single space and separates the options by single spaces.
    script_args = " " + " ".join([
        "--adata_new_name None",
        "--n_neighbors_list 16 16 12 12 8 8 4 4",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None None None None None",
        "--seeds 7 6 5 4 3 2 1 0",
        "--run_index 8 7 6 5 4 3 2 1",
        f"--cell_type_key {cell_type_key}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        "--reference_batches None",
        "--counts_key counts",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--no-filter_genes",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ])

    # Hand the configuration to the Slurm helper defined in section 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder
# Submit one benchmarking job per subsampled version of the seqFISH mouse
# organogenesis sample (embryo 2). Settings that do not depend on the
# subsample percentage are defined once before the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
# The edge batch size list needs the same number of entries (8) as
# --n_neighbors_list, --node_batch_size_list, --seeds and --run_index below
# (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["2048"] * 8)
conv_layer_encoder = "gatv2conv"

# The job id is the same for every subsample; job names stay distinct because
# the dataset name (which contains the percentage) is part of the prefix.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct_embryo2"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command-line options for the training script. The resulting string
    # starts with a single space and separates the options by single spaces.
    script_args = " " + " ".join([
        "--adata_new_name None",
        "--n_neighbors_list 16 16 12 12 8 8 4 4",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None None None None None",
        "--seeds 7 6 5 4 3 2 1 0",
        "--run_index 8 7 6 5 4 3 2 1",
        f"--cell_type_key {cell_type_key}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        "--reference_batches None",
        "--counts_key counts",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--no-filter_genes",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ])

    # Hand the configuration to the Slurm helper defined in section 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.2 seqFISH Mouse Organogenesis Imputed

The imputed seqFISH benchmarking in this section is not used for the manuscript.

#### 3.2.1 Spatial Transcriptomics Data

In [None]:
# GCN encoder
# Job configuration for benchmarking NicheCompass with a GCN encoder on the
# imputed seqFISH mouse organogenesis sample (embryo 2). Unlike the
# non-imputed cells, gene filtering is enabled here (--filter_genes with
# --n_hvg 0 and --n_svg set from n_svg).
task = "single_sample_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed_embryo2"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
# Eight identical entries, the same length as the other per-run lists passed
# below (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["8192"] * 8)
n_svg = 5000

# The encoder name is hard-coded as "gcnconv" throughout this cell.
job_name_prefix = f"{dataset}_nichecompass_gcnconv_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line options for the training script. The resulting string starts
# with a single space and separates the options by single spaces.
script_args = " " + " ".join([
    "--adata_new_name None",
    "--n_neighbors_list 16 16 12 12 8 8 4 4",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None None None None None",
    "--seeds 7 6 5 4 3 2 1 0",
    "--run_index 8 7 6 5 4 3 2 1",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.0",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--filter_genes",
    "--n_hvg 0",
    f"--n_svg {n_svg}",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label gcnconv_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--n_addon_gp 10",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    "--node_label_method one-hop-norm",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 400",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 5000000.",
    "--lambda_gene_expr_recon 3000.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    "--lambda_l1_addon 0.",
    "--n_sampled_neighbors 4",
    f"--timestamp_suffix _{job_id}",
])

# Hand the configuration to the Slurm helper defined in section 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.2.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder
# Submit one benchmarking job per subsampled version of the imputed seqFISH
# mouse organogenesis sample (embryo 2). Settings that do not depend on the
# subsample percentage are defined once before the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
# Eight identical entries, the same length as the other per-run lists passed
# below (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["8192"] * 8)
n_svg = 3000

# The job id is the same for every subsample; job names stay distinct because
# the dataset name (which contains the percentage) is part of the prefix.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct_embryo2"
    # NOTE(review): job name and model label use "one-hop-norm" here, whereas
    # the full-sample imputed cell uses "gcnconv" — confirm this is intended.
    job_name_prefix = f"{dataset}_nichecompass_one-hop-norm_{task}"

    # Command-line options for the training script. The resulting string
    # starts with a single space and separates the options by single spaces.
    script_args = " " + " ".join([
        "--adata_new_name None",
        "--n_neighbors_list 16 16 12 12 8 8 4 4",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None None None None None",
        "--seeds 7 6 5 4 3 2 1 0",
        "--run_index 8 7 6 5 4 3 2 1",
        f"--cell_type_key {cell_type_key}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        "--reference_batches None",
        "--counts_key counts",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--filter_genes",
        "--n_hvg 0",
        f"--n_svg {n_svg}",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label one-hop-norm_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 0",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        "--conv_layer_encoder gcnconv",
        "--n_epochs 100",
        "--n_epochs_all_gps 25",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ])

    # Hand the configuration to the Slurm helper defined in section 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.3 NanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

#### 3.3.1 Spatial Transcriptomics Data

In [None]:
# GCN encoder
# Job configuration for benchmarking NicheCompass with a GCN encoder on the
# CosMx human NSCLC sample (batch 5).
task = "single_sample_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc_batch5"
cell_type_key = "cell_type"
species = "human"
# Eight identical entries, the same length as the other per-run lists passed
# below (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["32768"] * 8)
conv_layer_encoder = "gcnconv"

job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line options for the training script. The resulting string starts
# with a single space and separates the options by single spaces.
script_args = " " + " ".join([
    "--adata_new_name None",
    "--n_neighbors_list 16 16 12 12 8 8 4 4",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None None None None None",
    "--seeds 7 6 5 4 3 2 1 0",
    "--run_index 8 7 6 5 4 3 2 1",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.0",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {conv_layer_encoder}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--n_addon_gp 10",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    "--node_label_method one-hop-norm",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    f"--conv_layer_encoder {conv_layer_encoder}",
    "--n_epochs 400",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 5000000.",
    "--lambda_gene_expr_recon 3000.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    "--lambda_l1_addon 0.",
    "--n_sampled_neighbors 4",
    f"--timestamp_suffix _{job_id}",
])

# Hand the configuration to the Slurm helper defined in section 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 1 due to 2 day limit)
# First of two jobs for the GATv2 encoder on the CosMx human NSCLC sample
# (batch 5): covers run indices 8-5 (seeds 7-4, neighbor counts 16 and 12).
task = "single_sample_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc_batch5"
cell_type_key = "cell_type"
species = "human"
# Four identical entries, the same length as the other per-run lists passed
# below (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["512"] * 4)
conv_layer_encoder = "gatv2conv"

job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line options for the training script. The resulting string starts
# with a single space and separates the options by single spaces.
script_args = " " + " ".join([
    "--adata_new_name None",
    "--n_neighbors_list 16 16 12 12",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None",
    "--seeds 7 6 5 4",
    "--run_index 8 7 6 5",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.0",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {conv_layer_encoder}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--n_addon_gp 10",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    "--node_label_method one-hop-norm",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    f"--conv_layer_encoder {conv_layer_encoder}",
    "--n_epochs 400",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 5000000.",
    "--lambda_gene_expr_recon 3000.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    "--lambda_l1_addon 0.",
    "--n_sampled_neighbors 4",
    f"--timestamp_suffix _{job_id}",
])

# Hand the configuration to the Slurm helper defined in section 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# Second of two jobs for the GATv2 encoder on the CosMx human NSCLC sample
# (batch 5): covers run indices 4-1 (seeds 3-0, neighbor counts 8 and 4).
task = "single_sample_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc_batch5"
cell_type_key = "cell_type"
species = "human"
# Four identical entries, the same length as the other per-run lists passed
# below (author's note on this value: out of memory).
edge_batch_size_str = " ".join(["512"] * 4)
conv_layer_encoder = "gatv2conv"

job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# A job id of 2 keeps this job's name and timestamp suffix distinct from the
# run 1 job, which shares the same prefix.
job_id = 2
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line options for the training script. The resulting string starts
# with a single space and separates the options by single spaces.
script_args = " " + " ".join([
    "--adata_new_name None",
    "--n_neighbors_list 8 8 4 4",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None",
    "--seeds 3 2 1 0",
    "--run_index 4 3 2 1",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.0",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {conv_layer_encoder}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--n_addon_gp 10",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    "--node_label_method one-hop-norm",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    f"--conv_layer_encoder {conv_layer_encoder}",
    "--n_epochs 400",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 5000000.",
    "--lambda_gene_expr_recon 3000.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    "--lambda_l1_addon 0.",
    "--n_sampled_neighbors 4",
    f"--timestamp_suffix _{job_id}",
])

# Hand the configuration to the Slurm helper defined in section 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.3.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder
# Calls submit_python_script once per subsample percentage of the
# "nanostring_cosmx_human_nsclc_subsample_*pct_batch5" datasets.

# Settings shared by every subsample (they do not depend on the loop variable)
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
species = "human"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "32768 32768 32768 32768 32768 32768 32768 32768"
conv_layer_encoder = "gcnconv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct_batch5"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments; every fragment starts with a single space so
    # that the pieces concatenate into one argument string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# tmp
# Temporary cell: GCN encoder jobs for the 5 pct and 10 pct subsamples only.

# Settings shared by every subsample (they do not depend on the loop variable)
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
species = "human"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "32768 32768 32768 32768 32768 32768 32768 32768"
conv_layer_encoder = "gcnconv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [5, 10]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct_batch5"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments; every fragment starts with a single space so
    # that the pieces concatenate into one argument string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder
# Calls submit_python_script once per subsample percentage of the
# "nanostring_cosmx_human_nsclc_subsample_*pct_batch5" datasets.

# Settings shared by every subsample (they do not depend on the loop variable)
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
species = "human"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "512 512 512 512 512 512 512 512"
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct_batch5"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments; every fragment starts with a single space so
    # that the pieces concatenate into one argument string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# tmp
# GATv2 encoder
# Temporary cell: GATv2 encoder job for the 25 pct subsample only.

# Settings shared by every subsample (they do not depend on the loop variable)
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
species = "human"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "512 512 512 512 512 512 512 512"
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [25]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct_batch5"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments; every fragment starts with a single space so
    # that the pieces concatenate into one argument string.
    # NOTE: in this cell --model_label precedes --gp_names_key; the order is
    # kept as is so the generated command line stays unchanged.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        f" --model_label {conv_layer_encoder}_{task}"
        " --gp_names_key nichecompass_gp_names"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.4 Vizgen MERFISH Mouse Liver

#### 3.4.1 Spatial Transcriptomics Data

In [None]:
# GCN encoder (run 1 due to 2 day limit)
# Covers run indices 8 to 4 (seeds 7 to 3) in a single job.

# Dataset and model settings
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
conv_layer_encoder = "gcnconv"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "4096 4096 4096 4096 4096"

# Job settings
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every fragment starts with a single space so that
# the pieces concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16 16 12 12 8"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None"
    " --seeds 7 6 5 4 3"
    " --run_index 8 7 6 5 4"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder (run 2 due to 2 day limit)
# Covers run indices 3 to 1 (seeds 2 to 0) in a single job.

# Dataset and model settings
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
conv_layer_encoder = "gcnconv"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "4096 4096 4096"

# Job settings
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every fragment starts with a single space so that
# the pieces concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 8 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None"
    " --seeds 2 1 0"
    " --run_index 3 2 1"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 1 due to 2 day limit)
# 50 epochs due to 2 day time limit
# Covers a single run (run index 8, seed 7).

# Dataset and model settings
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
conv_layer_encoder = "gatv2conv"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "512"

# Job settings
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every fragment starts with a single space so that
# the pieces concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 7"
    " --run_index 8"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 50"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# 50 epochs due to 2 day time limit
# Covers a single run (run index 7, seed 6).

# Dataset and model settings
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
conv_layer_encoder = "gatv2conv"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "512"
run_index = 7

# Job settings
# The "run 1" GATv2 cell uses the same dataset, encoder, task and job_id.
# submit_python_script builds the job name as f"{job_name_prefix}_{job_id}"
# and derives the job file and the stdout/stderr log paths from it, so
# without a run-specific part both jobs would share the same job file and
# log files. The run index is therefore included in the prefix; job_id (and
# with it --timestamp_suffix) is left unchanged.
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run{run_index}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every fragment starts with a single space so that
# the pieces concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 6"
    f" --run_index {run_index}"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 50"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 3 due to 2 day limit)
# 50 epochs due to 2 day time limit
# Covers a single run (run index 6, seed 5).

# Dataset and model settings
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
conv_layer_encoder = "gatv2conv"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "512"

# Job settings
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every fragment starts with a single space so that
# the pieces concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 5"
    " --run_index 6"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 50"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 4 due to 2 day limit)
# 50 epochs due to 2 day time limit
# Covers a single run (run index 5, seed 4).

# Dataset and model settings
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
conv_layer_encoder = "gatv2conv"
# Original note on this value: "out of memory" (presumably the batch size was
# chosen to avoid out-of-memory errors -- TODO confirm)
edge_batch_size_str = "512"
run_index = 5

# Job settings
# The "run 3" GATv2 cell uses the same dataset, encoder, task and job_id.
# submit_python_script builds the job name as f"{job_name_prefix}_{job_id}"
# and derives the job file and the stdout/stderr log paths from it, so
# without a run-specific part both jobs would share the same job file and
# log files. The run index is therefore included in the prefix; job_id (and
# with it --timestamp_suffix) is left unchanged.
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run{run_index}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every fragment starts with a single space so that
# the pieces concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 4"
    f" --run_index {run_index}"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 50"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 5 (submitted as its own job due to the 2 day limit)
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512 512"  # out of memory
conv_layer_encoder = "gatv2conv"

# Slurm job naming plus the locations of the job files and the training script
job_id = 3
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line arguments for the training script; every fragment starts with a
# space so that the adjacent literals concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 8 8"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 3 2"
    " --run_index 4 3"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 6 (submitted as its own job due to the 2 day limit)
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512 512"  # out of memory
conv_layer_encoder = "gatv2conv"

# Slurm job naming plus the locations of the job files and the training script
job_id = 4
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line arguments for the training script; every fragment starts with a
# space so that the adjacent literals concatenate into one argument string
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 1 0"
    " --run_index 2 1"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.4.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder: one job per subsample percentage, each job covering all
# eight entries of the list-valued arguments below
# Settings that are identical for every subsample
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "4096 4096 4096 4096 4096 4096 4096 4096"  # out of memory
conv_layer_encoder = "gcnconv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [50, 25, 10, 5, 1]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file for this subsample and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder, run 1 (submitted as its own job due to the 2 day limit)
# NOTE(review): the other "run N" GATv2 cells for the 50pct subsample also use
# job_id = 1, which yields the same job name and hence the same job / log file
# names in submit_python_script -- confirm this is intended.
# Settings that do not depend on the subsample percentage
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512"  # out of memory
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [50]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 7"
        " --run_index 8"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder, run 2 (submitted as its own job due to the 2 day limit)
# NOTE(review): the other "run N" GATv2 cells for the 50pct subsample also use
# job_id = 1, which yields the same job name and hence the same job / log file
# names in submit_python_script -- confirm this is intended.
# Settings that do not depend on the subsample percentage
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512"  # out of memory
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [50]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 6"
        " --run_index 7"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder, run 3 (submitted as its own job due to the 2 day limit)
# NOTE(review): the other "run N" GATv2 cells for the 50pct subsample also use
# job_id = 1, which yields the same job name and hence the same job / log file
# names in submit_python_script -- confirm this is intended.
# Settings that do not depend on the subsample percentage
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512"  # out of memory
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [50]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 12"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 5"
        " --run_index 6"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder, run 4 (submitted as its own job due to the 2 day limit)
# NOTE(review): the other "run N" GATv2 cells for the 50pct subsample also use
# job_id = 1, which yields the same job name and hence the same job / log file
# names in submit_python_script -- confirm this is intended.
# Settings that do not depend on the subsample percentage
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512"  # out of memory
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [50]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 12"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 4"
        " --run_index 5"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder, run 5 (submitted as its own job due to the 2 day limit)
# Settings that do not depend on the subsample percentage
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512 512 512 512"  # out of memory
conv_layer_encoder = "gatv2conv"
job_id = 3
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [50]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None"
        " --seeds 3 2 1 0"
        " --run_index 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder, run 1 (submitted as its own job due to the 2 day limit)
# NOTE(review): the "run 2" GATv2 cell for the 25pct subsample also uses
# job_id = 1, which yields the same job name and hence the same job / log file
# names in submit_python_script -- confirm this is intended.
# Settings that do not depend on the subsample percentage
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512 512 512 512 512 512"  # out of memory
conv_layer_encoder = "gatv2conv"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

for subsample_pct in [25]:
    # Dataset name and job name prefix depend on the subsample percentage
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script; every fragment starts
    # with a space so that the adjacent literals concatenate into one string
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None"
        " --seeds 7 6 5 4 3 2"
        " --run_index 8 7 6 5 4 3"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# Values that do not depend on the subsample percentage are assigned once up
# front; the loop only rebuilds what is derived from the dataset name.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gatv2conv"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512 512" # out of memory
job_id = 1
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [25]:
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Every fragment carries
    # its own leading blank, so the fragments are joined without a separator.
    script_args = "".join([
        # list-valued arguments (two entries each in this cell)
        " --adata_new_name None",
        " --n_neighbors_list 4 4",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None",
        " --seeds 1 0",
        " --run_index 2 1",
        f" --cell_type_key {cell_type_key}",
        # gp mask options
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # dataset and key names
        f" --dataset {dataset}",
        " --reference_batches None",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --no-filter_genes",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # model options
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # training options and loss weights
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Hand the assembled job description to the Slurm helper defined in 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder
# Values that do not depend on the subsample percentage are assigned once up
# front; the loop only rebuilds what is derived from the dataset name.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gatv2conv"
cell_type_key = "Cell_Type"
species = "mouse"
edge_batch_size_str = "512 512 512 512 512 512 512 512" # out of memory
job_id = 1
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [10, 5, 1]:
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Every fragment carries
    # its own leading blank, so the fragments are joined without a separator.
    script_args = "".join([
        # list-valued arguments (eight entries each in this cell)
        " --adata_new_name None",
        " --n_neighbors_list 16 16 12 12 8 8 4 4",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None None None None None None None",
        " --seeds 7 6 5 4 3 2 1 0",
        " --run_index 8 7 6 5 4 3 2 1",
        f" --cell_type_key {cell_type_key}",
        # gp mask options
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # dataset and key names
        f" --dataset {dataset}",
        " --reference_batches None",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --no-filter_genes",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # model options
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # training options and loss weights
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Hand the assembled job description to the Slurm helper defined in 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.5 Slide-seqV2 Mouse Hippocampus

#### 3.5.1 Spatial Transcriptomics Data

In [None]:
# GCN encoder
# Job description for the full Slide-seqV2 mouse hippocampus dataset.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gcnconv"
dataset = "slideseqv2_mouse_hippocampus"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384" # out of memory

job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

# Command line arguments for the training script. Every fragment carries its
# own leading blank, so the fragments are joined without a separator.
script_args = "".join([
    # list-valued arguments (eight entries each in this cell)
    " --adata_new_name None",
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # gp mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # training options and loss weights
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

# Hand the assembled job description to the Slurm helper defined in 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder
# Job description for the full Slide-seqV2 mouse hippocampus dataset.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gatv2conv"
dataset = "slideseqv2_mouse_hippocampus"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "256 256 256 256 256 256 256 256" # out of memory

job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

# Command line arguments for the training script. Every fragment carries its
# own leading blank, so the fragments are joined without a separator.
script_args = "".join([
    # list-valued arguments (eight entries each in this cell)
    " --adata_new_name None",
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # gp mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # training options and loss weights
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

# Hand the assembled job description to the Slurm helper defined in 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.5.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder
# Values that do not depend on the subsample percentage are assigned once up
# front; the loop only rebuilds what is derived from the dataset name.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gcnconv"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384" # out of memory
job_id = 1
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"slideseqv2_mouse_hippocampus_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Every fragment carries
    # its own leading blank, so the fragments are joined without a separator.
    script_args = "".join([
        # list-valued arguments (eight entries each in this cell)
        " --adata_new_name None",
        " --n_neighbors_list 16 16 12 12 8 8 4 4",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None None None None None None None",
        " --seeds 7 6 5 4 3 2 1 0",
        " --run_index 8 7 6 5 4 3 2 1",
        f" --cell_type_key {cell_type_key}",
        # gp mask options
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # dataset and key names
        f" --dataset {dataset}",
        " --reference_batches None",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --no-filter_genes",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # model options
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # training options and loss weights
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Hand the assembled job description to the Slurm helper defined in 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder
# Values that do not depend on the subsample percentage are assigned once up
# front; the loop only rebuilds what is derived from the dataset name.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gatv2conv"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "256 256 256 256 256 256 256 256" # out of memory
job_id = 1
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"slideseqv2_mouse_hippocampus_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Every fragment carries
    # its own leading blank, so the fragments are joined without a separator.
    script_args = "".join([
        # list-valued arguments (eight entries each in this cell)
        " --adata_new_name None",
        " --n_neighbors_list 16 16 12 12 8 8 4 4",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None None None None None None None",
        " --seeds 7 6 5 4 3 2 1 0",
        " --run_index 8 7 6 5 4 3 2 1",
        f" --cell_type_key {cell_type_key}",
        # gp mask options
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # dataset and key names
        f" --dataset {dataset}",
        " --reference_batches None",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --no-filter_genes",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # model options
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # training options and loss weights
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Hand the assembled job description to the Slurm helper defined in 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# tmp
# Temporary cell: GATv2 encoder job for the 50 pct Slide-seqV2 subsample only.
# Values that do not depend on the subsample percentage are assigned once up
# front; the loop only rebuilds what is derived from the dataset name.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gatv2conv"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "256 256 256 256 256 256 256 256" # out of memory
job_id = 1
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [50]:
    dataset = f"slideseqv2_mouse_hippocampus_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Every fragment carries
    # its own leading blank, so the fragments are joined without a separator.
    script_args = "".join([
        # list-valued arguments (eight entries each in this cell)
        " --adata_new_name None",
        " --n_neighbors_list 16 16 12 12 8 8 4 4",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None None None None None None None",
        " --seeds 7 6 5 4 3 2 1 0",
        " --run_index 8 7 6 5 4 3 2 1",
        f" --cell_type_key {cell_type_key}",
        # gp mask options
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # dataset and key names
        f" --dataset {dataset}",
        " --reference_batches None",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --no-filter_genes",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # model options
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # training options and loss weights
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Hand the assembled job description to the Slurm helper defined in 1.3.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.6 Simulation Data

In [None]:
# GCN encoder
# Job description for the simulated dataset; the dataset name is composed from
# the simulation settings below.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gcnconv"
n_sim = "1"
increment_mode = "strong" # options noted by the author: "weak", "strong"
n_genes = 1105
n_locs = 10000
dataset = f"sim{n_sim}_{n_genes}genes_{n_locs}locs_{increment_mode}increments"
cell_type_key = "cell_types"
species = "mouse"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384" # full dataset

job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"

# Command line arguments for the training script. Every fragment carries its
# own leading blank, so the fragments are joined without a separator.
script_args = "".join([
    # list-valued arguments (eight entries each in this cell)
    " --adata_new_name None",
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # gp mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 100",
    " --active_gp_thresh_ratio 0.03",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # training options and loss weights
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 30.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

# Hand the assembled job description to the Slurm helper defined in 1.3.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder
# Job description for the simulated dataset; the dataset name is composed from
# the simulation settings below.
task = "single_sample_method_benchmarking"
conv_layer_encoder = "gatv2conv"
n_sim = "1"
increment_mode = "strong" # options noted by the author: "weak", "strong"
n_genes = 1105
n_locs = 10000
dataset = f"sim{n_sim}_{n_genes}genes_{n_locs}locs_{increment_mode}increments"
cell_type_key = "cell_types"
species = "mouse"
edge_batch_size_str = "2048 2048 2048 2048 2048 2048 2048 2048"

job_id = 40
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
script_name = "train_nichecompass_benchmarking_models.py"
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"

# Command line arguments for the training script. Every fragment carries its
# own leading blank, so the fragments are joined without a separator.
script_args = "".join([
    # list-valued arguments (eight entries each in this cell)
    " --adata_new_name None",
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # gp mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 100",
    " --active_gp_thresh_ratio 0.03",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # training options and loss weights
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 30.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

# Hand the assembled job description to the Slurm helper defined in 1.3.
# qos is set explicitly here instead of relying on the helper's default
# ("gpu_normal").
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    qos="gpu_long",
    nice=10000,
)

In [None]:
# GATv2 encoder
# Submit the NicheCompass benchmarking training script for the simulated
# dataset whose name is assembled from the components below.
task = "single_sample_method_benchmarking"

# Components of the simulated dataset name
n_sim = "1"
increment_mode = "strong"  # either "weak" or "strong"
n_genes = 1105
n_locs = 10000
dataset = f"sim{n_sim}_{n_genes}genes_{n_locs}locs_{increment_mode}increments"

cell_type_key = "cell_types"
species = "mouse"
edge_batch_size_str = "2048 2048 2048 2048 2048 2048 2048 2048"
conv_layer_encoder = "gatv2conv"

# Slurm job naming and script location
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 41
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment carries its
# own leading space, so the fragments are joined without a separator.
script_args = "".join([
    " --adata_new_name None",
    # Per-run lists (eight entries each)
    " --n_neighbors_list 6 6 6 6 6 6 6 6",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # GP mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 100",
    " --active_gp_thresh_ratio 0.03",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training options
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 30.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    qos="gpu_long",
    nice=10000,
)

### 3.7 SDMBench Data

In [None]:
# GCN encoder
# Submit the NicheCompass benchmarking training script for the
# "starmap_mouse_mpfc" dataset.
task = "single_sample_method_benchmarking"
dataset = "starmap_mouse_mpfc"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "131072 131072 131072 131072 131072 131072 131072 131072" # full dataset
conv_layer_encoder = "gcnconv"

# Slurm job naming and script location
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment carries its
# own leading space, so the fragments are joined without a separator.
script_args = "".join([
    " --adata_new_name None",
    # Per-run lists (eight entries each)
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # GP mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training options
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder
# Submit the NicheCompass benchmarking training script for the
# "starmap_mouse_mpfc" dataset.
task = "single_sample_method_benchmarking"
dataset = "starmap_mouse_mpfc"
cell_type_key = "cell_type"
species = "mouse"
edge_batch_size_str = "131072 131072 131072 131072 131072 131072 131072 131072" # full dataset
conv_layer_encoder = "gatv2conv"

# Slurm job naming and script location
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment carries its
# own leading space, so the fragments are joined without a separator.
script_args = "".join([
    " --adata_new_name None",
    # Per-run lists (eight entries each)
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # GP mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --no-filter_genes",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training options
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder
# Submit the NicheCompass benchmarking training script for the
# "stereoseq_mouse_embryo" dataset, with gene filtering switched on
# (--filter_genes, --n_hvg 0, --n_svg {n_svg}).
task = "single_sample_method_benchmarking"
dataset = "stereoseq_mouse_embryo"
cell_type_key = "leiden"
species = "mouse"
edge_batch_size_str = "131072 131072 131072 131072 131072 131072 131072 131072" # full dataset
n_svg = 3000
conv_layer_encoder = "gcnconv"

# Slurm job naming and script location
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment carries its
# own leading space, so the fragments are joined without a separator.
# --model_label and --conv_layer_encoder are derived from
# `conv_layer_encoder` (instead of repeating the literal "gcnconv") so they
# cannot drift from the encoder used in the job name.
script_args = "".join([
    " --adata_new_name None",
    # Per-run lists (eight entries each)
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # GP mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset, gene filtering and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --filter_genes",
    " --n_hvg 0",
    f" --n_svg {n_svg}",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training options
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder
# Submit the NicheCompass benchmarking training script for the
# "stereoseq_mouse_embryo" dataset, with gene filtering switched on
# (--filter_genes, --n_hvg 0, --n_svg {n_svg}).
task = "single_sample_method_benchmarking"
dataset = "stereoseq_mouse_embryo"
cell_type_key = "leiden"
species = "mouse"
edge_batch_size_str = "1024 1024 1024 1024 1024 1024 1024 1024"
n_svg = 3000
conv_layer_encoder = "gatv2conv"

# Slurm job naming and script location
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment carries its
# own leading space, so the fragments are joined without a separator.
script_args = "".join([
    " --adata_new_name None",
    # Per-run lists (eight entries each)
    " --n_neighbors_list 16 16 12 12 8 8 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None None None None None None None",
    " --seeds 7 6 5 4 3 2 1 0",
    " --run_index 8 7 6 5 4 3 2 1",
    f" --cell_type_key {cell_type_key}",
    # GP mask options
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset, gene filtering and key names
    f" --dataset {dataset}",
    " --reference_batches None",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --filter_genes",
    " --n_hvg 0",
    f" --n_svg {n_svg}",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model options
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training options
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    " --use_new_gp_mask",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 3.8 Metrics Computation

#### 3.8.1 NicheCompass

In [None]:
# GCN encoder
# Submit one metrics-computation job per seqFISH dataset variant (the full
# dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "seqfish_mouse_organogenesis_embryo2",
    "seqfish_mouse_organogenesis_subsample_50pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_25pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_10pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_5pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_1pct_embryo2",
):
    # Per-dataset file to evaluate and job name
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder
# Submit one metrics-computation job per seqFISH dataset variant (the full
# dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "seqfish_mouse_organogenesis_embryo2",
    "seqfish_mouse_organogenesis_subsample_50pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_25pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_10pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_5pct_embryo2",
    "seqfish_mouse_organogenesis_subsample_1pct_embryo2",
):
    file_name = f"{dataset}_nichecompass_{encoder}.h5ad"
    # The encoder is part of the job name because submit_python_script
    # derives the job file and the out/err log file names from it. Without
    # the encoder this cell would reuse the exact names of the GCN cell for
    # the same datasets (both use job_id 1).
    job_name_prefix = f"nichecompass_{encoder}_{task}_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder
# Submit one metrics-computation job per NanoString CosMx dataset variant
# (the full dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "nanostring_cosmx_human_nsclc_batch5",
    "nanostring_cosmx_human_nsclc_subsample_50pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_25pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_10pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_5pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_1pct_batch5",
):
    # Per-dataset file to evaluate and job name
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder
# Submit one metrics-computation job per NanoString CosMx dataset variant
# (the full dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "nanostring_cosmx_human_nsclc_batch5",
    "nanostring_cosmx_human_nsclc_subsample_50pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_25pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_10pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_5pct_batch5",
    "nanostring_cosmx_human_nsclc_subsample_1pct_batch5",
):
    file_name = f"{dataset}_nichecompass_{encoder}.h5ad"
    # The encoder is part of the job name because submit_python_script
    # derives the job file and the out/err log file names from it. Without
    # the encoder this cell would reuse the exact names of the GCN cell for
    # the same datasets (both use job_id 1).
    job_name_prefix = f"nichecompass_{encoder}_{task}_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder
# Submit one metrics-computation job per Vizgen MERFISH mouse liver dataset
# variant (the full dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "vizgen_merfish_mouse_liver",
    "vizgen_merfish_mouse_liver_subsample_50pct",
    "vizgen_merfish_mouse_liver_subsample_25pct",
    "vizgen_merfish_mouse_liver_subsample_10pct",
    "vizgen_merfish_mouse_liver_subsample_5pct",
    "vizgen_merfish_mouse_liver_subsample_1pct",
):
    # Per-dataset file to evaluate and job name
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder
# Submit one metrics-computation job per Vizgen MERFISH mouse liver dataset
# variant (the full dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "vizgen_merfish_mouse_liver",
    "vizgen_merfish_mouse_liver_subsample_50pct",
    "vizgen_merfish_mouse_liver_subsample_25pct",
    "vizgen_merfish_mouse_liver_subsample_10pct",
    "vizgen_merfish_mouse_liver_subsample_5pct",
    "vizgen_merfish_mouse_liver_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_{encoder}.h5ad"
    # The encoder is part of the job name because submit_python_script
    # derives the job file and the out/err log file names from it. Without
    # the encoder this cell would reuse the exact names of the GCN cell for
    # the same datasets (both use job_id 1).
    job_name_prefix = f"nichecompass_{encoder}_{task}_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder
# Submit one metrics-computation job per Slide-seqV2 mouse hippocampus
# dataset variant (the full dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "slideseqv2_mouse_hippocampus",
    "slideseqv2_mouse_hippocampus_subsample_50pct",
    "slideseqv2_mouse_hippocampus_subsample_25pct",
    "slideseqv2_mouse_hippocampus_subsample_10pct",
    "slideseqv2_mouse_hippocampus_subsample_5pct",
    "slideseqv2_mouse_hippocampus_subsample_1pct",
):
    # Per-dataset file to evaluate and job name
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_gcnconv_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder
# Submit one metrics-computation job per Slide-seqV2 mouse hippocampus
# dataset variant (the full dataset and its 50/25/10/5/1 pct subsamples).

# Settings shared by all submitted jobs
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "slideseqv2_mouse_hippocampus",
    "slideseqv2_mouse_hippocampus_subsample_50pct",
    "slideseqv2_mouse_hippocampus_subsample_25pct",
    "slideseqv2_mouse_hippocampus_subsample_10pct",
    "slideseqv2_mouse_hippocampus_subsample_5pct",
    "slideseqv2_mouse_hippocampus_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_{encoder}.h5ad"
    # The job name must carry this cell's encoder (it previously said
    # "gcnconv"): submit_python_script derives the job file and the out/err
    # log file names from it, so a "gcnconv" name here would reuse the names
    # of the GCN cell for the same datasets (both use job_id 1).
    job_name_prefix = f"nichecompass_{encoder}_{task}_{dataset}_metrics_computation"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# Submit the metrics-computation job (with --include_sdmbench) for the
# simulated dataset, evaluating the file produced with the GCN encoder.
method = "nichecompass"
encoder = "gcnconv"
task = "single_sample_method_benchmarking"
# NOTE(review): the training cell for this simulated dataset passes
# "--cell_type_key cell_types" (plural); confirm "cell_type" is the key
# present in the file evaluated here.
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}_{encoder}.h5ad"

    # Slurm job naming and script location
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --niche_type_key {niche_type_key}",
        f" --batch_key {batch_key}",
        f" --latent_key {method}_latent",
        f" --metrics {metrics}",
        " --include_sdmbench",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# Submit the metrics-computation job (with --include_sdmbench) for the
# simulated dataset, evaluating the file produced with the GATv2 encoder.
method = "nichecompass"
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
# NOTE(review): the training cell for this simulated dataset passes
# "--cell_type_key cell_types" (plural); confirm "cell_type" is the key
# present in the file evaluated here.
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}_{encoder}.h5ad"

    # The encoder is part of the job name because submit_python_script
    # derives the job file and the out/err log file names from it. Without
    # the encoder this cell would reuse the exact names of the gcnconv cell
    # for the same dataset (both use job_id 1).
    job_name_prefix = f"{method}_{encoder}_{task}_{dataset}_metrics_computation"
    job_folder_path = f"./benchmarking/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --niche_type_key {niche_type_key}",
        f" --batch_key {batch_key}",
        f" --latent_key {method}_latent",
        f" --metrics {metrics}",
        " --include_sdmbench",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# Submit the metrics-computation job (with --include_sdmbench) for the
# "starmap_mouse_mpfc" dataset, evaluating the file produced with the GCN
# encoder.
method = "nichecompass"
encoder = "gcnconv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ("starmap_mouse_mpfc",):
    file_name = f"{dataset}_{method}_{encoder}.h5ad"

    # Slurm job naming and script location
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"

    # Each fragment carries its own leading space
    script_args = "".join([
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --niche_type_key {niche_type_key}",
        f" --batch_key {batch_key}",
        f" --latent_key {method}_latent",
        f" --metrics {metrics}",
        " --include_sdmbench",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# Metrics computation for the NicheCompass (GATv2Conv encoder) results on the
# STARmap mouse mPFC dataset.
method = "nichecompass"
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ["starmap_mouse_mpfc",
               ]:
    file_name = f"{dataset}_{method}_{encoder}.h5ad"

    # The encoder is part of the job name: runs for different encoders share
    # method, dataset and job_id, and 'submit_python_script' derives the job
    # script path and the out_/err_ log paths from the job name. Without the
    # encoder, those files would collide between encoder variants.
    job_name_prefix = f"{method}_{encoder}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"
    # Command line handed to the script; every option carries a leading space.
    script_args = f" --dataset {dataset}" \
                  f" --task {task}" \
                  f" --file_name {file_name}" \
                  f" --cell_type_key {cell_type_key}" \
                  f" --niche_type_key {niche_type_key}" \
                  f" --batch_key {batch_key}" \
                  f" --latent_key {method}_latent" \
                  f" --metrics {metrics}" \
                  " --include_sdmbench"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args)

In [None]:
# Metrics computation for the NicheCompass (GCNConv encoder) results on the
# Stereo-seq mouse embryo dataset.
method = "nichecompass"
encoder = "gcnconv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ["stereoseq_mouse_embryo",
               ]:
    file_name = f"{dataset}_{method}_{encoder}.h5ad"

    # The encoder is part of the job name: runs for different encoders share
    # method, dataset and job_id, and 'submit_python_script' derives the job
    # script path and the out_/err_ log paths from the job name. Without the
    # encoder, those files would collide between encoder variants.
    job_name_prefix = f"{method}_{encoder}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"
    # Command line handed to the script; every option carries a leading space.
    script_args = f" --dataset {dataset}" \
                  f" --task {task}" \
                  f" --file_name {file_name}" \
                  f" --cell_type_key {cell_type_key}" \
                  f" --niche_type_key {niche_type_key}" \
                  f" --batch_key {batch_key}" \
                  f" --latent_key {method}_latent" \
                  f" --metrics {metrics}" \
                  " --include_sdmbench"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args)

In [None]:
# Metrics computation for the NicheCompass (GATv2Conv encoder) results on the
# Stereo-seq mouse embryo dataset.
method = "nichecompass"
encoder = "gatv2conv"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ["stereoseq_mouse_embryo",
               ]:
    file_name = f"{dataset}_{method}_{encoder}.h5ad"

    # The encoder is part of the job name: runs for different encoders share
    # method, dataset and job_id, and 'submit_python_script' derives the job
    # script path and the out_/err_ log paths from the job name. Without the
    # encoder, those files would collide between encoder variants.
    job_name_prefix = f"{method}_{encoder}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"
    # Command line handed to the script; every option carries a leading space.
    script_args = f" --dataset {dataset}" \
                  f" --task {task}" \
                  f" --file_name {file_name}" \
                  f" --cell_type_key {cell_type_key}" \
                  f" --niche_type_key {niche_type_key}" \
                  f" --batch_key {batch_key}" \
                  f" --latent_key {method}_latent" \
                  f" --metrics {metrics}" \
                  " --include_sdmbench"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args)

#### 3.8.2 STACI

In [None]:
# Metrics computation for the STACI results on the seqFISH mouse
# organogenesis dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "seqfish_mouse_organogenesis_embryo2",
        "seqfish_mouse_organogenesis_subsample_50pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_25pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_10pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_5pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_1pct_embryo2",
):
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<name> <value>" (leading space included).
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "staci_latent"),
            ("metrics", metrics),
        ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        t="48:00:00",
        p="gpu_p",
        qos="gpu_normal",
        mem="160G")

In [None]:
# Metrics computation for the STACI results on the NanoString CosMx human
# NSCLC subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "nanostring_cosmx_human_nsclc_subsample_10pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_5pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_1pct_batch5",
):
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key staci_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        t="48:00:00",
        p="gpu_p",
        qos="gpu_normal",
        mem="160G")

In [None]:
# Metrics computation for the STACI results on the Slide-seqV2 mouse
# hippocampus subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "slideseqv2_mouse_hippocampus_subsample_25pct",
        "slideseqv2_mouse_hippocampus_subsample_10pct",
        "slideseqv2_mouse_hippocampus_subsample_5pct",
        "slideseqv2_mouse_hippocampus_subsample_1pct",
):
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<name> <value>" (leading space included).
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "staci_latent"),
            ("metrics", metrics),
        ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        t="48:00:00",
        p="gpu_p",
        qos="gpu_normal",
        mem="160G")

In [None]:
# Metrics computation for the STACI results on the Vizgen MERFISH mouse
# liver subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "vizgen_merfish_mouse_liver_subsample_5pct",
        "vizgen_merfish_mouse_liver_subsample_1pct",
):
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key staci_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        t="48:00:00",
        p="gpu_p",
        qos="gpu_normal",
        mem="160G")

In [None]:
# Metrics computation for the STACI results on the simulated dataset; this
# cell also passes the niche type key and the --include_sdmbench flag.
method = "staci"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1

# Values that do not depend on the dataset.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # " --<name> <value>" per option, followed by the value-less flag.
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        )) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the STACI results on the STARmap mouse mPFC
# dataset; this cell also passes the niche type key and --include_sdmbench.
method = "staci"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1

# Values that do not depend on the dataset.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in ("starmap_mouse_mpfc",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--niche_type_key {niche_type_key}",
        f"--batch_key {batch_key}",
        f"--latent_key {method}_latent",
        f"--metrics {metrics}",
        "--include_sdmbench",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 3.8.3 GraphST

In [None]:
# Metrics computation for the GraphST results on the seqFISH mouse
# organogenesis dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "seqfish_mouse_organogenesis_embryo2",
        "seqfish_mouse_organogenesis_subsample_50pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_25pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_10pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_5pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_1pct_embryo2",
):
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<name> <value>" (leading space included).
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics),
        ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the GraphST results on the Vizgen MERFISH mouse
# liver subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "vizgen_merfish_mouse_liver_subsample_10pct",
        "vizgen_merfish_mouse_liver_subsample_5pct",
        "vizgen_merfish_mouse_liver_subsample_1pct",
):
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key graphst_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the GraphST results on the NanoString CosMx human
# NSCLC subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "nanostring_cosmx_human_nsclc_subsample_50pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_25pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_10pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_5pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_1pct_batch5",
):
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<name> <value>" (leading space included).
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics),
        ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the GraphST results on the Slide-seqV2 mouse
# hippocampus dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "slideseqv2_mouse_hippocampus",
        "slideseqv2_mouse_hippocampus_subsample_50pct",
        "slideseqv2_mouse_hippocampus_subsample_25pct",
        "slideseqv2_mouse_hippocampus_subsample_10pct",
        "slideseqv2_mouse_hippocampus_subsample_5pct",
):
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key graphst_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the GraphST results on the simulated dataset; this
# cell also passes the niche type key and the --include_sdmbench flag.
method = "graphst"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1

# Values that do not depend on the dataset.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # " --<name> <value>" per option, followed by the value-less flag.
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        )) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the GraphST results on the STARmap mouse mPFC
# dataset; this cell also passes the niche type key and --include_sdmbench.
method = "graphst"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1

# Values that do not depend on the dataset.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in ("starmap_mouse_mpfc",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--niche_type_key {niche_type_key}",
        f"--batch_key {batch_key}",
        f"--latent_key {method}_latent",
        f"--metrics {metrics}",
        "--include_sdmbench",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the GraphST results on the Stereo-seq mouse embryo
# dataset; this cell also passes the niche type key and --include_sdmbench.
method = "graphst"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1

# Values that do not depend on the dataset.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in ("stereoseq_mouse_embryo",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # " --<name> <value>" per option, followed by the value-less flag.
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        )) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 3.8.4 DeepLinc

In [None]:
# Metrics computation for the DeepLinc results on the seqFISH mouse
# organogenesis dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "seqfish_mouse_organogenesis_embryo2",
        "seqfish_mouse_organogenesis_subsample_50pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_25pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_10pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_5pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_1pct_embryo2",
):
    file_name = f"{dataset}_deeplinc.h5ad"
    job_name_prefix = f"deeplinc_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key deeplinc_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the DeepLinc results on the NanoString CosMx human
# NSCLC subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "nanostring_cosmx_human_nsclc_subsample_50pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_25pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_10pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_5pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_1pct_batch5",
):
    file_name = f"{dataset}_deeplinc.h5ad"
    job_name_prefix = f"deeplinc_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<name> <value>" (leading space included).
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "deeplinc_latent"),
            ("metrics", metrics),
        ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the DeepLinc results on the Slide-seqV2 mouse
# hippocampus dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "slideseqv2_mouse_hippocampus",
        "slideseqv2_mouse_hippocampus_subsample_50pct",
        "slideseqv2_mouse_hippocampus_subsample_25pct",
        "slideseqv2_mouse_hippocampus_subsample_10pct",
        "slideseqv2_mouse_hippocampus_subsample_5pct",
        "slideseqv2_mouse_hippocampus_subsample_1pct",
):
    file_name = f"{dataset}_deeplinc.h5ad"
    job_name_prefix = f"deeplinc_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key deeplinc_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the DeepLinc results on the simulated dataset; this
# cell also passes the niche type key and the --include_sdmbench flag.
method = "deeplinc"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_id = 1

# Values that do not depend on the dataset.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--niche_type_key {niche_type_key}",
        f"--batch_key {batch_key}",
        f"--latent_key {method}_latent",
        f"--metrics {metrics}",
        "--include_sdmbench",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 3.8.5 CellCharter

In [None]:
# Metrics computation for the CellCharter results on the seqFISH mouse
# organogenesis dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "seqfish_mouse_organogenesis_embryo2",
        "seqfish_mouse_organogenesis_subsample_50pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_25pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_10pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_5pct_embryo2",
        "seqfish_mouse_organogenesis_subsample_1pct_embryo2",
):
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<name> <value>" (leading space included).
    script_args = "".join(
        f" --{option} {value}" for option, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "cellcharter_latent"),
            ("metrics", metrics),
        ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Metrics computation for the CellCharter results on the NanoString CosMx
# human NSCLC dataset and its subsamples.
# 'job_id' stays at 1 for every dataset; the dataset name inside
# 'job_name_prefix' is what keeps the job names apart.
job_id = 1

# Settings shared by all datasets of this cell.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "nanostring_cosmx_human_nsclc_batch5",
        "nanostring_cosmx_human_nsclc_subsample_50pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_25pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_10pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_5pct_batch5",
        "nanostring_cosmx_human_nsclc_subsample_1pct_batch5",
):
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Options are joined with single spaces behind one leading space.
    script_args = " " + " ".join([
        f"--dataset {dataset}",
        f"--task {task}",
        f"--file_name {file_name}",
        f"--cell_type_key {cell_type_key}",
        f"--batch_key {batch_key}",
        "--latent_key cellcharter_latent",
        f"--metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# CellCharter metrics computation on Vizgen MERFISH mouse liver:
# one `submit_python_script` call for the full dataset and for each subsample.
# NOTE(review): this cell points at the `../scripts/{task}` layout, whereas
# the later CellCharter cells use `./benchmarking/...` -- confirm which
# layout is current.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["vizgen_merfish_mouse_liver"]
        + [f"vizgen_merfish_mouse_liver_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "cellcharter_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# CellCharter metrics computation on SlideSeqV2 mouse hippocampus:
# one `submit_python_script` call for the full dataset and for each subsample.
# NOTE(review): this cell points at the `../scripts/{task}` layout, whereas
# the later CellCharter cells use `./benchmarking/...` -- confirm which
# layout is current.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["slideseqv2_mouse_hippocampus"]
        + [f"slideseqv2_mouse_hippocampus_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "cellcharter_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# CellCharter metrics computation on the simulated dataset, with the
# `--include_sdmbench` flag and a niche type key passed to the script.
method = "cellcharter"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Valued options are rendered as " --<flag> <value>"; the bare
    # sdmbench switch is appended last.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ]) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# CellCharter metrics computation on STARmap mouse mPFC, with the
# `--include_sdmbench` flag and a niche type key passed to the script.
method = "cellcharter"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in ("starmap_mouse_mpfc",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Valued options are rendered as " --<flag> <value>"; the bare
    # sdmbench switch is appended last.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ]) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# CellCharter metrics computation on Stereo-seq mouse embryo, with the
# `--include_sdmbench` flag and a niche type key passed to the script.
method = "cellcharter"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in ("stereoseq_mouse_embryo",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Valued options are rendered as " --<flag> <value>"; the bare
    # sdmbench switch is appended last.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ]) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 3.8.6 BANKSY

In [None]:
# BANKSY metrics computation on seqFISH mouse organogenesis (embryo 2):
# one `submit_python_script` call for the full dataset and for each subsample.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in (
        ["seqfish_mouse_organogenesis_embryo2"]
        + [f"seqfish_mouse_organogenesis_subsample_{pct}pct_embryo2"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# BANKSY metrics computation on NanoString CosMx human NSCLC (batch 5):
# one `submit_python_script` call for the full dataset and for each subsample.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in (
        ["nanostring_cosmx_human_nsclc_batch5"]
        + [f"nanostring_cosmx_human_nsclc_subsample_{pct}pct_batch5"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# BANKSY metrics computation on Vizgen MERFISH mouse liver:
# one `submit_python_script` call for the full dataset and for each subsample.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in (
        ["vizgen_merfish_mouse_liver"]
        + [f"vizgen_merfish_mouse_liver_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# BANKSY metrics computation on SlideSeqV2 mouse hippocampus:
# one `submit_python_script` call for the full dataset and for each subsample.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in ["slideseqv2_mouse_hippocampus",
                "slideseqv2_mouse_hippocampus_subsample_50pct",
                "slideseqv2_mouse_hippocampus_subsample_25pct",
                "slideseqv2_mouse_hippocampus_subsample_10pct",
                "slideseqv2_mouse_hippocampus_subsample_5pct",
                "slideseqv2_mouse_hippocampus_subsample_1pct"
               ]:
    file_name = f"{dataset}_{method}.h5ad"

    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"
    # The adjacent string literals are joined inside parentheses rather than
    # with backslash continuations, so that nothing trailing a line can break
    # the statement (a character after a trailing backslash is a SyntaxError).
    script_args = (f" --dataset {dataset}"
                   f" --task {task}"
                   f" --file_name {file_name}"
                   f" --cell_type_key {cell_type_key}"
                   f" --batch_key {batch_key}"
                   f" --latent_key {method}_latent"
                   f" --metrics {metrics}")

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args)

In [None]:
# BANKSY metrics computation on the simulated dataset, with the
# `--include_sdmbench` flag and a niche type key passed to the script.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in ("sim1_1105genes_10000locs_strongincrements",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Valued options are rendered as " --<flag> <value>"; the bare
    # sdmbench switch is appended last.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ]) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# BANKSY metrics computation on STARmap mouse mPFC, with the
# `--include_sdmbench` flag and a niche type key passed to the script.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in ("starmap_mouse_mpfc",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Valued options are rendered as " --<flag> <value>"; the bare
    # sdmbench switch is appended last.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ]) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# BANKSY metrics computation on Stereo-seq mouse embryo, with the
# `--include_sdmbench` flag and a niche type key passed to the script.
method = "banksy"
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
niche_type_key = "niche_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# Dataset-independent job settings.
job_folder_path = "./benchmarking/single_sample_method_benchmarking/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

job_id = 1
for dataset in ("stereoseq_mouse_embryo",):
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Valued options are rendered as " --<flag> <value>"; the bare
    # sdmbench switch is appended last.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("niche_type_key", niche_type_key),
            ("batch_key", batch_key),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics),
        ]) + " --include_sdmbench"

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 3.8.7 scVI (Not Included)

In [None]:
# scVI metrics computation on seqFISH mouse organogenesis (embryo 2).
# Only the full dataset is active; the subsampled variants are disabled.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        "seqfish_mouse_organogenesis_embryo2",
        #"seqfish_mouse_organogenesis_subsample_50pct_embryo2",
        #"seqfish_mouse_organogenesis_subsample_25pct_embryo2",
        #"seqfish_mouse_organogenesis_subsample_10pct_embryo2",
        #"seqfish_mouse_organogenesis_subsample_5pct_embryo2",
        #"seqfish_mouse_organogenesis_subsample_1pct_embryo2"
        ):
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "scvi_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# scVI metrics computation on NanoString CosMx human NSCLC (batch 5):
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["nanostring_cosmx_human_nsclc_batch5"]
        + [f"nanostring_cosmx_human_nsclc_subsample_{pct}pct_batch5"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "scvi_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# scVI metrics computation on Vizgen MERFISH mouse liver:
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["vizgen_merfish_mouse_liver"]
        + [f"vizgen_merfish_mouse_liver_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "scvi_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# scVI metrics computation on SlideSeqV2 mouse hippocampus:
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["slideseqv2_mouse_hippocampus"]
        + [f"slideseqv2_mouse_hippocampus_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "scvi_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 3.8.8 expiMap (Not Included)

In [None]:
# expiMap metrics computation on seqFISH mouse organogenesis (embryo 2):
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["seqfish_mouse_organogenesis_embryo2"]
        + [f"seqfish_mouse_organogenesis_subsample_{pct}pct_embryo2"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_expimap.h5ad"
    job_name_prefix = f"expimap_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "expimap_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# expiMap metrics computation on NanoString CosMx human NSCLC (batch 5):
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["nanostring_cosmx_human_nsclc_batch5"]
        + [f"nanostring_cosmx_human_nsclc_subsample_{pct}pct_batch5"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_expimap.h5ad"
    job_name_prefix = f"expimap_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "expimap_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# expiMap metrics computation on Vizgen MERFISH mouse liver:
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["vizgen_merfish_mouse_liver"]
        + [f"vizgen_merfish_mouse_liver_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_expimap.h5ad"
    job_name_prefix = f"expimap_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "expimap_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# expiMap metrics computation on SlideSeqV2 mouse hippocampus:
# one `submit_python_script` call for the full dataset and for each subsample.
job_id = 1

# Settings shared by every dataset variant.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "None"  # forwarded as the literal string "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
        ["slideseqv2_mouse_hippocampus"]
        + [f"slideseqv2_mouse_hippocampus_subsample_{pct}pct"
           for pct in (50, 25, 10, 5, 1)]):
    file_name = f"{dataset}_expimap.h5ad"
    job_name_prefix = f"expimap_single_sample_method_benchmarking_{dataset}_metrics_computation"

    # Each option is rendered as " --<flag> <value>", in this order.
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in [
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("latent_key", "expimap_latent"),
            ("metrics", metrics),
        ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

## 4. NicheCompass Sample Integration Method Benchmarking

### 4.1 seqFISH Mouse Organogenesis

#### 4.1.1 Spatial Transcriptomics Data

In [None]:
# GCN encoder
# Configure and submit the NicheCompass benchmarking training job for the
# seqFISH mouse organogenesis dataset.

# Task, dataset and job identification
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis"
conv_layer_encoder = "gcnconv"
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

# Dataset description
species = "mouse"
cell_type_key = "celltype_mapped_refined"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"

# Values interpolated into the training arguments below
edge_batch_size_str = "65536 65536 65536 65536 65536 65536 65536 65536"
cat_covariates_keys = "batch"
cat_covariates_no_edges = True
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job file is written and which script it runs
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every list-valued option carries eight entries
# (presumably one per training run -- confirm against the training script).
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16 16 12 12 8 8 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None"
    " --seeds 7 6 5 4 3 2 1 0"
    " --run_index 8 7 6 5 4 3 2 1"
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder
# Configure and submit the NicheCompass benchmarking training job for the
# seqFISH mouse organogenesis dataset.

# Task, dataset and job identification
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis"
conv_layer_encoder = "gatv2conv"
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

# Dataset description
species = "mouse"
cell_type_key = "celltype_mapped_refined"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"

# Values interpolated into the training arguments below
edge_batch_size_str = "256 256 256 256 256 256 256 256"
cat_covariates_keys = "batch"
cat_covariates_no_edges = True
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job file is written and which script it runs
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every list-valued option carries eight entries
# (presumably one per training run -- confirm against the training script).
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16 16 12 12 8 8 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None"
    " --seeds 7 6 5 4 3 2 1 0"
    " --run_index 8 7 6 5 4 3 2 1"
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 4.1.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder
# Submit one NicheCompass benchmarking training job per subsample of the
# seqFISH mouse organogenesis dataset. Only the dataset name (and what is
# derived from it) changes between iterations, so all remaining settings
# are defined once before the loop.

# Task and job identification
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
job_id = 1

# Dataset description
species = "mouse"
cell_type_key = "celltype_mapped_refined"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"

# Values interpolated into the training arguments below
edge_batch_size_str = "65536 65536 65536 65536 65536 65536 65536 65536"
cat_covariates_keys = "batch"
cat_covariates_no_edges = True
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job files are written and which script they run
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct"
    # The dataset name in the prefix distinguishes the jobs; job_id is fixed.
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Every list-valued option carries eight entries.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder
# Submit one NicheCompass benchmarking training job per subsample of the
# seqFISH mouse organogenesis dataset. Only the dataset name (and what is
# derived from it) changes between iterations, so all remaining settings
# are defined once before the loop.

# Task and job identification
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gatv2conv"
job_id = 1

# Dataset description
species = "mouse"
cell_type_key = "celltype_mapped_refined"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"

# Values interpolated into the training arguments below
edge_batch_size_str = "512 512 512 512 512 512 512 512"
cat_covariates_keys = "batch"
cat_covariates_no_edges = True
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job files are written and which script they run
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct"
    # The dataset name in the prefix distinguishes the jobs; job_id is fixed.
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Every list-valued option carries eight entries.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# tmp
# Temporary cell: submits the GATv2 NicheCompass benchmarking training job
# for the 50% subsample of the seqFISH mouse organogenesis dataset only.
# The single-element loop is kept so that subsample_pct is still defined.

# Task and job identification
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gatv2conv"
job_id = 1

# Dataset description
species = "mouse"
cell_type_key = "celltype_mapped_refined"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"

# Values interpolated into the training arguments below
edge_batch_size_str = "512 512 512 512 512 512 512 512"
cat_covariates_keys = "batch"
cat_covariates_no_edges = True
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job file is written and which script it runs
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50]:
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Every list-valued option carries eight entries.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 4.2 seqFISH Mouse Organogenesis Imputed

The jobs submitted in this section are not used for the manuscript.

#### 4.2.1 Spatial Transcriptomics Data

In [None]:
# GATv2 encoder
# Configure and submit the NicheCompass benchmarking training job for the
# visium_mouse_brain dataset.
# NOTE(review): the enclosing section heading refers to the imputed seqFISH
# mouse organogenesis data, while this cell targets visium_mouse_brain with
# two reference batches -- confirm that the cell is placed where intended.

# Task, dataset and job identification
task = "sample_integration_method_benchmarking"
dataset = "visium_mouse_brain"
conv_layer_encoder = "gatv2conv"
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

# Dataset description
species = "mouse"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2"

# Values interpolated into the training arguments below
# (edge batch size presumably limited by out-of-memory errors -- confirm)
edge_batch_size_str = "256 256 256 256 256 256 256 256"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
# alternative value: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "2"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job file is written and which script it runs
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every list-valued option carries eight entries.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16 16 12 12 8 8 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None"
    " --seeds 7 6 5 4 3 2 1 0"
    " --run_index 8 7 6 5 4 3 2 1"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 5000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder
# Configure and submit the NicheCompass benchmarking training job for the
# imputed seqFISH mouse organogenesis dataset.

# Task, dataset and job identification
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
conv_layer_encoder = "gcnconv"
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

# Dataset description
species = "mouse"
cell_type_key = "celltype_mapped_refined"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"

# Values interpolated into the training arguments below
# (edge batch size presumably limited by out-of-memory errors -- confirm)
edge_batch_size_str = "8192 8192 8192 8192 8192 8192 8192 8192"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
# alternative value: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Where the job file is written and which script it runs
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments; every list-valued option carries eight entries.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16 16 12 12 8 8 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None"
    " --seeds 7 6 5 4 3 2 1 0"
    " --run_index 8 7 6 5 4 3 2 1"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 1 (the runs are submitted as separate jobs due to the
# 2 day limit)

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"

# Model configuration
conv_layer_encoder = "gatv2conv"
# Note kept from the original cell: "out of memory" (presumably larger edge
# batch sizes did not fit into memory -- TODO confirm)
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job configuration
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments for the training script. Adjacent string literals
# inside the parentheses are concatenated into one string; every fragment
# starts with a space so that the arguments stay separated.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 7"
    " --run_index 8"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 2 (the runs are submitted as separate jobs due to the
# 2 day limit)

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"

# Model configuration
conv_layer_encoder = "gatv2conv"
# Note kept from the original cell: "out of memory" (presumably larger edge
# batch sizes did not fit into memory -- TODO confirm)
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job configuration
# The job id is part of the job name, from which the job script path and the
# out/err log paths are derived, and it is also passed as timestamp suffix.
# All GATv2 runs share the same job name prefix, so this run needs its own id
# to avoid reusing the job script, log files and timestamp suffix of run 1.
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments for the training script. Adjacent string literals
# inside the parentheses are concatenated into one string; every fragment
# starts with a space so that the arguments stay separated.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 6"
    " --run_index 7"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 3 (the runs are submitted as separate jobs due to the
# 2 day limit)

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"

# Model configuration
conv_layer_encoder = "gatv2conv"
# Note kept from the original cell: "out of memory" (presumably larger edge
# batch sizes did not fit into memory -- TODO confirm)
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job configuration
# The job id is part of the job name, from which the job script path and the
# out/err log paths are derived, and it is also passed as timestamp suffix.
# All GATv2 runs share the same job name prefix, so this run needs its own id
# to avoid reusing the job script, log files and timestamp suffix of another
# run.
job_id = 3
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments for the training script. Adjacent string literals
# inside the parentheses are concatenated into one string; every fragment
# starts with a space so that the arguments stay separated.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 5"
    " --run_index 6"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 4 (the runs are submitted as separate jobs due to the
# 2 day limit)

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"

# Model configuration
conv_layer_encoder = "gatv2conv"
# Note kept from the original cell: "out of memory" (presumably larger edge
# batch sizes did not fit into memory -- TODO confirm)
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job configuration
# The job id is part of the job name, from which the job script path and the
# out/err log paths are derived, and it is also passed as timestamp suffix.
# All GATv2 runs share the same job name prefix, so this run needs its own id
# to avoid reusing the job script, log files and timestamp suffix of another
# run.
job_id = 4
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments for the training script. Adjacent string literals
# inside the parentheses are concatenated into one string; every fragment
# starts with a space so that the arguments stay separated.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 4"
    " --run_index 5"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 5 (the runs are submitted as separate jobs due to the
# 2 day limit); this job covers two model configurations

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"

# Model configuration
conv_layer_encoder = "gatv2conv"
# Note kept from the original cell: "out of memory" (presumably larger edge
# batch sizes did not fit into memory -- TODO confirm)
edge_batch_size_str = "256 256"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job configuration
# The job id is part of the job name, from which the job script path and the
# out/err log paths are derived, and it is also passed as timestamp suffix.
# All GATv2 runs share the same job name prefix, so this run needs its own id
# to avoid reusing the job script, log files and timestamp suffix of another
# run.
job_id = 5
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments for the training script. Adjacent string literals
# inside the parentheses are concatenated into one string; every fragment
# starts with a space so that the arguments stay separated.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 8 8"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 3 2"
    " --run_index 4 3"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder, run 6 (the runs are submitted as separate jobs due to the
# 2 day limit); this job covers two model configurations

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"

# Model configuration
conv_layer_encoder = "gatv2conv"
# Note kept from the original cell: "out of memory" (presumably larger edge
# batch sizes did not fit into memory -- TODO confirm)
edge_batch_size_str = "256 256"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job configuration
# The job id is part of the job name, from which the job script path and the
# out/err log paths are derived, and it is also passed as timestamp suffix.
# All GATv2 runs share the same job name prefix, so this run needs its own id
# to avoid reusing the job script, log files and timestamp suffix of another
# run.
job_id = 6
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line arguments for the training script. Adjacent string literals
# inside the parentheses are concatenated into one string; every fragment
# starts with a space so that the arguments stay separated.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 1 0"
    " --run_index 2 1"
    f" --cell_type_key {cell_type_key}"
    " --filter_genes"
    " --n_svg 3000"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file and submit it
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 4.2.2 Spatial Transcriptomics Data Subsamples

In [None]:
# GCN encoder: submit one job per subsample of the dataset

# Settings shared by all subsample sizes
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
conv_layer_encoder = "gcnconv"
# Note kept from the original cell: "out of memory" (presumably related to
# the choice of edge batch size -- TODO confirm)
edge_batch_size_str = "8192 8192 8192 8192 8192 8192 8192 8192"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job settings shared by all subsample sizes
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50, 25, 10, 5, 1]:
    # The dataset name carries the subsample percentage, which also makes
    # the job name unique per subsample
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Adjacent string
    # literals inside the parentheses are concatenated into one string.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --filter_genes"
        " --n_svg 3000"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# tmp: GCN encoder job for the 5 percent subsample only

# Settings shared by all subsample sizes in the list below
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
conv_layer_encoder = "gcnconv"
# Note kept from the original cell: "out of memory" (presumably related to
# the choice of edge batch size -- TODO confirm)
edge_batch_size_str = "8192 8192 8192 8192 8192 8192 8192 8192"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job settings
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [5]:
    # The dataset name carries the subsample percentage
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Command line arguments for the training script. Adjacent string
    # literals inside the parentheses are concatenated into one string.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --filter_genes"
        " --n_svg 3000"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file and submit it
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 1; the runs are split across several jobs due to the 2 day limit)
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
    cell_type_key = "celltype_mapped_refined"
    species = "mouse"
    # NOTE(review): presumably a reduced batch size because larger values ran
    # out of memory -- confirm.
    edge_batch_size_str = "256 256" # out of memory
    cat_covariates_embeds_injection = "gene_expr_decoder" # "encoder gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    # submit_python_script derives the job file name and the out/err log file
    # names from f"{job_name_prefix}_{job_id}". The other split runs of this
    # dataset/encoder use the same job_id, so the run marker in the prefix is
    # what keeps this run's job file and logs from being overwritten.
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run1"
    # NOTE(review): job_id also feeds --timestamp_suffix and is shared with the
    # other split runs -- confirm the training script tolerates that.
    job_id = 1
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Command line arguments for the training script; adjacent string literals
    # are concatenated into a single string. The list-valued options
    # (n_neighbors_list, edge/node_batch_size_list, seeds, run_index) carry one
    # entry per training run of this job.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None"
        " --seeds 7 6"
        " --run_index 8 7"
        f" --cell_type_key {cell_type_key}"
        " --filter_genes"
        " --n_svg 3000"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder (run 2; the runs are split across several jobs due to the 2 day limit)
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
    cell_type_key = "celltype_mapped_refined"
    species = "mouse"
    # NOTE(review): presumably a reduced batch size because larger values ran
    # out of memory -- confirm.
    edge_batch_size_str = "256 256" # out of memory
    cat_covariates_embeds_injection = "gene_expr_decoder" # "encoder gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    # submit_python_script derives the job file name and the out/err log file
    # names from f"{job_name_prefix}_{job_id}". The other split runs of this
    # dataset/encoder use the same job_id, so the run marker in the prefix is
    # what keeps this run's job file and logs from being overwritten.
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run2"
    # NOTE(review): job_id also feeds --timestamp_suffix and is shared with the
    # other split runs -- confirm the training script tolerates that.
    job_id = 1
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Command line arguments for the training script; adjacent string literals
    # are concatenated into a single string. The list-valued options
    # (n_neighbors_list, edge/node_batch_size_list, seeds, run_index) carry one
    # entry per training run of this job.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 12 12"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None"
        " --seeds 5 4"
        " --run_index 6 5"
        f" --cell_type_key {cell_type_key}"
        " --filter_genes"
        " --n_svg 3000"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder (run 3; the runs are split across several jobs due to the 2 day limit)
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
    cell_type_key = "celltype_mapped_refined"
    species = "mouse"
    # NOTE(review): presumably a reduced batch size because larger values ran
    # out of memory -- confirm.
    edge_batch_size_str = "256 256 256 256" # out of memory
    cat_covariates_embeds_injection = "gene_expr_decoder" # "encoder gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    # submit_python_script derives the job file name and the out/err log file
    # names from f"{job_name_prefix}_{job_id}". The other split runs of this
    # dataset/encoder use the same job_id, so the run marker in the prefix is
    # what keeps this run's job file and logs from being overwritten.
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run3"
    # NOTE(review): job_id also feeds --timestamp_suffix and is shared with the
    # other split runs -- confirm the training script tolerates that.
    job_id = 1
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Command line arguments for the training script; adjacent string literals
    # are concatenated into a single string. The list-valued options
    # (n_neighbors_list, edge/node_batch_size_list, seeds, run_index) carry one
    # entry per training run of this job.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None"
        " --seeds 3 2 1 0"
        " --run_index 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --filter_genes"
        " --n_svg 3000"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder (one job per subsample percentage, all 8 runs inside that job)

# Settings that are identical for every subsample percentage.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
edge_batch_size_str = "256 256 256 256 256 256 256 256" # out of memory
cat_covariates_embeds_injection = "gene_expr_decoder" # "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [25, 10, 5, 1]:
    # Only the dataset (and hence the job name) changes between iterations.
    dataset = f"seqfish_mouse_organogenesis_imputed_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Each fragment starts with a space, so joining without a separator
    # yields the full argument string for the training script.
    script_args = "".join([
        " --adata_new_name None",
        " --n_neighbors_list 16 16 12 12 8 8 4 4",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None None None None None None None",
        " --seeds 7 6 5 4 3 2 1 0",
        " --run_index 8 7 6 5 4 3 2 1",
        f" --cell_type_key {cell_type_key}",
        " --filter_genes",
        " --n_svg 3000",
        " --nichenet_keep_target_genes_ratio 1.",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        " --counts_key counts",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 4.3 nanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

#### 4.3.1 Spatial Transcriptomics Data

##### 4.3.1.1 GCN W/O FOV Embedding

In [None]:
# GCN encoder (run 1; the runs are split across several jobs due to the 2 day limit)
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# submit_python_script derives the job file name and the out/err log file
# names from f"{job_name_prefix}_{job_id}". The other split runs of this
# dataset/encoder use the same job_id, so the run marker in the prefix is
# what keeps this run's job file and logs from being overwritten.
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run1"
# NOTE(review): job_id also feeds --timestamp_suffix and is shared with the
# other split runs -- confirm the training script tolerates that.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"
# Command line arguments for the training script; adjacent string literals
# are concatenated into a single string. This job trains a single run
# (one entry in each of the list-valued options).
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 7"
    " --run_index 8"
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

In [None]:
# GCN encoder (run 2; the runs are split across several jobs due to the 2 day limit)
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# submit_python_script derives the job file name and the out/err log file
# names from f"{job_name_prefix}_{job_id}". The other split runs of this
# dataset/encoder use the same job_id, so the run marker in the prefix is
# what keeps this run's job file and logs from being overwritten.
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run2"
# NOTE(review): job_id also feeds --timestamp_suffix and is shared with the
# other split runs -- confirm the training script tolerates that.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"
# Command line arguments for the training script; adjacent string literals
# are concatenated into a single string. This job trains a single run
# (one entry in each of the list-valued options).
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 6"
    " --run_index 7 "
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

In [None]:
# GCN encoder (run 3; the runs are split across several jobs due to the 2 day limit)
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# submit_python_script derives the job file name and the out/err log file
# names from f"{job_name_prefix}_{job_id}". The other split runs of this
# dataset/encoder use the same job_id, so the run marker in the prefix is
# what keeps this run's job file and logs from being overwritten.
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}_run3"
# NOTE(review): job_id also feeds --timestamp_suffix and is shared with the
# other split runs -- confirm the training script tolerates that.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"
# Command line arguments for the training script; adjacent string literals
# are concatenated into a single string. This job trains a single run
# (one entry in each of the list-valued options).
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 5"
    " --run_index 6 "
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

In [None]:
# GCN encoder, run 4. The runs are split across several jobs because of the
# 2-day limit (submit_python_script's default time limit is 48:00:00).

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id is part of the job name (and therefore of the job file and log file
# names) and of the --timestamp_suffix below. It is set to the run number so
# that this run gets its own job file, logs and suffix.
job_id = 4
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 4"
    " --run_index 5"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder, run 5. The runs are split across several jobs because of the
# 2-day limit (submit_python_script's default time limit is 48:00:00).

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192 8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id is part of the job name (and therefore of the job file and log file
# names) and of the --timestamp_suffix below. It is set to the run number so
# that this run gets its own job file, logs and suffix.
job_id = 5
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run (the list-valued flags carry two entries
    # each)
    " --adata_new_name None"
    " --n_neighbors_list 8 8"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 3 2"
    " --run_index 4 3"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder, run 6. The runs are split across several jobs because of the
# 2-day limit (submit_python_script's default time limit is 48:00:00).

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192 8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id is part of the job name (and therefore of the job file and log file
# names) and of the --timestamp_suffix below. It is set to the run number so
# that this run gets its own job file, logs and suffix.
job_id = 6
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run (the list-valued flags carry two entries
    # each)
    " --adata_new_name None"
    " --n_neighbors_list 4 4"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 1 0"
    " --run_index 2 1"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

##### 4.3.1.2 GCN with FOV Embedding

In [None]:
# GCN encoder with FOV covariate, run 1 (runs are split across jobs because
# of the 2-day limit)

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Two categorical covariates here; the two settings below carry one entry
# per covariate key.
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 7"
    " --run_index 8"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder with FOV covariate, run 2 (runs are split across jobs because
# of the 2-day limit)

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Two categorical covariates here; the two settings below carry one entry
# per covariate key.
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 2
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run
    " --adata_new_name None"
    " --n_neighbors_list 16"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 6"
    " --run_index 7"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder with FOV covariate, run 3 (runs are split across jobs because
# of the 2-day limit)

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Two categorical covariates here; the two settings below carry one entry
# per covariate key.
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 3
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 5"
    " --run_index 6"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder with FOV covariate, run 4 (runs are split across jobs because
# of the 2-day limit)

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Two categorical covariates here; the two settings below carry one entry
# per covariate key.
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 4
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run
    " --adata_new_name None"
    " --n_neighbors_list 12"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None"
    " --seeds 4"
    " --run_index 5"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder with FOV covariate, run 5 (runs are split across jobs because
# of the 2-day limit)

# --- Values interpolated into the training-script arguments ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192 8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Two categorical covariates here; the two settings below carry one entry
# per covariate key.
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Slurm job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 5
job_folder_path = f"../scripts/{task}/slurm_jobs"
# Directory the job changes into before the script is run.
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments for the training script. Every fragment starts with
# a single space so the adjacent literals concatenate into one argument
# string.
script_args = (
    # values specific to this run (the list-valued flags carry two entries
    # each)
    " --adata_new_name None"
    " --n_neighbors_list 8 8"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None"
    " --seeds 3 2"
    " --run_index 4 3"
    # remaining training-script options
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {conv_layer_encoder}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --n_addon_gp 10"
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    " --lambda_l1_addon 0."
    " --n_sampled_neighbors 4"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals submit_python_script's default; kept explicit.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GCN encoder (run 6 due to 2 day limit)
# This cell covers two configurations (seeds 1 and 0, run indices 2 and 1)
# with 4 neighbors each, using `batch` and `fov` as categorical covariates.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "8192 8192"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gcnconv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 6
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None",
    " --seeds 1 0",
    " --run_index 2 1",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    " --node_label_method one-hop-norm",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

##### 4.3.1.3 GATv2 W/O FOV Embedding

In [None]:
# GATv2 encoder (run 1 due to 2 day limit)
# Single configuration: seed 7, run index 8, 16 neighbors, with `batch` as
# the only categorical covariate.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 16",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 7",
    " --run_index 8",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# Single configuration: seed 6, run index 7, 16 neighbors, with `batch` as
# the only categorical covariate.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id matches the run number: `submit_python_script` derives the job name,
# the job file and the out/err log paths from it, so reusing 1 here would
# overwrite the files of run 1.
job_id = 2
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 16",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 6",
    " --run_index 7",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 3 due to 2 day limit)
# Single configuration: seed 5, run index 6, 12 neighbors, with `batch` as
# the only categorical covariate.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id matches the run number: `submit_python_script` derives the job name,
# the job file and the out/err log paths from it, so reusing 1 here would
# overwrite the files of run 1.
job_id = 3
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 12",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 5",
    " --run_index 6",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 4 due to 2 day limit)
# Single configuration: seed 4, run index 5, 12 neighbors, with `batch` as
# the only categorical covariate.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id matches the run number: `submit_python_script` derives the job name,
# the job file and the out/err log paths from it, so reusing 1 here would
# overwrite the files of run 1.
job_id = 4
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 12",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 4",
    " --run_index 5",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 5 due to 2 day limit)
# This cell covers two configurations (seeds 3 and 2, run indices 4 and 3)
# with 8 neighbors each, with `batch` as the only categorical covariate.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "512 512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id matches the run number: `submit_python_script` derives the job name,
# the job file and the out/err log paths from it, so reusing 1 here would
# overwrite the files of run 1.
job_id = 5
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 8 8",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None",
    " --seeds 3 2",
    " --run_index 4 3",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 6 due to 2 day limit)
# This cell covers two configurations (seeds 1 and 0, run indices 2 and 1)
# with 4 neighbors each, with `batch` as the only categorical covariate.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "512 512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
# job_id matches the run number: `submit_python_script` derives the job name,
# the job file and the out/err log paths from it, so reusing 1 here would
# overwrite the files of run 1.
job_id = 6
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None",
    " --seeds 1 0",
    " --run_index 2 1",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

##### 4.3.1.4 GATv2 with FOV Embedding

In [None]:
# GATv2 encoder (run 1 due to 2 day limit)
# Single configuration: seed 7, run index 8, 16 neighbors, using `batch` and
# `fov` as categorical covariates.

# --- Dataset & covariate configuration ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
conv_layer_encoder = "gatv2conv"

# --- Job identity & paths ---
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# --- Command line arguments for the training script ---
# Each fragment carries its own leading space, so joining without a separator
# yields one argument string.
script_args = "".join([
    " --adata_new_name None",
    " --n_neighbors_list 16",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 7",
    " --run_index 8",
    f" --cell_type_key {cell_type_key}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    # Use the variable defined above instead of a hard-coded duplicate, so
    # changing `node_label_method` actually reaches the training script.
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# Queues one Slurm job that runs the benchmarking training script on the
# `nanostring_cosmx_human_nsclc` dataset with the `gatv2conv` encoder.

# --- Dataset and covariates ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"

# --- Model ---
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
edge_batch_size_str = "256"
# All contrastive settings are set to zero for this run.
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# --- Job bookkeeping ---
job_id = 2
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Every fragment carries its own leading space, so joining on the empty
# string produces the complete command-line argument string.
script_args = "".join([
    " --adata_new_name None",
    # Per-run settings (neighbors / batch sizes / seeds / run indices).
    " --n_neighbors_list 16",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 6",
    " --run_index 7",
    f" --cell_type_key {cell_type_key}",
    # Gene program settings.
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and AnnData keys.
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model settings.
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training settings.
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 3 due to 2 day limit)
# Queues one Slurm job that runs the benchmarking training script on the
# `nanostring_cosmx_human_nsclc` dataset with the `gatv2conv` encoder.

# --- Dataset and covariates ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"

# --- Model ---
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
edge_batch_size_str = "256"
# All contrastive settings are set to zero for this run.
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# --- Job bookkeeping ---
job_id = 3
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Every fragment carries its own leading space, so joining on the empty
# string produces the complete command-line argument string.
script_args = "".join([
    " --adata_new_name None",
    # Per-run settings (neighbors / batch sizes / seeds / run indices).
    " --n_neighbors_list 12",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 5",
    " --run_index 6",
    f" --cell_type_key {cell_type_key}",
    # Gene program settings.
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and AnnData keys.
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model settings.
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training settings.
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 4 due to 2 day limit)
# Queues one Slurm job that runs the benchmarking training script on the
# `nanostring_cosmx_human_nsclc` dataset with the `gatv2conv` encoder.

# --- Dataset and covariates ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"

# --- Model ---
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
edge_batch_size_str = "256"
# All contrastive settings are set to zero for this run.
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# --- Job bookkeeping ---
job_id = 4
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Every fragment carries its own leading space, so joining on the empty
# string produces the complete command-line argument string.
script_args = "".join([
    " --adata_new_name None",
    # Per-run settings (neighbors / batch sizes / seeds / run indices).
    " --n_neighbors_list 12",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None",
    " --seeds 4",
    " --run_index 5",
    f" --cell_type_key {cell_type_key}",
    # Gene program settings.
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and AnnData keys.
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model settings.
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training settings.
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 5 due to 2 day limit)
# Queues one Slurm job that runs the benchmarking training script on the
# `nanostring_cosmx_human_nsclc` dataset with the `gatv2conv` encoder.
# The list-valued arguments below each carry two entries.

# --- Dataset and covariates ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"

# --- Model ---
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
edge_batch_size_str = "256 256"
# All contrastive settings are set to zero for this run.
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# --- Job bookkeeping ---
job_id = 5
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Every fragment carries its own leading space, so joining on the empty
# string produces the complete command-line argument string.
script_args = "".join([
    " --adata_new_name None",
    # Per-run settings (neighbors / batch sizes / seeds / run indices).
    " --n_neighbors_list 8 8",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None",
    " --seeds 3 2",
    " --run_index 4 3",
    f" --cell_type_key {cell_type_key}",
    # Gene program settings.
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and AnnData keys.
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model settings.
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training settings.
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# GATv2 encoder (run 6 due to 2 day limit)
# Queues one Slurm job that runs the benchmarking training script on the
# `nanostring_cosmx_human_nsclc` dataset with the `gatv2conv` encoder.
# The list-valued arguments below each carry two entries.

# --- Dataset and covariates ---
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"

# --- Model ---
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
edge_batch_size_str = "256 256"
# All contrastive settings are set to zero for this run.
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# --- Job bookkeeping ---
job_id = 6
job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Every fragment carries its own leading space, so joining on the empty
# string produces the complete command-line argument string.
script_args = "".join([
    " --adata_new_name None",
    # Per-run settings (neighbors / batch sizes / seeds / run indices).
    " --n_neighbors_list 4 4",
    f" --edge_batch_size_list {edge_batch_size_str}",
    " --node_batch_size_list None None",
    " --seeds 1 0",
    " --run_index 2 1",
    f" --cell_type_key {cell_type_key}",
    # Gene program settings.
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.0",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    # Dataset and AnnData keys.
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    # Model settings.
    f" --model_label {conv_layer_encoder}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --n_addon_gp 10",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    f" --conv_layer_encoder {conv_layer_encoder}",
    # Training settings.
    " --n_epochs 400",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 5000000.",
    " --lambda_gene_expr_recon 3000.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    " --lambda_l1_addon 0.",
    " --n_sampled_neighbors 4",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 4.3.2 Spatial Transcriptomics Data Subsamples

##### 4.3.2.1 GCN W/O FOV Embedding

In [None]:
# GCN encoder (run 1 due to 2 day limit)
# For each listed subsample percentage, queues one Slurm job that runs the
# benchmarking training script on the corresponding subsampled dataset with
# the `gcnconv` encoder and `batch` as the only categorical covariate.
for subsample_pct in [50]:
    # --- Dataset and covariates ---
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    species = "human"
    cell_type_key = "cell_type"
    reference_batches = "batch1 batch2 batch3"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    cat_covariates_embeds_injection = "gene_expr_decoder"

    # --- Model ---
    conv_layer_encoder = "gcnconv"
    edge_batch_size_str = "8192"
    # All contrastive settings are set to zero for this run.
    lambda_cat_covariates_contrastive = 0.0
    contrastive_logits_pos_ratio = 0.0
    contrastive_logits_neg_ratio = 0.0

    # --- Job bookkeeping ---
    job_id = 1
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"

    # Every fragment carries its own leading space, so joining on the empty
    # string produces the complete command-line argument string.
    script_args = "".join([
        " --adata_new_name None",
        # Per-run settings (neighbors / batch sizes / seeds / run indices).
        " --n_neighbors_list 16",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None",
        " --seeds 7",
        " --run_index 8",
        f" --cell_type_key {cell_type_key}",
        # Gene program settings.
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # Dataset and AnnData keys.
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        " --counts_key counts",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # Model settings.
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # Training settings.
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GCN encoder (run 2 due to 2 day limit)
# For each listed subsample percentage, queues one Slurm job that runs the
# benchmarking training script on the corresponding subsampled dataset with
# the `gcnconv` encoder and `batch` as the only categorical covariate.
for subsample_pct in [50]:
    # --- Dataset and covariates ---
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    species = "human"
    cell_type_key = "cell_type"
    reference_batches = "batch1 batch2 batch3"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    cat_covariates_embeds_injection = "gene_expr_decoder"

    # --- Model ---
    conv_layer_encoder = "gcnconv"
    edge_batch_size_str = "8192"
    # All contrastive settings are set to zero for this run.
    lambda_cat_covariates_contrastive = 0.0
    contrastive_logits_pos_ratio = 0.0
    contrastive_logits_neg_ratio = 0.0

    # --- Job bookkeeping ---
    # The job id equals the run number. It is part of the job name, which
    # determines the job script path, the out/err log paths and the
    # timestamp suffix; reusing id 1 here would make this run share all of
    # those with run 1 of the same dataset/encoder.
    job_id = 2
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"

    # Every fragment carries its own leading space, so joining on the empty
    # string produces the complete command-line argument string.
    script_args = "".join([
        " --adata_new_name None",
        # Per-run settings (neighbors / batch sizes / seeds / run indices).
        " --n_neighbors_list 16",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None",
        " --seeds 6",
        " --run_index 7",
        f" --cell_type_key {cell_type_key}",
        # Gene program settings.
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # Dataset and AnnData keys.
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        " --counts_key counts",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # Model settings.
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # Training settings.
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GCN encoder (run 3 due to 2 day limit)
# For each listed subsample percentage, queues one Slurm job that runs the
# benchmarking training script on the corresponding subsampled dataset with
# the `gcnconv` encoder and `batch` as the only categorical covariate.
# The list-valued arguments below each carry two entries.
for subsample_pct in [50]:
    # --- Dataset and covariates ---
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    species = "human"
    cell_type_key = "cell_type"
    reference_batches = "batch1 batch2 batch3"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    cat_covariates_embeds_injection = "gene_expr_decoder"

    # --- Model ---
    conv_layer_encoder = "gcnconv"
    edge_batch_size_str = "8192 8192"
    # All contrastive settings are set to zero for this run.
    lambda_cat_covariates_contrastive = 0.0
    contrastive_logits_pos_ratio = 0.0
    contrastive_logits_neg_ratio = 0.0

    # --- Job bookkeeping ---
    # The job id equals the run number. It is part of the job name, which
    # determines the job script path, the out/err log paths and the
    # timestamp suffix; reusing id 1 here would make this run share all of
    # those with run 1 of the same dataset/encoder.
    job_id = 3
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"

    # Every fragment carries its own leading space, so joining on the empty
    # string produces the complete command-line argument string.
    script_args = "".join([
        " --adata_new_name None",
        # Per-run settings (neighbors / batch sizes / seeds / run indices).
        " --n_neighbors_list 12 12",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None None",
        " --seeds 5 4",
        " --run_index 6 5",
        f" --cell_type_key {cell_type_key}",
        # Gene program settings.
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        # Dataset and AnnData keys.
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        " --counts_key counts",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        # Model settings.
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        # Training settings.
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# NicheCompass GCN encoder on the 50pct NSCLC subsample with `batch` as the
# only categorical covariate. The seeds are split over several jobs because of
# the 2 day limit; this job trains seeds 3-0 (run indices 4-1).
# NOTE(review): the original header called this "run 4" while job_id is 1. With
# job_id = 1 the job name (and therefore the .cmd / log file names and the
# timestamp suffix) equals the one built by the first "GCN with FOV Embedding"
# cell for the same dataset -- confirm this is intended.

# Settings that do not depend on the subsample percentage.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192 8192 8192 8192"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50,):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 8 8 4 4",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None",
        "--seeds 3 2 1 0",
        "--run_index 4 3 2 1",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

##### 4.3.2.2 GCN with FOV Embedding

In [None]:
# NicheCompass GCN encoder on the 50pct NSCLC subsample with `batch` and `fov`
# as categorical covariates. The seeds are split over several jobs because of
# the 2 day limit; this is job 1 (seed 7, run index 8, 16 neighbors).

# Settings that do not depend on the subsample percentage.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192"
# One space-separated entry per covariate (batch, fov).
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50,):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 16",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None",
        "--seeds 7",
        "--run_index 8",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# NicheCompass GCN encoder on the 50pct NSCLC subsample with `batch` and `fov`
# as categorical covariates. The seeds are split over several jobs because of
# the 2 day limit; this is job 2 (seed 6, run index 7, 16 neighbors).

# Settings that do not depend on the subsample percentage.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192"
# One space-separated entry per covariate (batch, fov).
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 2
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50,):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 16",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None",
        "--seeds 6",
        "--run_index 7",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# NicheCompass GCN encoder on the 50pct NSCLC subsample with `batch` and `fov`
# as categorical covariates. The seeds are split over several jobs because of
# the 2 day limit; this is job 3 (seed 5, run index 6, 12 neighbors).

# Settings that do not depend on the subsample percentage.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192"
# One space-separated entry per covariate (batch, fov).
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 3
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50,):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 12",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None",
        "--seeds 5",
        "--run_index 6",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# NicheCompass GCN encoder on the 50pct NSCLC subsample with `batch` and `fov`
# as categorical covariates. The seeds are split over several jobs because of
# the 2 day limit; this is job 4 (seed 4, run index 5, 12 neighbors).

# Settings that do not depend on the subsample percentage.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192"
# One space-separated entry per covariate (batch, fov).
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 4
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50,):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 12",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None",
        "--seeds 4",
        "--run_index 5",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# NicheCompass GCN encoder on the 50pct NSCLC subsample with `batch` and `fov`
# as categorical covariates. The seeds are split over several jobs because of
# the 2 day limit; this is job 5, which trains the remaining seeds 3-0
# (run indices 4-1, 8/8/4/4 neighbors) in a single submission.

# Settings that do not depend on the subsample percentage.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192 8192 8192 8192"
# One space-separated entry per covariate (batch, fov).
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 5
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50,):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 8 8 4 4",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None",
        "--seeds 3 2 1 0",
        "--run_index 4 3 2 1",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# NicheCompass GCN encoder on the 25/10/5/1 pct NSCLC subsamples: one job per
# subsample, each training all eight seeds (7-0, run indices 8-1).
# NOTE(review): this cell sits under the "GCN with FOV Embedding" heading but
# passes only `batch` as categorical covariate, whereas the 50pct cells of the
# same section pass "batch fov" -- confirm that omitting `fov` is intended.

# Settings shared by every subsample.
task = "sample_integration_method_benchmarking"
conv_layer_encoder = "gcnconv"
species = "human"
cell_type_key = "cell_type"
reference_batches = "batch1 batch2 batch3"
edge_batch_size_str = "8192 8192 8192 8192 8192 8192 8192 8192"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"
# Contrastive loss is switched off (floats are rendered as "0.0" on the CLI).
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (25, 10, 5, 1):
    # The dataset name is the only per-iteration input; the job name follows it.
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # One entry per CLI flag; joined below with a leading space before each.
    cli_flags = [
        "--adata_new_name None",
        "--n_neighbors_list 16 16 12 12 8 8 4 4",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None None None None None",
        "--seeds 7 6 5 4 3 2 1 0",
        "--run_index 8 7 6 5 4 3 2 1",
        f"--cell_type_key {cell_type_key}",
        "--no-filter_genes",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        "--counts_key counts",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {conv_layer_encoder}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--n_addon_gp 10",
        "--active_gp_thresh_ratio 0.",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        "--node_label_method one-hop-norm",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 400",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        "--lambda_edge_recon 5000000.",
        "--lambda_gene_expr_recon 3000.",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        "--lambda_l1_addon 0.",
        "--n_sampled_neighbors 4",
        f"--timestamp_suffix _{job_id}",
    ]
    script_args = " " + " ".join(cli_flags)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

##### 4.3.2.3 GATv2 W/O FOV Embedding

In [None]:
# GATv2 encoder, 50 pct subsample, batch covariate only.
# Run 1 of the split submission (split because of the 2 day limit):
# seed 7 / run index 8 / 16 neighbors.
for subsample_pct in [50]:
    # Task and dataset
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "512"

    # Categorical covariates (batch only) and contrastive settings
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    # Slurm job identity and script location
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    job_id = 1
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"

    # Command line arguments; every fragment carries its own leading space,
    # so the fragments are joined without a separator.
    script_args = "".join([
        " --adata_new_name None",
        " --n_neighbors_list 16",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None",
        " --seeds 7",
        " --run_index 8",
        f" --cell_type_key {cell_type_key}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        " --counts_key counts",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Write the job file and hand it over to Slurm
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# Trains seed 6 / run index 7 with 16 neighbors on the 50 pct subsample,
# using only the batch covariate.
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "512"
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    # The job id equals the run number: the job name (and with it the job
    # file, the out/err log files and the timestamp suffix) is derived from
    # the prefix plus this id, so each split run needs its own id to avoid
    # overwriting the files of the other runs of this dataset.
    job_id = 2
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Each fragment starts with a space so that the concatenation yields one
    # well-formed command line argument string.
    script_args =  " --adata_new_name None" \
                   " --n_neighbors_list 16" \
                   f" --edge_batch_size_list {edge_batch_size_str}" \
                   " --node_batch_size_list None" \
                   " --seeds 6" \
                   " --run_index 7" \
                   f" --cell_type_key {cell_type_key}" \
                   " --no-filter_genes" \
                   " --nichenet_keep_target_genes_ratio 1.0" \
                   " --nichenet_max_n_target_genes_per_gp 250" \
                   " --include_mebocost_gps" \
                   f" --species {species}" \
                   " --gp_filter_mode subset" \
                   " --combine_overlap_gps" \
                   " --overlap_thresh_source_genes 0.9" \
                   " --overlap_thresh_target_genes 0.9" \
                   " --overlap_thresh_genes 0.9" \
                   f" --dataset {dataset}" \
                   f" --reference_batches {reference_batches}" \
                   " --counts_key counts" \
                   f" --cat_covariates_keys {cat_covariates_keys}" \
                   f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
                   " --spatial_key spatial" \
                   " --adj_key spatial_connectivities" \
                   " --gp_targets_mask_key nichecompass_gp_targets" \
                   " --gp_sources_mask_key nichecompass_gp_sources" \
                   " --gp_names_key nichecompass_gp_names" \
                   f" --model_label {conv_layer_encoder}_{task}" \
                   " --active_gp_names_key nichecompass_active_gp_names" \
                   " --latent_key nichecompass_latent" \
                   " --n_addon_gp 10" \
                   " --active_gp_thresh_ratio 0." \
                   " --gene_expr_recon_dist nb" \
                   f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
                   f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
                   " --log_variational" \
                   " --node_label_method one-hop-norm" \
                   " --n_layers_encoder 1" \
                   " --n_hidden_encoder None" \
                   f" --conv_layer_encoder {conv_layer_encoder}" \
                   " --n_epochs 400" \
                   " --n_epochs_all_gps 25" \
                   " --n_epochs_no_cat_covariates_contrastive 0" \
                   " --lr 0.001" \
                   " --lambda_edge_recon 5000000." \
                   " --lambda_gene_expr_recon 3000." \
                   f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
                   f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
                   f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
                   " --lambda_group_lasso 0." \
                   " --lambda_l1_masked 0." \
                   " --lambda_l1_addon 0." \
                   " --n_sampled_neighbors 4" \
                   f" --timestamp_suffix _{job_id}"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder (run 3 due to 2 day limit)
# Trains seed 5 / run index 6 with 12 neighbors on the 50 pct subsample,
# using only the batch covariate.
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "512"
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    # The job id equals the run number: the job name (and with it the job
    # file, the out/err log files and the timestamp suffix) is derived from
    # the prefix plus this id, so each split run needs its own id to avoid
    # overwriting the files of the other runs of this dataset.
    job_id = 3
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Each fragment starts with a space so that the concatenation yields one
    # well-formed command line argument string.
    script_args =  " --adata_new_name None" \
                   " --n_neighbors_list 12" \
                   f" --edge_batch_size_list {edge_batch_size_str}" \
                   " --node_batch_size_list None" \
                   " --seeds 5" \
                   " --run_index 6" \
                   f" --cell_type_key {cell_type_key}" \
                   " --no-filter_genes" \
                   " --nichenet_keep_target_genes_ratio 1.0" \
                   " --nichenet_max_n_target_genes_per_gp 250" \
                   " --include_mebocost_gps" \
                   f" --species {species}" \
                   " --gp_filter_mode subset" \
                   " --combine_overlap_gps" \
                   " --overlap_thresh_source_genes 0.9" \
                   " --overlap_thresh_target_genes 0.9" \
                   " --overlap_thresh_genes 0.9" \
                   f" --dataset {dataset}" \
                   f" --reference_batches {reference_batches}" \
                   " --counts_key counts" \
                   f" --cat_covariates_keys {cat_covariates_keys}" \
                   f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
                   " --spatial_key spatial" \
                   " --adj_key spatial_connectivities" \
                   " --gp_targets_mask_key nichecompass_gp_targets" \
                   " --gp_sources_mask_key nichecompass_gp_sources" \
                   " --gp_names_key nichecompass_gp_names" \
                   f" --model_label {conv_layer_encoder}_{task}" \
                   " --active_gp_names_key nichecompass_active_gp_names" \
                   " --latent_key nichecompass_latent" \
                   " --n_addon_gp 10" \
                   " --active_gp_thresh_ratio 0." \
                   " --gene_expr_recon_dist nb" \
                   f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
                   f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
                   " --log_variational" \
                   " --node_label_method one-hop-norm" \
                   " --n_layers_encoder 1" \
                   " --n_hidden_encoder None" \
                   f" --conv_layer_encoder {conv_layer_encoder}" \
                   " --n_epochs 400" \
                   " --n_epochs_all_gps 25" \
                   " --n_epochs_no_cat_covariates_contrastive 0" \
                   " --lr 0.001" \
                   " --lambda_edge_recon 5000000." \
                   " --lambda_gene_expr_recon 3000." \
                   f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
                   f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
                   f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
                   " --lambda_group_lasso 0." \
                   " --lambda_l1_masked 0." \
                   " --lambda_l1_addon 0." \
                   " --n_sampled_neighbors 4" \
                   f" --timestamp_suffix _{job_id}"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder (run 4 due to 2 day limit)
# Trains seed 4 / run index 5 with 12 neighbors on the 50 pct subsample,
# using only the batch covariate.
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "512"
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    # The job id equals the run number: the job name (and with it the job
    # file, the out/err log files and the timestamp suffix) is derived from
    # the prefix plus this id, so each split run needs its own id to avoid
    # overwriting the files of the other runs of this dataset.
    job_id = 4
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Each fragment starts with a space so that the concatenation yields one
    # well-formed command line argument string.
    script_args =  " --adata_new_name None" \
                   " --n_neighbors_list 12" \
                   f" --edge_batch_size_list {edge_batch_size_str}" \
                   " --node_batch_size_list None" \
                   " --seeds 4" \
                   " --run_index 5" \
                   f" --cell_type_key {cell_type_key}" \
                   " --no-filter_genes" \
                   " --nichenet_keep_target_genes_ratio 1.0" \
                   " --nichenet_max_n_target_genes_per_gp 250" \
                   " --include_mebocost_gps" \
                   f" --species {species}" \
                   " --gp_filter_mode subset" \
                   " --combine_overlap_gps" \
                   " --overlap_thresh_source_genes 0.9" \
                   " --overlap_thresh_target_genes 0.9" \
                   " --overlap_thresh_genes 0.9" \
                   f" --dataset {dataset}" \
                   f" --reference_batches {reference_batches}" \
                   " --counts_key counts" \
                   f" --cat_covariates_keys {cat_covariates_keys}" \
                   f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
                   " --spatial_key spatial" \
                   " --adj_key spatial_connectivities" \
                   " --gp_targets_mask_key nichecompass_gp_targets" \
                   " --gp_sources_mask_key nichecompass_gp_sources" \
                   " --gp_names_key nichecompass_gp_names" \
                   f" --model_label {conv_layer_encoder}_{task}" \
                   " --active_gp_names_key nichecompass_active_gp_names" \
                   " --latent_key nichecompass_latent" \
                   " --n_addon_gp 10" \
                   " --active_gp_thresh_ratio 0." \
                   " --gene_expr_recon_dist nb" \
                   f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
                   f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
                   " --log_variational" \
                   " --node_label_method one-hop-norm" \
                   " --n_layers_encoder 1" \
                   " --n_hidden_encoder None" \
                   f" --conv_layer_encoder {conv_layer_encoder}" \
                   " --n_epochs 400" \
                   " --n_epochs_all_gps 25" \
                   " --n_epochs_no_cat_covariates_contrastive 0" \
                   " --lr 0.001" \
                   " --lambda_edge_recon 5000000." \
                   " --lambda_gene_expr_recon 3000." \
                   f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
                   f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
                   f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
                   " --lambda_group_lasso 0." \
                   " --lambda_l1_masked 0." \
                   " --lambda_l1_addon 0." \
                   " --n_sampled_neighbors 4" \
                   f" --timestamp_suffix _{job_id}"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder (run 5 due to 2 day limit)
# Trains the remaining four configurations of the 50 pct subsample in one
# job: seeds 3 2 1 0 / run indices 4 3 2 1 with 8 8 4 4 neighbors, using only
# the batch covariate.
for subsample_pct in [50]:
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "512 512 512 512"
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    # The job id equals the run number: the job name (and with it the job
    # file, the out/err log files and the timestamp suffix) is derived from
    # the prefix plus this id, so each split run needs its own id to avoid
    # overwriting the files of the other runs of this dataset.
    job_id = 5
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Each fragment starts with a space so that the concatenation yields one
    # well-formed command line argument string. The list-valued arguments
    # all carry four entries, one per configuration.
    script_args =  " --adata_new_name None" \
                   " --n_neighbors_list 8 8 4 4" \
                   f" --edge_batch_size_list {edge_batch_size_str}" \
                   " --node_batch_size_list None None None None" \
                   " --seeds 3 2 1 0" \
                   " --run_index 4 3 2 1" \
                   f" --cell_type_key {cell_type_key}" \
                   " --no-filter_genes" \
                   " --nichenet_keep_target_genes_ratio 1.0" \
                   " --nichenet_max_n_target_genes_per_gp 250" \
                   " --include_mebocost_gps" \
                   f" --species {species}" \
                   " --gp_filter_mode subset" \
                   " --combine_overlap_gps" \
                   " --overlap_thresh_source_genes 0.9" \
                   " --overlap_thresh_target_genes 0.9" \
                   " --overlap_thresh_genes 0.9" \
                   f" --dataset {dataset}" \
                   f" --reference_batches {reference_batches}" \
                   " --counts_key counts" \
                   f" --cat_covariates_keys {cat_covariates_keys}" \
                   f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
                   " --spatial_key spatial" \
                   " --adj_key spatial_connectivities" \
                   " --gp_targets_mask_key nichecompass_gp_targets" \
                   " --gp_sources_mask_key nichecompass_gp_sources" \
                   " --gp_names_key nichecompass_gp_names" \
                   f" --model_label {conv_layer_encoder}_{task}" \
                   " --active_gp_names_key nichecompass_active_gp_names" \
                   " --latent_key nichecompass_latent" \
                   " --n_addon_gp 10" \
                   " --active_gp_thresh_ratio 0." \
                   " --gene_expr_recon_dist nb" \
                   f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
                   f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
                   " --log_variational" \
                   " --node_label_method one-hop-norm" \
                   " --n_layers_encoder 1" \
                   " --n_hidden_encoder None" \
                   f" --conv_layer_encoder {conv_layer_encoder}" \
                   " --n_epochs 400" \
                   " --n_epochs_all_gps 25" \
                   " --n_epochs_no_cat_covariates_contrastive 0" \
                   " --lr 0.001" \
                   " --lambda_edge_recon 5000000." \
                   " --lambda_gene_expr_recon 3000." \
                   f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
                   f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
                   f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
                   " --lambda_group_lasso 0." \
                   " --lambda_l1_masked 0." \
                   " --lambda_l1_addon 0." \
                   " --n_sampled_neighbors 4" \
                   f" --timestamp_suffix _{job_id}"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

In [None]:
# GATv2 encoder
# Submits one job per subsample (25, 10, 5 and 1 pct); each job trains all
# eight configurations (seeds 7..0 / run indices 8..1), using only the batch
# covariate.
for subsample_pct in [25, 10, 5, 1]:
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "256 256 256 256 256 256 256 256"
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch"
    cat_covariates_no_edges = "True"
    cat_covariates_embeds_nums = "3"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    # A single id suffices here because the dataset name, which is part of
    # the job name prefix, already differs between loop iterations.
    job_id = 1
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"
    # Each fragment starts with a space so that the concatenation yields one
    # well-formed command line argument string. All list-valued arguments
    # carry eight entries, one per configuration; the neighbor list follows
    # the seed-to-neighbors mapping of the 50 pct runs (seeds 7 6 -> 16,
    # 5 4 -> 12, 3 2 -> 8, 1 0 -> 4).
    script_args =  " --adata_new_name None" \
                   " --n_neighbors_list 16 16 12 12 8 8 4 4" \
                   f" --edge_batch_size_list {edge_batch_size_str}" \
                   " --node_batch_size_list None None None None None None None None" \
                   " --seeds 7 6 5 4 3 2 1 0" \
                   " --run_index 8 7 6 5 4 3 2 1" \
                   f" --cell_type_key {cell_type_key}" \
                   " --no-filter_genes" \
                   " --nichenet_keep_target_genes_ratio 1.0" \
                   " --nichenet_max_n_target_genes_per_gp 250" \
                   " --include_mebocost_gps" \
                   f" --species {species}" \
                   " --gp_filter_mode subset" \
                   " --combine_overlap_gps" \
                   " --overlap_thresh_source_genes 0.9" \
                   " --overlap_thresh_target_genes 0.9" \
                   " --overlap_thresh_genes 0.9" \
                   f" --dataset {dataset}" \
                   f" --reference_batches {reference_batches}" \
                   " --counts_key counts" \
                   f" --cat_covariates_keys {cat_covariates_keys}" \
                   f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
                   " --spatial_key spatial" \
                   " --adj_key spatial_connectivities" \
                   " --gp_targets_mask_key nichecompass_gp_targets" \
                   " --gp_sources_mask_key nichecompass_gp_sources" \
                   " --gp_names_key nichecompass_gp_names" \
                   f" --model_label {conv_layer_encoder}_{task}" \
                   " --active_gp_names_key nichecompass_active_gp_names" \
                   " --latent_key nichecompass_latent" \
                   " --n_addon_gp 10" \
                   " --active_gp_thresh_ratio 0." \
                   " --gene_expr_recon_dist nb" \
                   f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
                   f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
                   " --log_variational" \
                   " --node_label_method one-hop-norm" \
                   " --n_layers_encoder 1" \
                   " --n_hidden_encoder None" \
                   f" --conv_layer_encoder {conv_layer_encoder}" \
                   " --n_epochs 400" \
                   " --n_epochs_all_gps 25" \
                   " --n_epochs_no_cat_covariates_contrastive 0" \
                   " --lr 0.001" \
                   " --lambda_edge_recon 5000000." \
                   " --lambda_gene_expr_recon 3000." \
                   f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
                   f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
                   f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
                   " --lambda_group_lasso 0." \
                   " --lambda_l1_masked 0." \
                   " --lambda_l1_addon 0." \
                   " --n_sampled_neighbors 4" \
                   f" --timestamp_suffix _{job_id}"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

##### 4.3.2.4 GATv2 with FOV Embedding

In [None]:
# GATv2 encoder, 50 pct subsample, batch and fov covariates.
# Run 1 of the split submission (split because of the 2 day limit):
# seed 7 / run index 8 / 16 neighbors.
# NOTE(review): the resulting job name equals that of the batch-only run 1
# job for the same dataset (same prefix, same job id) - confirm that the
# shared job file / log file names are intended.
for subsample_pct in [50]:
    # Task and dataset
    task = "sample_integration_method_benchmarking"
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    reference_batches = "batch1 batch2 batch3"
    cell_type_key = "cell_type"
    species = "human"
    edge_batch_size_str = "256"

    # Categorical covariates (batch and fov; one entry per covariate in
    # each of the space-separated lists) and contrastive settings
    cat_covariates_embeds_injection = "gene_expr_decoder"
    cat_covariates_keys = "batch fov"
    cat_covariates_no_edges = "True False"
    cat_covariates_embeds_nums = "3 30"
    lambda_cat_covariates_contrastive = 0.
    contrastive_logits_pos_ratio = 0.
    contrastive_logits_neg_ratio = 0.
    conv_layer_encoder = "gatv2conv"

    # Slurm job identity and script location
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"
    job_id = 1
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"

    # Command line arguments; every fragment carries its own leading space,
    # so the fragments are joined without a separator.
    script_args = "".join([
        " --adata_new_name None",
        " --n_neighbors_list 16",
        f" --edge_batch_size_list {edge_batch_size_str}",
        " --node_batch_size_list None",
        " --seeds 7",
        " --run_index 8",
        f" --cell_type_key {cell_type_key}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.0",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        " --counts_key counts",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {conv_layer_encoder}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --n_addon_gp 10",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        " --node_label_method one-hop-norm",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        f" --conv_layer_encoder {conv_layer_encoder}",
        " --n_epochs 400",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        " --lambda_edge_recon 5000000.",
        " --lambda_gene_expr_recon 3000.",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        " --lambda_l1_addon 0.",
        " --n_sampled_neighbors 4",
        f" --timestamp_suffix _{job_id}",
    ])

    # Write the job file and hand it over to Slurm
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 2 due to 2 day limit)
# Submits the NicheCompass training job for the 50% NSCLC subsample that
# covers n_neighbors=16 / seed 6 / run index 7.
# Settings that do not depend on the subsample are assigned once, up front.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
conv_layer_encoder = "gatv2conv"

# job_id is part of the Slurm job name and is also passed on as the
# --timestamp_suffix of the training script.
job_id = 2
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Adjacent string literals are joined into one command-line string;
    # every fragment starts with a space so the flags stay separated.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 6"
        " --run_index 7"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 3 due to 2 day limit)
# Submits the NicheCompass training job for the 50% NSCLC subsample that
# covers n_neighbors=12 / seed 5 / run index 6.
# Settings that do not depend on the subsample are assigned once, up front.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
conv_layer_encoder = "gatv2conv"

# job_id is part of the Slurm job name and is also passed on as the
# --timestamp_suffix of the training script.
job_id = 3
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Adjacent string literals are joined into one command-line string;
    # every fragment starts with a space so the flags stay separated.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 12"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 5"
        " --run_index 6"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 4 due to 2 day limit)
# Submits the NicheCompass training job for the 50% NSCLC subsample that
# covers n_neighbors=12 / seed 4 / run index 5.
# Settings that do not depend on the subsample are assigned once, up front.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
conv_layer_encoder = "gatv2conv"

# job_id is part of the Slurm job name and is also passed on as the
# --timestamp_suffix of the training script.
job_id = 4
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Adjacent string literals are joined into one command-line string;
    # every fragment starts with a space so the flags stay separated.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 12"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None"
        " --seeds 4"
        " --run_index 5"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder (run 5 due to 2 day limit)
# Submits one NicheCompass training job for the 50% NSCLC subsample that
# bundles the four remaining configurations: n_neighbors 8/8/4/4,
# seeds 3/2/1/0, run indices 4/3/2/1.
# Settings that do not depend on the subsample are assigned once, up front.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "256 256 256 256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
conv_layer_encoder = "gatv2conv"

# job_id is part of the Slurm job name and is also passed on as the
# --timestamp_suffix of the training script.
job_id = 5
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [50]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Adjacent string literals are joined into one command-line string;
    # every fragment starts with a space so the flags stay separated.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None"
        " --seeds 3 2 1 0"
        " --run_index 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

In [None]:
# GATv2 encoder
# Submits one NicheCompass training job per NSCLC subsample (25%, 10%, 5%,
# 1%). Each job carries the full sweep of eight configurations:
# n_neighbors 16/16/12/12/8/8/4/4 with seeds 7..0 and run indices 8..1.
# Settings shared by all subsamples are assigned once, before the loop.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"
species = "human"
edge_batch_size_str = "256 256 256 256 256 256 256 256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 30"
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
conv_layer_encoder = "gatv2conv"

# job_id is part of the Slurm job name and is also passed on as the
# --timestamp_suffix of the training script.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in [25, 10, 5, 1]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{conv_layer_encoder}_{task}"

    # Adjacent string literals are joined into one command-line string;
    # every fragment starts with a space so the flags stay separated.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 16 16 12 12 8 8 4 4"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None"
        " --seeds 7 6 5 4 3 2 1 0"
        " --run_index 8 7 6 5 4 3 2 1"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1.0"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {conv_layer_encoder}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --n_addon_gp 10"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        " --node_label_method one-hop-norm"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 400"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 5000000."
        " --lambda_gene_expr_recon 3000."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        " --lambda_l1_addon 0."
        " --n_sampled_neighbors 4"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 4.4 Metrics Computation

#### 4.4.1 NicheCompass

In [None]:
# GCN encoder
# Submits one compute_benchmarking_metrics.py job per seqFISH mouse
# organogenesis dataset (full data plus the subsamples), each reading the
# matching "<dataset>_nichecompass_gcnconv.h5ad" file.
job_id = 1

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "seqfish_mouse_organogenesis",
    "seqfish_mouse_organogenesis_subsample_50pct",
    "seqfish_mouse_organogenesis_subsample_25pct",
    "seqfish_mouse_organogenesis_subsample_10pct",
    "seqfish_mouse_organogenesis_subsample_5pct",
    "seqfish_mouse_organogenesis_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder
# Submits one compute_benchmarking_metrics.py job per seqFISH mouse
# organogenesis dataset (full data plus the subsamples), each reading the
# matching "<dataset>_nichecompass_gatv2conv.h5ad" file.
job_id = 2

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "seqfish_mouse_organogenesis",
    "seqfish_mouse_organogenesis_subsample_50pct",
    "seqfish_mouse_organogenesis_subsample_25pct",
    "seqfish_mouse_organogenesis_subsample_10pct",
    "seqfish_mouse_organogenesis_subsample_5pct",
    "seqfish_mouse_organogenesis_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gatv2conv.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder
# Submits one compute_benchmarking_metrics.py job per imputed seqFISH mouse
# organogenesis dataset (full data plus the subsamples), each reading the
# matching "<dataset>_nichecompass_gcnconv.h5ad" file.
job_id = 1

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "seqfish_mouse_organogenesis_imputed",
    "seqfish_mouse_organogenesis_imputed_subsample_50pct",
    "seqfish_mouse_organogenesis_imputed_subsample_25pct",
    "seqfish_mouse_organogenesis_imputed_subsample_10pct",
    "seqfish_mouse_organogenesis_imputed_subsample_5pct",
    "seqfish_mouse_organogenesis_imputed_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder
# Submits one compute_benchmarking_metrics.py job per imputed seqFISH mouse
# organogenesis dataset (full data plus the subsamples), each reading the
# matching "<dataset>_nichecompass_gatv2conv.h5ad" file.
job_id = 2

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "seqfish_mouse_organogenesis_imputed",
    "seqfish_mouse_organogenesis_imputed_subsample_50pct",
    "seqfish_mouse_organogenesis_imputed_subsample_25pct",
    "seqfish_mouse_organogenesis_imputed_subsample_10pct",
    "seqfish_mouse_organogenesis_imputed_subsample_5pct",
    "seqfish_mouse_organogenesis_imputed_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gatv2conv.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder W/O FoV embedding
# Submits one compute_benchmarking_metrics.py job per NanoString CosMx NSCLC
# dataset (full data plus the subsamples), each reading the matching
# "<dataset>_nichecompass_gcnconv.h5ad" file.
# The job name does not encode the model variant; only job_id tells the
# NSCLC metrics jobs of different variants apart.
job_id = 1

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "nanostring_cosmx_human_nsclc",
    "nanostring_cosmx_human_nsclc_subsample_50pct",
    "nanostring_cosmx_human_nsclc_subsample_25pct",
    "nanostring_cosmx_human_nsclc_subsample_10pct",
    "nanostring_cosmx_human_nsclc_subsample_5pct",
    "nanostring_cosmx_human_nsclc_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gcnconv.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder with FoV embedding
# Submits one compute_benchmarking_metrics.py job per NanoString CosMx NSCLC
# dataset (full data plus the subsamples), each reading the matching
# "<dataset>_nichecompass_gcnconv_fov.h5ad" file.
# The job name does not encode the model variant; only job_id tells the
# NSCLC metrics jobs of different variants apart.
job_id = 2

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "nanostring_cosmx_human_nsclc",
    "nanostring_cosmx_human_nsclc_subsample_50pct",
    "nanostring_cosmx_human_nsclc_subsample_25pct",
    "nanostring_cosmx_human_nsclc_subsample_10pct",
    "nanostring_cosmx_human_nsclc_subsample_5pct",
    "nanostring_cosmx_human_nsclc_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gcnconv_fov.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GCN encoder with contrastive loss
# Submits one compute_benchmarking_metrics.py job per NanoString CosMx NSCLC
# dataset (full data plus the subsamples), each reading the matching
# "<dataset>_nichecompass_gcnconv_cont.h5ad" file.
# The job name does not encode the model variant; only job_id tells the
# NSCLC metrics jobs of different variants apart.
job_id = 3

# Settings shared by all datasets.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
# Metric identifiers are forwarded verbatim to the script via --metrics.
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in (
    "nanostring_cosmx_human_nsclc",
    "nanostring_cosmx_human_nsclc_subsample_50pct",
    "nanostring_cosmx_human_nsclc_subsample_25pct",
    "nanostring_cosmx_human_nsclc_subsample_10pct",
    "nanostring_cosmx_human_nsclc_subsample_5pct",
    "nanostring_cosmx_human_nsclc_subsample_1pct",
):
    file_name = f"{dataset}_nichecompass_gcnconv_cont.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every fragment starts with a space so the flags stay separated.
    script_args = "".join((
        f" --dataset {dataset}",
        f" --task {task}",
        f" --file_name {file_name}",
        f" --cell_type_key {cell_type_key}",
        f" --batch_key {batch_key}",
        f" --batches {batches}",
        " --latent_key nichecompass_latent",
        f" --metrics {metrics}",
    ))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

In [None]:
# GATv2 encoder W/O FoV embedding
# Calls submit_python_script once per NSCLC dataset variant to run
# compute_benchmarking_metrics.py on the matching NicheCompass result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc",
        "nanostring_cosmx_human_nsclc_subsample_50pct",
        "nanostring_cosmx_human_nsclc_subsample_25pct",
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
        "nanostring_cosmx_human_nsclc_subsample_1pct",
]:
    file_name = f"{dataset}_nichecompass_gatv2conv.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "nichecompass_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# GATv2 encoder with FoV embedding
# Calls submit_python_script once per NSCLC dataset variant to run
# compute_benchmarking_metrics.py on the matching NicheCompass result file.
job_id = 2

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc",
        "nanostring_cosmx_human_nsclc_subsample_50pct",
        "nanostring_cosmx_human_nsclc_subsample_25pct",
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
        "nanostring_cosmx_human_nsclc_subsample_1pct",
]:
    file_name = f"{dataset}_nichecompass_gatv2conv_fov.h5ad"
    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "nichecompass_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# GATv2 encoder with contrastive loss
# Calls submit_python_script once per NSCLC dataset variant to run
# compute_benchmarking_metrics.py on the matching NicheCompass result file.
# NOTE(review): the GCN contrastive-loss cell above uses the same
# job_name_prefix, datasets and job_id = 3, so both cells produce identical job
# and log file names -- confirm they are not meant to run at the same time.
job_id = 3
for dataset in ["nanostring_cosmx_human_nsclc",
                "nanostring_cosmx_human_nsclc_subsample_50pct",
                "nanostring_cosmx_human_nsclc_subsample_25pct",
                "nanostring_cosmx_human_nsclc_subsample_10pct",
                "nanostring_cosmx_human_nsclc_subsample_5pct",
                "nanostring_cosmx_human_nsclc_subsample_1pct"
               ]:
    task = "sample_integration_method_benchmarking"
    cell_type_key = "cell_type"
    # Set explicitly so this cell does not depend on a value left behind by a
    # previously executed cell (it was missing here, unlike in the sibling cells)
    batch_key = "batch"
    batches = "batch1 batch2 batch3"
    file_name = f"{dataset}_nichecompass_gatv2conv_cont.h5ad"
    metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"

    job_name_prefix = f"nichecompass_sample_integration_method_benchmarking_{dataset}_metrics_computation"
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "compute_benchmarking_metrics.py"
    # Command-line arguments handed to the metrics script
    script_args = f" --dataset {dataset}" \
                  f" --task {task}" \
                  f" --file_name {file_name}" \
                  f" --cell_type_key {cell_type_key}" \
                  f" --batch_key {batch_key}" \
                  f" --batches {batches}" \
                  f" --latent_key nichecompass_latent" \
                  f" --metrics {metrics}"

    submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args)

#### 4.4.2 STACI

In [None]:
# Calls submit_python_script once per seqFISH mouse organogenesis dataset
# variant to run compute_benchmarking_metrics.py on the matching STACI result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis",
        "seqfish_mouse_organogenesis_subsample_50pct",
        "seqfish_mouse_organogenesis_subsample_25pct",
        "seqfish_mouse_organogenesis_subsample_10pct",
        "seqfish_mouse_organogenesis_subsample_5pct",
        "seqfish_mouse_organogenesis_subsample_1pct",
]:
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "staci_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script to run compute_benchmarking_metrics.py on the
# STACI result file of the imputed seqFISH dataset. The subsample variants are
# commented out, so only the full dataset is processed.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis_imputed",
        #"seqfish_mouse_organogenesis_imputed_subsample_50pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_25pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_10pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_5pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_1pct"
]:
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "staci_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script to run compute_benchmarking_metrics.py on the
# run-specific STACI result file of the imputed seqFISH dataset. The run number
# is part of the file name, the job name and the script arguments. The
# subsample variants are commented out, so only the full dataset is processed.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
run_number = "8"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis_imputed",
        #"seqfish_mouse_organogenesis_imputed_subsample_50pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_25pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_10pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_5pct",
        #"seqfish_mouse_organogenesis_imputed_subsample_1pct"
]:
    file_name = f"{dataset}_staci_run{run_number}.h5ad"
    job_name_prefix = f"staci_sample_integration_method_benchmarking_{dataset}_metrics_computation_run{run_number}"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "staci_latent"),
            ("metrics", metrics),
            ("run_number", run_number)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per listed NSCLC subsample to run
# compute_benchmarking_metrics.py on the matching STACI result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
        "nanostring_cosmx_human_nsclc_subsample_1pct",
]:
    file_name = f"{dataset}_staci.h5ad"
    job_name_prefix = f"staci_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "staci_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 4.4.3 GraphST

In [None]:
# Calls submit_python_script once per seqFISH mouse organogenesis dataset
# variant to run compute_benchmarking_metrics.py on the matching GraphST result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis",
        "seqfish_mouse_organogenesis_subsample_50pct",
        "seqfish_mouse_organogenesis_subsample_25pct",
        "seqfish_mouse_organogenesis_subsample_10pct",
        "seqfish_mouse_organogenesis_subsample_5pct",
        "seqfish_mouse_organogenesis_subsample_1pct",
]:
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# PASTE alignment
# Calls submit_python_script once per seqFISH mouse organogenesis dataset
# variant to run compute_benchmarking_metrics.py on the matching
# "graphst_paste" result file.
job_id = 2

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis",
        "seqfish_mouse_organogenesis_subsample_50pct",
        "seqfish_mouse_organogenesis_subsample_25pct",
        "seqfish_mouse_organogenesis_subsample_10pct",
        "seqfish_mouse_organogenesis_subsample_5pct",
        "seqfish_mouse_organogenesis_subsample_1pct",
]:
    file_name = f"{dataset}_graphst_paste.h5ad"
    job_name_prefix = f"graphst_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per imputed seqFISH dataset variant to run
# compute_benchmarking_metrics.py on the matching GraphST result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis_imputed",
        "seqfish_mouse_organogenesis_imputed_subsample_50pct",
        "seqfish_mouse_organogenesis_imputed_subsample_25pct",
        "seqfish_mouse_organogenesis_imputed_subsample_10pct",
        "seqfish_mouse_organogenesis_imputed_subsample_5pct",
        "seqfish_mouse_organogenesis_imputed_subsample_1pct",
]:
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# PASTE alignment
# Calls submit_python_script once per imputed seqFISH dataset variant to run
# compute_benchmarking_metrics.py on the matching "graphst_paste" result file.
job_id = 2

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis_imputed",
        "seqfish_mouse_organogenesis_imputed_subsample_50pct",
        "seqfish_mouse_organogenesis_imputed_subsample_25pct",
        "seqfish_mouse_organogenesis_imputed_subsample_10pct",
        "seqfish_mouse_organogenesis_imputed_subsample_5pct",
        "seqfish_mouse_organogenesis_imputed_subsample_1pct",
]:
    file_name = f"{dataset}_graphst_paste.h5ad"
    job_name_prefix = f"graphst_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per listed NSCLC subsample to run
# compute_benchmarking_metrics.py on the matching GraphST result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
        "nanostring_cosmx_human_nsclc_subsample_1pct",
]:
    file_name = f"{dataset}_graphst.h5ad"
    job_name_prefix = f"graphst_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# PASTE alignment
# Calls submit_python_script once per listed NSCLC subsample to run
# compute_benchmarking_metrics.py on the matching "graphst_paste" result file.
job_id = 2

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
        "nanostring_cosmx_human_nsclc_subsample_1pct",
]:
    file_name = f"{dataset}_graphst_paste.h5ad"
    job_name_prefix = f"graphst_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "graphst_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 4.4.4 CellCharter

In [None]:
# Calls submit_python_script once per seqFISH mouse organogenesis dataset
# variant to run compute_benchmarking_metrics.py on the matching CellCharter
# result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis",
        "seqfish_mouse_organogenesis_subsample_50pct",
        "seqfish_mouse_organogenesis_subsample_25pct",
        "seqfish_mouse_organogenesis_subsample_10pct",
        "seqfish_mouse_organogenesis_subsample_5pct",
        "seqfish_mouse_organogenesis_subsample_1pct",
]:
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "cellcharter_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per imputed seqFISH dataset variant to run
# compute_benchmarking_metrics.py on the matching CellCharter result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis_imputed",
        "seqfish_mouse_organogenesis_imputed_subsample_50pct",
        "seqfish_mouse_organogenesis_imputed_subsample_25pct",
        "seqfish_mouse_organogenesis_imputed_subsample_10pct",
        "seqfish_mouse_organogenesis_imputed_subsample_5pct",
        "seqfish_mouse_organogenesis_imputed_subsample_1pct",
]:
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "cellcharter_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per NSCLC dataset variant to run
# compute_benchmarking_metrics.py on the matching CellCharter result file.
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc",
        "nanostring_cosmx_human_nsclc_subsample_50pct",
        "nanostring_cosmx_human_nsclc_subsample_25pct",
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
        "nanostring_cosmx_human_nsclc_subsample_1pct",
]:
    file_name = f"{dataset}_cellcharter.h5ad"
    job_name_prefix = f"cellcharter_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", "cellcharter_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 4.4.5 BANKSY

In [None]:
# Calls submit_python_script once per seqFISH mouse organogenesis dataset
# variant to run compute_benchmarking_metrics.py on the result file of `method`.
method = "banksy"
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis",
        "seqfish_mouse_organogenesis_subsample_50pct",
        "seqfish_mouse_organogenesis_subsample_25pct",
        "seqfish_mouse_organogenesis_subsample_10pct",
        "seqfish_mouse_organogenesis_subsample_5pct",
        "seqfish_mouse_organogenesis_subsample_1pct",
]:
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per imputed seqFISH dataset variant to run
# compute_benchmarking_metrics.py on the result file of `method`.
method = "banksy"
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "seqfish_mouse_organogenesis_imputed",
        "seqfish_mouse_organogenesis_imputed_subsample_50pct",
        "seqfish_mouse_organogenesis_imputed_subsample_25pct",
        "seqfish_mouse_organogenesis_imputed_subsample_10pct",
        "seqfish_mouse_organogenesis_imputed_subsample_5pct",
        "seqfish_mouse_organogenesis_imputed_subsample_1pct",
]:
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Calls submit_python_script once per listed NSCLC dataset variant to run
# compute_benchmarking_metrics.py on the result file of `method`.
# NOTE(review): the 1pct subsample is not in the dataset list below, unlike in
# the CellCharter NSCLC cell of this section -- confirm this is intentional.
method = "banksy"
job_id = 1

# Settings that are identical for every dataset in this cell
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"
job_folder_path = f"./benchmarking/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
script_name = "compute_benchmarking_metrics.py"

for dataset in [
        "nanostring_cosmx_human_nsclc",
        "nanostring_cosmx_human_nsclc_subsample_50pct",
        "nanostring_cosmx_human_nsclc_subsample_25pct",
        "nanostring_cosmx_human_nsclc_subsample_10pct",
        "nanostring_cosmx_human_nsclc_subsample_5pct",
]:
    file_name = f"{dataset}_{method}.h5ad"
    job_name_prefix = f"{method}_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Every flag is emitted as " --<flag> <value>" (leading space included)
    script_args = "".join(
        f" --{flag} {value}"
        for flag, value in (
            ("dataset", dataset),
            ("task", task),
            ("file_name", file_name),
            ("cell_type_key", cell_type_key),
            ("batch_key", batch_key),
            ("batches", batches),
            ("latent_key", f"{method}_latent"),
            ("metrics", metrics)))

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

#### 4.4.6 scVI (Not Included)

In [None]:
# Submit one compute_benchmarking_metrics.py job per dataset variant (the full
# seqfish_mouse_organogenesis dataset plus its subsamples) for the scvi latent
# representation.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"

# Settings that are identical for every dataset are defined once up front.
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

# job_id stays constant; job names remain unique because the job name prefix
# embeds the dataset name.
job_id = 1
full_dataset = "seqfish_mouse_organogenesis"
subsample_pcts = (50, 25, 10, 5, 1)
for dataset in [full_dataset] + [
        f"{full_dataset}_subsample_{pct}pct" for pct in subsample_pcts]:
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Command line options in the order they are passed to the script.
    cli_options = {
        "dataset": dataset,
        "task": task,
        "file_name": file_name,
        "cell_type_key": cell_type_key,
        "batch_key": batch_key,
        "batches": batches,
        "latent_key": "scvi_latent",
        "metrics": metrics,
    }
    # Every option is rendered as " --flag value" (note the leading space).
    script_args = "".join(
        f" --{flag} {value}" for flag, value in cli_options.items())

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args)

In [None]:
# Submit one compute_benchmarking_metrics.py job per dataset variant (the full
# seqfish_mouse_organogenesis_imputed dataset plus its subsamples) for the
# scvi latent representation.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3 batch4 batch5 batch6"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"

# Settings that are identical for every dataset are defined once up front.
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

# job_id stays constant; job names remain unique because the job name prefix
# embeds the dataset name.
job_id = 1
full_dataset = "seqfish_mouse_organogenesis_imputed"
subsample_pcts = (50, 25, 10, 5, 1)
for dataset in [full_dataset] + [
        f"{full_dataset}_subsample_{pct}pct" for pct in subsample_pcts]:
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Command line options in the order they are passed to the script.
    cli_options = {
        "dataset": dataset,
        "task": task,
        "file_name": file_name,
        "cell_type_key": cell_type_key,
        "batch_key": batch_key,
        "batches": batches,
        "latent_key": "scvi_latent",
        "metrics": metrics,
    }
    # Every option is rendered as " --flag value" (note the leading space).
    script_args = "".join(
        f" --{flag} {value}" for flag, value in cli_options.items())

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args)

In [None]:
# Submit one compute_benchmarking_metrics.py job per dataset variant (the full
# nanostring_cosmx_human_nsclc dataset plus its subsamples) for the scvi
# latent representation.
task = "sample_integration_method_benchmarking"
cell_type_key = "cell_type"
batch_key = "batch"
batches = "batch1 batch2 batch3"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw blisi kbet pcr"

# Settings that are identical for every dataset are defined once up front.
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_benchmarking_metrics.py"

# job_id stays constant; job names remain unique because the job name prefix
# embeds the dataset name.
job_id = 1
full_dataset = "nanostring_cosmx_human_nsclc"
subsample_pcts = (50, 25, 10, 5, 1)
for dataset in [full_dataset] + [
        f"{full_dataset}_subsample_{pct}pct" for pct in subsample_pcts]:
    file_name = f"{dataset}_scvi.h5ad"
    job_name_prefix = f"scvi_sample_integration_method_benchmarking_{dataset}_metrics_computation"

    # Command line options in the order they are passed to the script.
    cli_options = {
        "dataset": dataset,
        "task": task,
        "file_name": file_name,
        "cell_type_key": cell_type_key,
        "batch_key": batch_key,
        "batches": batches,
        "latent_key": "scvi_latent",
        "metrics": metrics,
    }
    # Every option is rendered as " --flag value" (note the leading space).
    script_args = "".join(
        f" --{flag} {value}" for flag, value in cli_options.items())

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args)

## 5. NicheCompass Reference Model

### 5.1 seqFISH Mouse Organogenesis Imputed

In [None]:
# Submit train_nichecompass_reference_model.py jobs for the
# seqfish_mouse_organogenesis_imputed dataset, one job per hyperparameter
# combination, numbering the jobs from 1.
job_id = 1

# Fixed settings shared by every job submitted from this cell.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_sampled_neighbors = 4
n_hvg = 0
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "256"  # alternative value: "512"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative value: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
# NOTE(review): the other reference model cells in this section use
# "./data_analysis/{task}/slurm_jobs" and ".../analysis/data_analysis/{task}";
# confirm that the two paths below are still the intended ones.
job_folder_path = "./reference/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Hyperparameter grid; the first "for" clause varies slowest. Paired values
# (zip) are swept together rather than crossed.
hyperparam_grid = [
    (edge_w, gene_w, gp_ratio, l1_masked, l1_addon, addon_gps, svgs, conv, k)
    for edge_w, gene_w in zip([5000000], [3000])
    for gp_ratio in [0.]
    for l1_masked, l1_addon in zip([0], [0])
    for addon_gps in [10]
    for svgs in [3000]
    for conv in ["gcnconv"]
    for k in [4]
]

for (lambda_edge_recon,
     lambda_gene_expr_recon,
     active_gp_thresh_ratio,
     lambda_l1_masked,
     lambda_l1_addon,
     n_addon_gp,
     n_svg,
     conv_layer_encoder,
     n_neighbors) in hyperparam_grid:
    # Command line tokens in the order they are passed to the script.
    cli_tokens = [
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        f"--n_neighbors {n_neighbors}",
        "--filter_genes",
        f"--n_hvg {n_hvg}",
        f"--n_svg {n_svg}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        "--counts_key counts",
        "--condition_key batch",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        f"--n_addon_gp {n_addon_gp}",
        f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        f"--node_label_method {node_label_method}",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 100",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        f"--lambda_edge_recon {lambda_edge_recon}",
        f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        f"--lambda_l1_masked {lambda_l1_masked}",
        f"--lambda_l1_addon {lambda_l1_addon}",
        f"--edge_batch_size {edge_batch_size}",
        "--node_batch_size None",
        f"--n_sampled_neighbors {n_sampled_neighbors}",
        f"--timestamp_suffix _{job_id}",
    ]
    # The argument string starts with a space and separates tokens by spaces.
    script_args = " " + " ".join(cli_tokens)

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args,
                         nice=10000)

    # Each hyperparameter combination gets its own job id.
    job_id += 1

In [None]:
# Submit train_nichecompass_reference_model.py jobs for the
# seqfish_mouse_organogenesis_imputed dataset, one job per hyperparameter
# combination, numbering the jobs from 2.
job_id = 2

# Fixed settings shared by every job submitted from this cell.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch5 batch6"
n_sampled_neighbors = 4
n_hvg = 0
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "256"  # alternative value: "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Hyperparameter grid; the first "for" clause varies slowest. Paired values
# (zip) are swept together rather than crossed.
hyperparam_grid = [
    (edge_w, gene_w, gp_ratio, l1_masked, l1_addon, addon_gps, svgs, conv, k)
    for edge_w, gene_w in zip([5000000], [3000])
    for gp_ratio in [0.01]
    for l1_masked, l1_addon in zip([0], [0])
    for addon_gps in [100]
    for svgs in [5000]
    for conv in ["gatv2conv"]
    for k in [8]
]

for (lambda_edge_recon,
     lambda_gene_expr_recon,
     active_gp_thresh_ratio,
     lambda_l1_masked,
     lambda_l1_addon,
     n_addon_gp,
     n_svg,
     conv_layer_encoder,
     n_neighbors) in hyperparam_grid:
    # Command line tokens in the order they are passed to the script.
    cli_tokens = [
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        f"--n_neighbors {n_neighbors}",
        "--filter_genes",
        f"--n_hvg {n_hvg}",
        f"--n_svg {n_svg}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        "--counts_key counts",
        "--condition_key batch",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        f"--n_addon_gp {n_addon_gp}",
        f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        f"--node_label_method {node_label_method}",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 100",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        f"--lambda_edge_recon {lambda_edge_recon}",
        f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        f"--lambda_l1_masked {lambda_l1_masked}",
        f"--lambda_l1_addon {lambda_l1_addon}",
        f"--edge_batch_size {edge_batch_size}",
        "--node_batch_size None",
        f"--n_sampled_neighbors {n_sampled_neighbors}",
        f"--timestamp_suffix _{job_id}",
    ]
    # The argument string starts with a space and separates tokens by spaces.
    script_args = " " + " ".join(cli_tokens)

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args,
                         nice=10000)

    # Each hyperparameter combination gets its own job id.
    job_id += 1

In [None]:
# Submit train_nichecompass_reference_model.py jobs for the
# seqfish_mouse_organogenesis_imputed dataset, one job per hyperparameter
# combination, numbering the jobs from 3.
job_id = 3

# Fixed settings shared by every job submitted from this cell.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4"
n_sampled_neighbors = 4
n_hvg = 0
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "256"  # alternative value: "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Hyperparameter grid; the first "for" clause varies slowest. Paired values
# (zip) are swept together rather than crossed.
hyperparam_grid = [
    (edge_w, gene_w, gp_ratio, l1_masked, l1_addon, addon_gps, svgs, conv, k)
    for edge_w, gene_w in zip([5000000], [3000])
    for gp_ratio in [0.01]
    for l1_masked, l1_addon in zip([0], [0.])
    for addon_gps in [100]
    for svgs in [5000]
    for conv in ["gatv2conv"]
    for k in [8]
]

for (lambda_edge_recon,
     lambda_gene_expr_recon,
     active_gp_thresh_ratio,
     lambda_l1_masked,
     lambda_l1_addon,
     n_addon_gp,
     n_svg,
     conv_layer_encoder,
     n_neighbors) in hyperparam_grid:
    # Command line tokens in the order they are passed to the script.
    cli_tokens = [
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        f"--n_neighbors {n_neighbors}",
        "--filter_genes",
        f"--n_hvg {n_hvg}",
        f"--n_svg {n_svg}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        "--counts_key counts",
        "--condition_key batch",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        f"--n_addon_gp {n_addon_gp}",
        f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        f"--node_label_method {node_label_method}",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 100",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        f"--lambda_edge_recon {lambda_edge_recon}",
        f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        f"--lambda_l1_masked {lambda_l1_masked}",
        f"--lambda_l1_addon {lambda_l1_addon}",
        f"--edge_batch_size {edge_batch_size}",
        "--node_batch_size None",
        f"--n_sampled_neighbors {n_sampled_neighbors}",
        f"--timestamp_suffix _{job_id}",
    ]
    # The argument string starts with a space and separates tokens by spaces.
    script_args = " " + " ".join(cli_tokens)

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args,
                         nice=10000)

    # Each hyperparameter combination gets its own job id.
    job_id += 1

In [None]:
# Submit train_nichecompass_reference_model.py jobs for the
# seqfish_mouse_organogenesis_imputed dataset, one job per hyperparameter
# combination, numbering the jobs from 4.
job_id = 4

# Fixed settings shared by every job submitted from this cell.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_sampled_neighbors = 4
n_hvg = 0
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "256"  # alternative value: "512"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Hyperparameter grid; the first "for" clause varies slowest. Paired values
# (zip) are swept together rather than crossed.
hyperparam_grid = [
    (edge_w, gene_w, gp_ratio, l1_masked, l1_addon, addon_gps, svgs, conv, k)
    for edge_w, gene_w in zip([5000000], [3000])
    for gp_ratio in [0.01]
    for l1_masked, l1_addon in zip([0], [0.])
    for addon_gps in [100]
    for svgs in [5000]
    for conv in ["gatv2conv"]
    for k in [4]
]

for (lambda_edge_recon,
     lambda_gene_expr_recon,
     active_gp_thresh_ratio,
     lambda_l1_masked,
     lambda_l1_addon,
     n_addon_gp,
     n_svg,
     conv_layer_encoder,
     n_neighbors) in hyperparam_grid:
    # Command line tokens in the order they are passed to the script.
    cli_tokens = [
        f"--dataset {dataset}",
        f"--reference_batches {reference_batches}",
        f"--n_neighbors {n_neighbors}",
        "--filter_genes",
        f"--n_hvg {n_hvg}",
        f"--n_svg {n_svg}",
        "--nichenet_keep_target_genes_ratio 1.0",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        "--counts_key counts",
        "--condition_key batch",
        f"--cat_covariates_keys {cat_covariates_keys}",
        f"--cat_covariates_no_edges {cat_covariates_no_edges}",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        f"--n_addon_gp {n_addon_gp}",
        f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "--gene_expr_recon_dist nb",
        f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "--log_variational",
        f"--node_label_method {node_label_method}",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        f"--conv_layer_encoder {conv_layer_encoder}",
        "--n_epochs 100",
        "--n_epochs_all_gps 25",
        "--n_epochs_no_cat_covariates_contrastive 0",
        "--lr 0.001",
        f"--lambda_edge_recon {lambda_edge_recon}",
        f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "--lambda_group_lasso 0.",
        f"--lambda_l1_masked {lambda_l1_masked}",
        f"--lambda_l1_addon {lambda_l1_addon}",
        f"--edge_batch_size {edge_batch_size}",
        "--node_batch_size None",
        f"--n_sampled_neighbors {n_sampled_neighbors}",
        f"--timestamp_suffix _{job_id}",
    ]
    # The argument string starts with a space and separates tokens by spaces.
    script_args = " " + " ".join(cli_tokens)

    submit_python_script(job_name_prefix=job_name_prefix,
                         job_id=job_id,
                         job_folder_path=job_folder_path,
                         conda_env_name=conda_env_name,
                         script_folder_path=script_folder_path,
                         script_name=script_name,
                         script_args=script_args,
                         nice=10000)

    # Each hyperparameter combination gets its own job id.
    job_id += 1

In [None]:
# Submit the reference-model training script for
# "seqfish_mouse_organogenesis_imputed" with n_neighbors = 12.
# Every hyperparameter list below contains a single value, so the nested
# loops execute exactly once; adding values to a list turns this cell into a
# grid sweep in which each combination gets its own job_id.
job_id = 5

# Settings that stay fixed for every combination of the sweep.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_hvg = 0
n_sampled_neighbors = 4
node_label_method = "one-hop-norm"
edge_batch_size = "256"  # alternative noted by the author: "512"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Job bookkeeping: names and locations handed to submit_python_script.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

for lambda_edge_recon, lambda_gene_expr_recon in zip([5_000_000], [3000]):
    for active_gp_thresh_ratio in [0.01]:
        for lambda_l1_masked, lambda_l1_addon in zip([0], [0.0]):
            for n_addon_gp in [100]:
                for n_svg in [5000]:
                    for conv_layer_encoder in ["gatv2conv"]:
                        for n_neighbors in [12]:
                            # Adjacent string literals are concatenated by
                            # the parser; every fragment starts with a space
                            # so the pieces form one command-line tail.
                            script_args = (
                                # dataset, neighbourhood size, gene selection
                                f" --dataset {dataset}"
                                f" --reference_batches {reference_batches}"
                                f" --n_neighbors {n_neighbors}"
                                " --filter_genes"
                                f" --n_hvg {n_hvg}"
                                f" --n_svg {n_svg}"
                                # GP-related flags
                                " --nichenet_keep_target_genes_ratio 1.0"
                                " --nichenet_max_n_target_genes_per_gp 250"
                                " --include_mebocost_gps"
                                f" --species {species}"
                                " --gp_filter_mode subset"
                                " --combine_overlap_gps"
                                " --overlap_thresh_source_genes 0.9"
                                " --overlap_thresh_target_genes 0.9"
                                " --overlap_thresh_genes 0.9"
                                # key names and model label
                                " --counts_key counts"
                                " --condition_key batch"
                                f" --cat_covariates_keys {cat_covariates_keys}"
                                f" --cat_covariates_no_edges {cat_covariates_no_edges}"
                                " --spatial_key spatial"
                                " --adj_key spatial_connectivities"
                                " --mapping_entity_key mapping_entity"
                                " --gp_targets_mask_key nichecompass_gp_targets"
                                " --gp_sources_mask_key nichecompass_gp_sources"
                                " --gp_names_key nichecompass_gp_names"
                                f" --model_label {task}"
                                " --active_gp_names_key nichecompass_active_gp_names"
                                " --latent_key nichecompass_latent"
                                # model configuration
                                f" --n_addon_gp {n_addon_gp}"
                                f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
                                " --gene_expr_recon_dist nb"
                                f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
                                f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
                                " --log_variational"
                                f" --node_label_method {node_label_method}"
                                " --n_layers_encoder 1"
                                " --n_hidden_encoder None"
                                f" --conv_layer_encoder {conv_layer_encoder}"
                                # training schedule and loss weights
                                " --n_epochs 100"
                                " --n_epochs_all_gps 25"
                                " --n_epochs_no_cat_covariates_contrastive 0"
                                " --lr 0.001"
                                f" --lambda_edge_recon {lambda_edge_recon}"
                                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
                                f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
                                f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
                                f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
                                " --lambda_group_lasso 0."
                                f" --lambda_l1_masked {lambda_l1_masked}"
                                f" --lambda_l1_addon {lambda_l1_addon}"
                                # batching / sampling
                                f" --edge_batch_size {edge_batch_size}"
                                " --node_batch_size None"
                                f" --n_sampled_neighbors {n_sampled_neighbors}"
                                # the job id is passed on as the timestamp suffix
                                f" --timestamp_suffix _{job_id}"
                            )

                            submit_python_script(
                                job_name_prefix=job_name_prefix,
                                job_id=job_id,
                                job_folder_path=job_folder_path,
                                conda_env_name=conda_env_name,
                                script_folder_path=script_folder_path,
                                script_name=script_name,
                                script_args=script_args,
                                nice=10000,
                            )

                            # Next combination (if any) gets a fresh id.
                            job_id += 1

In [None]:
# Reference-model training run labelled "different random seed #1":
# all six batches, n_neighbors = 8, seed = 42.
job_id = 6
seed = 42

# Data selection.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_hvg = 0
n_svg = 5000

# Neighbourhood, sampling and batching.
n_neighbors = 8
n_sampled_neighbors = 4
edge_batch_size = "256"
node_label_method = "one-hop-norm"

# Model configuration.
conv_layer_encoder = "gatv2conv"
n_addon_gp = 100
active_gp_thresh_ratio = 0.01
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# Loss weights.
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Job bookkeeping: names and locations handed to submit_python_script.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Adjacent string literals are concatenated by the parser; every fragment
# starts with a space so the pieces form one command-line tail.
script_args = (
    # dataset, neighbourhood size, gene selection
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    # GP-related flags
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # key names and model label
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model configuration
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training schedule and loss weights
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --lambda_l1_addon {lambda_l1_addon}"
    # batching / sampling
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    # seed, and the job id passed on as the timestamp suffix
    f" --seed {seed}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Bump the counter once the job has been handed over.
job_id += 1

In [None]:
# Reference-model training run labelled "different random seed #2":
# all six batches, n_neighbors = 8, seed = 43.
# NOTE(review): job_id 6 is also used by the preceding seed-42 cell. Since
# job_name_prefix and job_folder_path are identical there, the job-file and
# log paths that submit_python_script derives from them coincide, as does the
# "_6" timestamp suffix -- confirm the reuse is intended before running both.
job_id = 6
seed = 43

# Data selection.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_hvg = 0
n_svg = 5000

# Neighbourhood, sampling and batching.
n_neighbors = 8
n_sampled_neighbors = 4
edge_batch_size = "256"
node_label_method = "one-hop-norm"

# Model configuration.
conv_layer_encoder = "gatv2conv"
n_addon_gp = 100
active_gp_thresh_ratio = 0.01
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# Loss weights.
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Job bookkeeping: names and locations handed to submit_python_script.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Adjacent string literals are concatenated by the parser; every fragment
# starts with a space so the pieces form one command-line tail.
script_args = (
    # dataset, neighbourhood size, gene selection
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    # GP-related flags
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # key names and model label
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model configuration
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training schedule and loss weights
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --lambda_l1_addon {lambda_l1_addon}"
    # batching / sampling
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    # seed, and the job id passed on as the timestamp suffix
    f" --seed {seed}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Bump the counter once the job has been handed over.
job_id += 1

In [None]:
# Reference-model training run restricted to batches 1-4
# (n_neighbors = 8, seed = 0).
job_id = 7
seed = 0

# Data selection.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
reference_batches = "batch1 batch2 batch3 batch4"
n_hvg = 0
n_svg = 5000

# Neighbourhood, sampling and batching.
n_neighbors = 8
n_sampled_neighbors = 4
edge_batch_size = "256"
node_label_method = "one-hop-norm"

# Model configuration.
conv_layer_encoder = "gatv2conv"
n_addon_gp = 100
active_gp_thresh_ratio = 0.01
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# Loss weights.
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Job bookkeeping: names and locations handed to submit_python_script.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Adjacent string literals are concatenated by the parser; every fragment
# starts with a space so the pieces form one command-line tail.
script_args = (
    # dataset, neighbourhood size, gene selection
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    # GP-related flags
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # key names and model label
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model configuration
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training schedule and loss weights
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --lambda_l1_addon {lambda_l1_addon}"
    # batching / sampling
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    # seed, and the job id passed on as the timestamp suffix
    f" --seed {seed}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Bump the counter once the job has been handed over.
job_id += 1

In [None]:
# Reference-model training run restricted to batches 3 and 4
# (n_neighbors = 8, seed = 0).
job_id = 8
seed = 0

# Data selection.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
reference_batches = "batch3 batch4"
n_hvg = 0
n_svg = 5000

# Neighbourhood, sampling and batching.
n_neighbors = 8
n_sampled_neighbors = 4
edge_batch_size = "256"
node_label_method = "one-hop-norm"

# Model configuration.
conv_layer_encoder = "gatv2conv"
n_addon_gp = 100
active_gp_thresh_ratio = 0.01
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# Loss weights.
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Job bookkeeping: names and locations handed to submit_python_script.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Adjacent string literals are concatenated by the parser; every fragment
# starts with a space so the pieces form one command-line tail.
script_args = (
    # dataset, neighbourhood size, gene selection
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    # GP-related flags
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # key names and model label
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model configuration
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training schedule and loss weights
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --lambda_l1_addon {lambda_l1_addon}"
    # batching / sampling
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    # seed, and the job id passed on as the timestamp suffix
    f" --seed {seed}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Bump the counter once the job has been handed over.
job_id += 1

In [None]:
# Reference-model training run labelled "Different prior GP sets (Omnipath)":
# only --include_omnipath_gps is switched on, the NicheNet, MEBOCOST and
# CollecTRI GP flags are passed in their "--no-" form, and n_addon_gp is 0.
# NOTE(review): job_id 8 is also used by the preceding "batch3 batch4" cell.
# Since job_name_prefix and job_folder_path are identical there, the job-file
# and log paths that submit_python_script derives from them coincide, as does
# the "_8" timestamp suffix -- confirm the reuse is intended.
job_id = 8
seed = 0

# Data selection.
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
species = "mouse"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_hvg = 0
n_svg = 5000

# Neighbourhood, sampling and batching.
n_neighbors = 8
n_sampled_neighbors = 4
edge_batch_size = "256"
node_label_method = "one-hop-norm"

# Model configuration.
conv_layer_encoder = "gatv2conv"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# Loss weights.
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Job bookkeeping: names and locations handed to submit_python_script.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"

# Adjacent string literals are concatenated by the parser; every fragment
# starts with a space so the pieces form one command-line tail.
script_args = (
    # dataset, neighbourhood size, gene selection
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    # GP-related flags (Omnipath on, the other three sources off)
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_omnipath_gps"
    " --no-include_nichenet_gps"
    " --no-include_mebocost_gps"
    " --no-include_collectri_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # key names and model label
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model configuration
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training schedule and loss weights
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --lambda_l1_addon {lambda_l1_addon}"
    # batching / sampling
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    # seed, and the job id passed on as the timestamp suffix
    f" --seed {seed}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Bump the counter once the job has been handed over.
job_id += 1

In [None]:
job_id = 9 # Different prior GP sets (NicheNet)
lambda_edge_recon = 5000000
lambda_gene_expr_recon = 3000
active_gp_thresh_ratio = 0.01
lambda_l1_masked = 0.
lambda_l1_addon = 0.
n_addon_gp = 0
n_svg = 5000
conv_layer_encoder = "gatv2conv"
n_neighbors = 8
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_sampled_neighbors = 4
n_hvg = 0
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "256"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
seed = 0

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "train_nichecompass_reference_model.py"
script_args = f" --dataset {dataset}" \
              f" --reference_batches {reference_batches}" \
              f" --n_neighbors {n_neighbors}" \
              " --filter_genes" \
              f" --n_hvg {n_hvg}" \
              f" --n_svg {n_svg}" \
              " --nichenet_keep_target_genes_ratio 1.0" \
              " --nichenet_max_n_target_genes_per_gp 250" \
              " --no-include_omnipath_gps" \
              " --include_nichenet_gps" \
              " --no-include_mebocost_gps" \
              " --no-include_collectri_gps" \
              f" --species {species}" \
              " --gp_filter_mode subset" \
              " --combine_overlap_gps" \
              " --overlap_thresh_source_genes 0.9" \
              " --overlap_thresh_target_genes 0.9" \
              " --overlap_thresh_genes 0.9" \
              " --counts_key counts" \
              " --condition_key batch" \
              f" --cat_covariates_keys {cat_covariates_keys}" \
              f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
              " --spatial_key spatial" \
              " --adj_key spatial_connectivities" \
              " --mapping_entity_key mapping_entity" \
              " --gp_targets_mask_key nichecompass_gp_targets" \
              " --gp_sources_mask_key nichecompass_gp_sources" \
              " --gp_names_key nichecompass_gp_names" \
              f" --model_label {task}" \
              " --active_gp_names_key nichecompass_active_gp_names" \
              " --latent_key nichecompass_latent" \
              f" --n_addon_gp {n_addon_gp}" \
              f" --active_gp_thresh_ratio {active_gp_thresh_ratio}" \
              " --gene_expr_recon_dist nb" \
              f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
              f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
              " --log_variational" \
              f" --node_label_method {node_label_method}" \
              " --n_layers_encoder 1" \
              " --n_hidden_encoder None" \
              f" --conv_layer_encoder {conv_layer_encoder}" \
              " --n_epochs 100" \
              " --n_epochs_all_gps 25" \
              " --n_epochs_no_cat_covariates_contrastive 0" \
              " --lr 0.001" \
              f" --lambda_edge_recon {lambda_edge_recon}" \
              f" --lambda_gene_expr_recon {lambda_gene_expr_recon}" \
              f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
              f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
              f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
              " --lambda_group_lasso 0." \
              f" --lambda_l1_masked {lambda_l1_masked}" \
              f" --lambda_l1_addon {lambda_l1_addon}" \
              f" --edge_batch_size {edge_batch_size}" \
              " --node_batch_size None" \
              f" --n_sampled_neighbors {n_sampled_neighbors}" \
              f" --seed {seed}" \
              f" --timestamp_suffix _{job_id}"

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

job_id += 1

In [None]:
# Run 10 -- different prior GP sets: MEBOCOST only.
# OmniPath, NicheNet and CollecTRI GPs are switched off through their
# "no-include_*" flags in the argument list below.
job_id = 10

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "no-include_omnipath_gps",
        "no-include_nichenet_gps",
        "include_mebocost_gps",
        "no-include_collectri_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# Run 11 -- different prior GP sets: CollecTRI only.
# OmniPath, NicheNet and MEBOCOST GPs are switched off through their
# "no-include_*" flags in the argument list below.
job_id = 11

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "no-include_omnipath_gps",
        "no-include_nichenet_gps",
        "no-include_mebocost_gps",
        "include_collectri_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# Run 12 -- 50% subsample, leave out embryo 3.
# Only batch1-batch4 are used as reference batches (presumably batch5/6
# make up embryo 3 -- confirm against the dataset).
# Of the GP-source switches only "include_mebocost_gps" is passed; the
# others are left at whatever the training script defaults to.
job_id = 12

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_50pct"
reference_batches = "batch1 batch2 batch3 batch4"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "include_mebocost_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# Run 13 -- 25% subsample, leave out embryo 3.
# Only batch1-batch4 are used as reference batches (presumably batch5/6
# make up embryo 3 -- confirm against the dataset).
# Of the GP-source switches only "include_mebocost_gps" is passed; the
# others are left at whatever the training script defaults to.
job_id = 13

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_25pct"
reference_batches = "batch1 batch2 batch3 batch4"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "include_mebocost_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# Run 14 -- 10% subsample, leave out embryo 3.
# Only batch1-batch4 are used as reference batches (presumably batch5/6
# make up embryo 3 -- confirm against the dataset).
# Of the GP-source switches only "include_mebocost_gps" is passed; the
# others are left at whatever the training script defaults to.
job_id = 14

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_10pct"
reference_batches = "batch1 batch2 batch3 batch4"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "include_mebocost_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# Run 15 -- 5% subsample, leave out embryo 3.
# Only batch1-batch4 are used as reference batches (presumably batch5/6
# make up embryo 3 -- confirm against the dataset).
# Of the GP-source switches only "include_mebocost_gps" is passed; the
# others are left at whatever the training script defaults to.
job_id = 15

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_5pct"
reference_batches = "batch1 batch2 batch3 batch4"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "include_mebocost_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# Run 16 -- 1% subsample, leave out embryo 3.
# Only batch1-batch4 are used as reference batches (presumably batch5/6
# make up embryo 3 -- confirm against the dataset).
# Of the GP-source switches only "include_mebocost_gps" is passed; the
# others are left at whatever the training script defaults to.
job_id = 16

# Dataset / task identifiers
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_1pct"
reference_batches = "batch1 batch2 batch3 batch4"
species = "mouse"
seed = 0

# Neighbor and gene-count settings
n_neighbors = 8
n_sampled_neighbors = 4
n_hvg = 0
n_svg = 5000

# Encoder, node-label and gene-program settings
conv_layer_encoder = "gatv2conv"
node_label_method = "one-hop-norm"
n_addon_gp = 0
active_gp_thresh_ratio = 0.01
edge_batch_size = "256"

# Loss-related settings
lambda_edge_recon = 5_000_000
lambda_gene_expr_recon = 3000
lambda_l1_masked = 0.0
lambda_l1_addon = 0.0
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0

# Categorical covariate settings (strings; within this cell they are only
# interpolated into script_args)
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# Names and locations handed to the job submission helper
script_name = "train_nichecompass_reference_model.py"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    "nichecompass-reproducibility/analysis/data_analysis/"
    f"{task}"
)
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

# Each entry below is emitted as one " --<entry>" fragment, in order.
script_args = "".join(
    f" --{cli_arg}"
    for cli_arg in (
        f"dataset {dataset}",
        f"reference_batches {reference_batches}",
        f"n_neighbors {n_neighbors}",
        "filter_genes",
        f"n_hvg {n_hvg}",
        f"n_svg {n_svg}",
        "nichenet_keep_target_genes_ratio 1.0",
        "nichenet_max_n_target_genes_per_gp 250",
        "include_mebocost_gps",
        f"species {species}",
        "gp_filter_mode subset",
        "combine_overlap_gps",
        "overlap_thresh_source_genes 0.9",
        "overlap_thresh_target_genes 0.9",
        "overlap_thresh_genes 0.9",
        "counts_key counts",
        "condition_key batch",
        f"cat_covariates_keys {cat_covariates_keys}",
        f"cat_covariates_no_edges {cat_covariates_no_edges}",
        "spatial_key spatial",
        "adj_key spatial_connectivities",
        "mapping_entity_key mapping_entity",
        "gp_targets_mask_key nichecompass_gp_targets",
        "gp_sources_mask_key nichecompass_gp_sources",
        "gp_names_key nichecompass_gp_names",
        f"model_label {task}",
        "active_gp_names_key nichecompass_active_gp_names",
        "latent_key nichecompass_latent",
        f"n_addon_gp {n_addon_gp}",
        f"active_gp_thresh_ratio {active_gp_thresh_ratio}",
        "gene_expr_recon_dist nb",
        f"cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f"cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        "log_variational",
        f"node_label_method {node_label_method}",
        "n_layers_encoder 1",
        "n_hidden_encoder None",
        f"conv_layer_encoder {conv_layer_encoder}",
        "n_epochs 100",
        "n_epochs_all_gps 25",
        "n_epochs_no_cat_covariates_contrastive 0",
        "lr 0.001",
        f"lambda_edge_recon {lambda_edge_recon}",
        f"lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f"lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f"contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f"contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        "lambda_group_lasso 0.",
        f"lambda_l1_masked {lambda_l1_masked}",
        f"lambda_l1_addon {lambda_l1_addon}",
        f"edge_batch_size {edge_batch_size}",
        "node_batch_size None",
        f"n_sampled_neighbors {n_sampled_neighbors}",
        f"seed {seed}",
        f"timestamp_suffix _{job_id}",
    )
)

# Write the job file and hand it to Slurm.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

job_id += 1

In [None]:
# NicheCompass reference model for the imputed seqFISH mouse organogenesis
# dataset, trained on a radius-based neighborhood graph (job 17).
job_id = 17

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
species = "mouse"
graph_type = "radius"
radius = 0.08
n_hvg = 0
n_svg = 5000

# --- categorical covariates (space-separated, passed through as strings) ---
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# --- model architecture ---
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"
n_addon_gp = 100
active_gp_thresh_ratio = 0.01

# --- loss weights ---
lambda_edge_recon = 5000000
lambda_gene_expr_recon = 3000
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.
lambda_l1_addon = 0.

# --- data loading & seeding ---
edge_batch_size = "256"
n_sampled_neighbors = 4
seed = 0

# --- job bookkeeping ---
script_name = "train_nichecompass_reference_model.py"
job_name_prefix = "_".join((dataset, "nichecompass", node_label_method, task))
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/analysis/data_analysis/{task}")

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --graph_type {graph_type}"
    f" --radius {radius}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --lambda_l1_addon {lambda_l1_addon}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --seed {seed}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Advance the counter so a follow-up submission gets a fresh job id.
job_id += 1

### 5.2 STARmap PLUS Mouse Central Nervous System

In [None]:
# Load the full STARmap PLUS Alzheimer's (adata) and CNS (adata2) datasets.
adata, adata2 = (
    sc.read_h5ad(f"../datasets/st_data/gold/{dataset_name}.h5ad")
    for dataset_name in ("starmap_plus_mouse_alzheimers",
                         "starmap_plus_mouse_cns"))

In [None]:
# Gene names shared by both datasets. The Alzheimer's names are upper-cased
# before the comparison; the CNS names are compared as stored.
genes = {gene.upper() for gene in adata.var.index.tolist()}
genes2 = set(adata2.var.index.tolist())
genes_intersect = genes & genes2

In [None]:
# Re-save the 20 CNS batch files under the combined "starmap_plus_mouse"
# naming scheme, keeping their batch numbers (1-20).
for idx in range(1, 21):
    src_path = f"../datasets/st_data/gold/starmap_plus_mouse_cns_batch{idx}.h5ad"
    dst_path = f"../datasets/st_data/gold/starmap_plus_mouse_batch{idx}.h5ad"
    adata_test = sc.read_h5ad(src_path)
    adata_test.write(dst_path)

In [None]:
# Re-save the 8 Alzheimer's batch files under the combined
# "starmap_plus_mouse" naming scheme; they are appended after the 20 CNS
# batches, i.e. Alzheimer's batch k becomes combined batch 20 + k.
for idx in range(1, 9):
    src_path = f"../datasets/st_data/gold/starmap_plus_mouse_alzheimers_batch{idx}.h5ad"
    dst_path = f"../datasets/st_data/gold/starmap_plus_mouse_batch{20+idx}.h5ad"
    adata_test = sc.read_h5ad(src_path)
    adata_test.write(dst_path)

In [None]:
# Take combined batch 25 as the reference for gene naming: restrict it to the
# genes shared by both datasets (case-insensitive match) and remember the
# resulting gene index.
adata_test = sc.read_h5ad("../datasets/st_data/gold/starmap_plus_mouse_batch25.h5ad")
shared_gene_names = [
    gene
    for gene in adata_test.var.index.tolist()
    if gene.upper() in genes_intersect
]
adata_test = adata_test[:, shared_gene_names]
selected_genes = adata_test.var.index

In [None]:
# Harmonize all 28 combined STARmap PLUS batch files in place: keep only the
# genes shared by both datasets, label every cell with the combined batch
# name and give all batches the same gene names (those of `selected_genes`).
for idx in range(1, 29):
    batch_file_path = f"../datasets/st_data/gold/starmap_plus_mouse_batch{idx}.h5ad"
    adata_test = sc.read_h5ad(batch_file_path)
    shared_gene_names = [
        gene
        for gene in adata_test.var.index.tolist()
        if gene.upper() in genes_intersect
    ]
    # Copy the subset: assigning to `.obs` / `.var` of an AnnData view
    # otherwise triggers an implicit copy plus an ImplicitModificationWarning.
    adata_test = adata_test[:, shared_gene_names].copy()

    # The gene names are overwritten positionally below, so the genes of this
    # batch must come in the same order as `selected_genes` (ignoring case).
    # Fail loudly instead of silently attaching wrong names to the counts.
    if ([gene.upper() for gene in adata_test.var.index]
            != [gene.upper() for gene in selected_genes]):
        raise ValueError(
            f"Genes of batch{idx} do not match `selected_genes` "
            "(different genes or different order); refusing to rename.")

    adata_test.obs["batch"] = f"batch{idx}"
    adata_test.var.index = selected_genes
    adata_test.write(batch_file_path)

In [None]:
import scanpy as sc

# Prepare the STARmap PLUS Alzheimer's dataset and split it into one file per
# donor. These are the `..._alzheimers_batch{n}.h5ad` files that the
# renumbering cell further up reads.
# Original author's note on the obs columns: batch, dataset,
# experimental_batch (?)
adata = sc.read_h5ad("../datasets/st_data/gold/starmap_plus_mouse_alzheimers.h5ad")

# Free up the "batch" column name; it is re-assigned per donor below.
adata.obs.rename(columns={'batch': 'experimental_batch'}, inplace=True)
adata.obs["dataset"] = "alzheimers"

# Keep a copy of the original X in a layer before X is normalized and
# log-transformed.
adata.layers["counts"] = adata.X.copy()
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

for idx, donor in enumerate(adata.obs["donor_id"].unique().tolist()):
    # Copy the donor subset: writing to `.obs` of an AnnData view otherwise
    # triggers an implicit copy plus an ImplicitModificationWarning.
    adata_donor = adata[adata.obs["donor_id"] == donor].copy()
    # Batches are numbered from 1 in order of first appearance of the donor.
    adata_donor.obs["batch"] = f"batch{idx+1}"
    adata_donor.write(f"../datasets/st_data/gold/starmap_plus_mouse_alzheimers_batch{idx+1}.h5ad")

In [None]:
# Scratch calculation: a quarter of 0.0625, i.e. 0.015625 -- the same value
# that `contrastive_logits_pos_ratio` is set to in the following cell.
0.0625 / 4

In [None]:
# NicheCompass reference model for the combined STARmap PLUS mouse dataset
# (CNS + Alzheimer's batches), job 26.
job_id = 26

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "starmap_plus_mouse"
species = "mouse"
# Only a subset of the combined batches (batch1 ... batch28) is used here.
reference_batches = "batch10 batch15 batch21 batch22 batch23"
n_neighbors = 8

# --- categorical covariates (passed through as strings) ---
cat_covariates_keys = "dataset"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "50"

# --- loss weights ---
lambda_cat_covariates_contrastive = 300.
contrastive_logits_pos_ratio = 0.015625
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.
# NOTE(review): lambda_l1_addon is assigned here, but `script_args` below
# contains no `--lambda_l1_addon` flag, so this value never reaches the
# training script -- confirm whether that is intended.
lambda_l1_addon = 100.

# --- data loading ---
edge_batch_size = 512
n_sampled_neighbors = 4

# --- job bookkeeping ---
script_name = "train_nichecompass_reference_model.py"
job_name_prefix = "_".join((dataset, "nichecompass", task))
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    " --active_gp_thresh_ratio 0.01"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gatv2conv"
    # training
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# NicheCompass reference model for the STARmap PLUS mouse CNS dataset using
# all 20 batches, job 1.
job_id = 1

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "starmap_plus_mouse_cns"
species = "mouse"
# "batch1 batch2 ... batch20"
reference_batches = " ".join(f"batch{i}" for i in range(1, 21))
n_neighbors = 8  # values tried: 8, 12

# --- categorical covariates (passed through as strings) ---
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "20"

# --- model architecture ---
node_label_method = "one-hop-norm"

# --- loss weights ---
lambda_cat_covariates_contrastive = 0.  # values tried: 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0.  # values tried: 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.

# --- data loading ---
edge_batch_size = 512  # alternative: 2048
n_sampled_neighbors = 4

# --- job bookkeeping ---
script_name = "train_nichecompass_reference_model.py"
job_name_prefix = "_".join((dataset, "nichecompass", task))
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gatv2conv"
    # training
    " --n_epochs 35"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# NicheCompass reference model for the STARmap PLUS mouse CNS dataset
# restricted to the first three batches, job 2.
job_id = 2

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "starmap_plus_mouse_cns"
species = "mouse"
reference_batches = "batch1 batch2 batch3"
n_neighbors = 8  # values tried: 8, 12

# --- categorical covariates (passed through as strings) ---
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

# --- model architecture ---
node_label_method = "one-hop-norm"

# --- loss weights ---
lambda_cat_covariates_contrastive = 0.  # values tried: 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0.  # values tried: 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.

# --- data loading ---
edge_batch_size = 512  # alternative: 2048
n_sampled_neighbors = 4

# --- job bookkeeping ---
script_name = "train_nichecompass_reference_model.py"
job_name_prefix = "_".join((dataset, "nichecompass", task))
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    " --active_gp_thresh_ratio 0."
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gatv2conv"
    # training
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.3 nanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

In [None]:
# NicheCompass reference model for the nanoString CosMx SMI human NSCLC
# dataset with three categorical covariates (batch, fov, patient), job 5.
job_id = 5

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
# All eight batches are currently included.
# Original author's note: leave out: batch3, batch8
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8"
n_neighbors = 9

# --- categorical covariates: one space-separated entry per covariate key ---
cat_covariates_keys = "batch fov patient"
cat_covariates_no_edges = "True False True"
cat_covariates_embeds_nums = "16 60 10"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"

# --- model architecture ---
node_label_method = "one-hop-norm"

# --- loss weights ---
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.  # values tried: 0.015625, 0.0078125
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 300.

# --- data loading ---
edge_batch_size = 512  # values tried: 4096, 2048, 512
n_sampled_neighbors = 4

# --- job bookkeeping ---
script_name = "train_nichecompass_reference_model.py"
job_name_prefix = "_".join((dataset, "nichecompass", task))
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    " --n_addon_gp 100"
    " --active_gp_thresh_ratio 0.01"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gatv2conv"
    # training
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 50000000."
    " --lambda_gene_expr_recon 30000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    " --lambda_l1_addon 300."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.5 Spatial ATAC-RNA-Seq Mouse Brain

In [None]:
# NicheCompass reference model for the spatial ATAC-RNA-seq mouse brain
# dataset (batch 2 only) including the ATAC modality, job 1.
job_id = 1

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "spatial_atac_rna_seq_mouse_brain_batch2"
reference_batches = "None"
species = "mouse"
n_neighbors = 8
n_hvg = 0
n_svg = 3000
n_svp = 15000

# --- gene programs ---
nichenet_max_n_target_genes_per_gp = 250
n_addon_gp = 100
active_gp_thresh_ratio = 0.03

# --- categorical covariates ---
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "None"
cat_covariates_embeds_nums = 0

# --- model architecture ---
node_label_method = "one-hop-norm"
conv_layer_encoder = "gatv2conv"

# --- loss weights ---
lambda_edge_recon = 5000000.
lambda_gene_expr_recon = 3000.
lambda_chrom_access_recon = 1000.
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 300.
lambda_l1_addon = 300.

# --- data loading ---
edge_batch_size = "256"
n_sampled_neighbors = 4

# --- job bookkeeping ---
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --filter_genes"
    f" --n_hvg {n_hvg}"
    f" --n_svg {n_svg}"
    f" --n_svp {n_svp}"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    f" --nichenet_max_n_target_genes_per_gp {nichenet_max_n_target_genes_per_gp}"
    " --include_mebocost_gps"
    " --include_collectri_gps"
    " --include_brain_marker_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    " --condition_key batch"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    # ATAC modality
    " --include_atac_modality"
    " --filter_peaks"
    " --min_cell_peak_thresh_ratio 0.005"
    " --min_cell_gene_thresh_ratio 0.005"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    f" --n_addon_gp {n_addon_gp}"
    f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    f" --conv_layer_encoder {conv_layer_encoder}"
    # training
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    f" --lambda_edge_recon {lambda_edge_recon}"
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
    f" --lambda_chrom_access_recon {lambda_chrom_access_recon}"
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    # No trailing "." after the placeholder: the float already renders as
    # "300.0", and "300.0." would not be a valid number on the command line.
    f" --lambda_l1_addon {lambda_l1_addon}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

# Advance the counter so a follow-up submission gets a fresh job id.
job_id += 1

### 5.6 Xenium Human Breast Cancer

In [None]:
# NicheCompass reference model for the Xenium human breast cancer dataset
# (two batches), job 1.
job_id = 1

# --- dataset & neighborhood graph ---
task = "reference"
dataset = "xenium_human_breast_cancer"
species = "human"
reference_batches = "batch1 batch2"
n_neighbors = 8

# --- categorical covariates (passed through as strings) ---
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "2"

# --- loss weights ---
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.
# NOTE(review): lambda_l1_addon is assigned here, but `script_args` below
# contains no `--lambda_l1_addon` flag, so this value never reaches the
# training script -- confirm whether that is intended.
lambda_l1_addon = 100.

# --- data loading ---
edge_batch_size = 512
n_sampled_neighbors = 4

# --- job bookkeeping ---
script_name = "train_nichecompass_reference_model.py"
job_name_prefix = "_".join((dataset, "nichecompass", task))
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = (
    "/home/aih/sebastian.birk/workspace/projects/"
    f"nichecompass-reproducibility/scripts/{task}")

# Command line handed to the training script. Adjacent string literals are
# concatenated, so every fragment carries its own leading space.
script_args = (
    # data & graph
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    # gene programs
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    # AnnData keys
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    # model
    " --active_gp_thresh_ratio 0.01"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gatv2conv"
    # training
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.8 MERFISH Mouse Brain

In [None]:
# MERFISH mouse brain: submit a Slurm job that runs the reference model
# training script on batch1 ... batch239.
task = "reference"
dataset = "merfish_mouse_brain"
job_id = 1
reference_batches = " ".join(f"batch{i}" for i in range(1, 240))
n_neighbors = 4
n_sampled_neighbors = 4
species = "mouse"
edge_batch_size = 1024
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "donor_id batch"
cat_covariates_no_edges = "True True"
cat_covariates_embeds_nums = "4 239"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.
lambda_l1_addon = 0.  # defined here but not forwarded to the script below

job_name_prefix = f"{dataset}_nichecompass_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; adjacent string literals are concatenated, each
# fragment carries its own leading space.
script_args = (
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    f" --n_neighbors {n_neighbors}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1.0"
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.01"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    " --node_label_method one-hop-norm"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 8"
    " --n_epochs_all_gps 2"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    " --edge_val_ratio 0.01"
    " --node_val_ratio 0.01"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
    " --no-mlflow_tracking"
    " --no-compute_knn_graph"
    f" --timestamp_suffix _{job_id}"
)

# nice=10000 equals the default of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# MERFISH mouse brain: submit a CPU job that runs the postprocessing script
# (latent, PCA, kNN graph, UMAP and Leiden flags) for one trained model.
dataset = "merfish_mouse_brain"
task = "postprocessing"
model_label = "reference"
# Alternative timestamps kept from the original cell:
# "21022024_194703_55", "19022024_170806_46", "21022024_194703_56"
load_timestamp = "24022024_170907_82"
job_id = 7

job_name_prefix = f"{dataset}_nichecompass_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "postprocess_nichecompass_model.py"
script_args = (
    f" --dataset {dataset}"
    f" --model_label {model_label}"
    f" --load_timestamp {load_timestamp}"
    " --gp_names_key nichecompass_gp_names"
    " --compute_latent"
    " --compute_pca"
    " --compute_knn_graph"
    " --compute_umap"
    " --compute_leiden"
)

# submit_python_script only writes the --gres line when "gpu" occurs in the
# partition name, so the gres value has no effect for "cpu_p".
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    p="cpu_p",
    gres="cpu:1",
    qos="cpu_normal",
    mem="300G",
    nice=10000,
)

In [None]:
# MERFISH mouse brain: submit a CPU job that runs the postprocessing script
# with UMAP computation enabled and Leiden clustering disabled.
dataset = "merfish_mouse_brain"
task = "postprocessing"
model_label = "reference"
# Alternative timestamps kept from the original cell:
# "21022024_194703_55", "19022024_170806_46", "21022024_194703_56"
load_timestamp = "21022024_194703_55"
job_id = 9

job_name_prefix = f"{dataset}_nichecompass_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "postprocess_nichecompass_model.py"
script_args = (
    f" --dataset {dataset}"
    f" --model_label {model_label}"
    f" --load_timestamp {load_timestamp}"
    " --gp_names_key nichecompass_gp_names"
    " --compute_umap"
    " --no-compute_leiden"
)

# submit_python_script only writes the --gres line when "gpu" occurs in the
# partition name, so the gres value has no effect for "cpu_p".
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    p="cpu_p",
    gres="cpu:1",
    qos="cpu_normal",
    mem="300G",
    nice=10000,
)

## 6. NicheCompass Query Mapping

### 6.1 nanoString CosMx Human NSCLC

In [None]:
# nanoString CosMx human NSCLC: submit a Slurm job that runs the query mapping
# script for batch3 and batch8 against the reference model identified by
# load_timestamp.
task = "reference_query"
dataset = "nanostring_cosmx_human_nsclc"
job_id = 105
query_batches = "batch3 batch8"
n_neighbors = 4
n_sampled_neighbors = 4
load_timestamp = "25092023_232129_105"
lambda_cat_covariates_contrastive = 1000000.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{task}_query_mapping"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    " --reference_query_model_label reference_query_mapping"
    " --n_epochs 400"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
    f" --n_sampled_neighbors {n_sampled_neighbors}"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

### 6.2 seqFISH Mouse Organogenesis Imputed

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job for
# batch5 and batch6 on the interactive GPU partition.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 1
query_batches = "batch5 batch6"
n_neighbors = 12
node_label_method = "one-hop-norm"  # not used in this cell's job name or arguments
load_timestamp = "01072023_165203_1"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 5.
edge_batch_size = 4096

job_name_prefix = f"{dataset}_nichecompass_{task}"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Overrides the default time limit, partition, QOS and nice value.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    t="12:00:00",
    p="interactive_gpu_p",
    gres="gpu:1",
    qos="interactive_gpu",
    nice=9999,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job for
# batch3 and batch4.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
# NOTE(review): the following cell uses the same dataset, node_label_method,
# task and job_id, so both cells resolve to the same job name and therefore
# the same job/log file names. Confirm whether distinct ids were intended.
job_id = 1
query_batches = "batch3 batch4"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "01082024_191835_2"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job for
# batch5 and batch6.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
# NOTE(review): the preceding cell uses the same dataset, node_label_method,
# task and job_id, so both cells resolve to the same job name and therefore
# the same job/log file names. Confirm whether distinct ids were intended.
job_id = 1
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "01082024_191836_3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job for
# batch1, batch2, batch5 and batch6.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 10
query_batches = "batch1 batch2 batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "07082024_124745_8"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job for
# batch5 and batch6 against the reference model identified by load_timestamp.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 11
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "07082024_124745_7"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job that
# passes the 50pct subsample as reference dataset and the full dataset as
# query dataset (batch5 and batch6).
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_50pct"
query_dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 12
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "08082024_095147_12"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --reference_dataset {dataset}"
    f" --query_dataset {query_dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job that
# passes the 25pct subsample as reference dataset and the full dataset as
# query dataset (batch5 and batch6).
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_25pct"
query_dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 13
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "08082024_095147_13"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --reference_dataset {dataset}"
    f" --query_dataset {query_dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job that
# passes the 10pct subsample as reference dataset and the full dataset as
# query dataset (batch5 and batch6).
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_10pct"
query_dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 14
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "08082024_140042_14"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --reference_dataset {dataset}"
    f" --query_dataset {query_dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job that
# passes the 5pct subsample as reference dataset and the full dataset as
# query dataset (batch5 and batch6).
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_5pct"
query_dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 15
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "08082024_140042_15"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --reference_dataset {dataset}"
    f" --query_dataset {query_dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# seqFISH mouse organogenesis (imputed): submit a query mapping job that
# passes the 1pct subsample as reference dataset and the full dataset as
# query dataset (batch5 and batch6).
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed_subsample_1pct"
query_dataset = "seqfish_mouse_organogenesis_imputed"
# Was 15, the id already used by the 5pct subsample cell. The 50/25/10/5pct
# cells use the numeric suffix of their load_timestamp as job_id, which is 16
# here. The id only enters the job name and job/log file names.
job_id = 16
query_batches = "batch5 batch6"
n_neighbors = 8
node_label_method = "one-hop-norm"  # only used in the job name
load_timestamp = "08082024_140042_16"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
edge_batch_size = 256

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"./data_analysis/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/data_analysis/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Parenthesised so the expression is closed explicitly; the previous version
# ended in a line-continuation backslash and only terminated because a blank
# line followed it.
script_args = (
    f" --reference_dataset {dataset}"
    f" --query_dataset {query_dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    " --reference_model_label reference"
    f" --load_timestamp {load_timestamp}"
    " --query_model_label query"
    f" --reference_query_model_label {task}"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 5000000."
    " --lambda_gene_expr_recon 3000."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Resource settings are left at the defaults of submit_python_script.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
)

In [None]:
# Submit one metrics computation job per seqFISH dataset variant (full
# dataset plus the 50/25/10/5/1 pct subsamples).
method = "nichecompass"
task = "single_sample_method_benchmarking"
cell_type_key = "celltype_mapped_refined"
batch_key = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_id = 1
for dataset in (
        "seqfish_mouse_organogenesis",
        "seqfish_mouse_organogenesis_subsample_50pct",
        "seqfish_mouse_organogenesis_subsample_25pct",
        "seqfish_mouse_organogenesis_subsample_10pct",
        "seqfish_mouse_organogenesis_subsample_5pct",
        "seqfish_mouse_organogenesis_subsample_1pct",
):
    file_name = f"{dataset}_{method}.h5ad"

    # The dataset name is part of the prefix, so the shared job_id still
    # yields a distinct job name per iteration.
    job_name_prefix = f"{method}_single_sample_method_benchmarking_{dataset}_metrics_computation"
    # Job files and logs go to the reference_query job folder, while the
    # script itself is taken from the benchmarking folder of `task`.
    job_folder_path = "./data_analysis/reference_query/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/analysis/benchmarking/{task}"
    script_name = "compute_benchmarking_metrics.py"
    script_args = (
        f" --dataset {dataset}"
        f" --task {task}"
        f" --file_name {file_name}"
        f" --cell_type_key {cell_type_key}"
        f" --batch_key {batch_key}"
        f" --latent_key {method}_latent"
        f" --metrics {metrics}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )