# Slurm Job Submission

- **Creator:** Sebastian Birk (<sebastian.birk@helmholtz-munich.de>)
- **Affiliation:** Helmholtz Munich, Institute of Computational Biology (ICB), Talavera-López Lab
- **Date of Creation:** 20.03.2023
- **Date of Last Modification:** 28.07.2023

## 1. Setup

### 1.1 Import Libraries

In [1]:
import numpy as np
import os

### 1.2 Define Parameters

In [2]:
# Conda environments used by the submitted jobs. Both names currently point to
# the test environment; the earlier, non-test names were "nichecompass" and
# "nichecompass-reproducibility".
conda_env_name = "nichecompass-test"
reproducibility_conda_env_name = "nichecompass-test"

### 1.3 Define Functions

In [3]:
def submit_python_script(
        job_name_prefix,
        job_id,
        job_folder_path,
        conda_env_name,
        script_folder_path,
        script_name,
        script_args,
        t="48:00:00",
        p="gpu_p",
        gres="gpu:1",
        qos="gpu",
        nice=10000,
        c=6,
        mem="128GB"):
    """
    Write a Slurm batch script that runs a Python script and submit it with
    `sbatch`.

    The batch script is written to `<job_folder_path>/job_<name>.cmd` (with any
    '/aih' substring removed from the folder path), where `<name>` is
    `<job_name_prefix>_<job_id>`. Stdout and stderr of the job are directed to
    `<job_folder_path>/logs/out_<name>.txt` and
    `<job_folder_path>/logs/err_<name>.txt`; the `logs` folder is created if it
    does not exist yet.

    Parameters
    ----------
    job_name_prefix:
        Prefix of the Slurm job name.
    job_id:
        Identifier appended to the prefix to form the job name.
    job_folder_path:
        Folder in which the batch script and the `logs` subfolder are stored.
    conda_env_name:
        Conda environment that the job activates before running the script.
    script_folder_path:
        Folder the job changes into before running the script. The script is
        invoked as `python ../<script_name>`, i.e. it is expected one level
        above this folder.
    script_name:
        File name of the Python script to run.
    script_args:
        Command line arguments appended verbatim right after the script name
        (so it should start with a space if not empty).
    t:
        Value of the `#SBATCH -t` option.
    p:
        Value of the `#SBATCH -p` option.
    gres:
        Value of the `#SBATCH --gres` option.
    qos:
        Value of the `#SBATCH --qos` option.
    nice:
        Value of the `#SBATCH --nice` option.
    c:
        Value of the `#SBATCH -c` option.
    mem:
        Value of the `#SBATCH --mem` option.
    """
    # Local import so that the function does not depend on additional
    # module-level imports
    import shlex

    job_name = f"{job_name_prefix}_{job_id}"
    # Account for fact that submit node has different home path than compute node
    job_file_path = f"{job_folder_path.replace('/aih', '')}/job_{job_name}.cmd"
    out_file_path = f"{job_folder_path}/logs/out_{job_name}.txt"
    err_file_path = f"{job_folder_path}/logs/err_{job_name}.txt"

    os.makedirs(job_folder_path + "/logs", exist_ok=True)

    job_script_lines = [
        "#!/bin/bash",
        f"#SBATCH -J {job_name}",
        f"#SBATCH -o {out_file_path}",
        f"#SBATCH -e {err_file_path}",
        f"#SBATCH -t {t}",
        f"#SBATCH -p {p}",
        f"#SBATCH -c {c}",
        f"#SBATCH --gres={gres}",
        f"#SBATCH --qos={qos}",
        f"#SBATCH --mem={mem}",
        f"#SBATCH --nice={nice}",
        "source $HOME/.bashrc",
        f"conda activate {conda_env_name}",
        "cd /",
        f"cd {script_folder_path}",
        # Arguments are appended verbatim to the script invocation
        f"python ../{script_name}{script_args}",
    ]

    with open(job_file_path, "w") as handle:
        handle.write("\n".join(job_script_lines) + "\n")

    # Quote the path so that special characters (e.g. spaces) in the job
    # folder cannot break or alter the shell command
    os.system(f"sbatch {shlex.quote(job_file_path)}")

## 2. NicheCompass Ablation

### 2.1 Loss Weights, Gene Program Mask

Ablating:
- Loss weights: Gene Expression Reconstruction, Edge Reconstruction, KL Divergence
- Gene Program Mask: FC GP Mask, NicheCompass Default GP Mask (Not Filtered, Weakly Filtered, Strongly Filtered)

Additional covariate:
- Spatial Neighborhood Graph: Number of Neighbors

#### 2.1.1 Xenium Human Breast Cancer

##### 2.1.1.1 Fully Connected GP Mask

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Fully connected GP mask, lower loss weight range:
# 3 x 3 x 4 = 36 jobs with job ids 1 to 36
job_id = 1
for lambda_edge_recon in [0, 50000, 500000]:
    for lambda_gene_expr_recon in [0, 30, 300]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 1.",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--add_fc_gps_instead_of_gp_dict_gps",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Fully connected GP mask, higher loss weight range:
# 2 x 2 x 4 = 16 jobs with job ids 37 to 52
job_id = 37
for lambda_edge_recon in [5000000, 50000000]:
    for lambda_gene_expr_recon in [3000, 30000]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 1.",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--add_fc_gps_instead_of_gp_dict_gps",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

##### 2.1.1.2 NicheCompass GP Mask (Keep All Target Genes)

In [7]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# NicheCompass GP mask keeping all target genes (ratio 1.), lower loss weight
# range: 3 x 3 x 4 = 36 jobs with job ids 53 to 88
job_id = 53
for lambda_edge_recon in [0, 50000, 500000]:
    for lambda_gene_expr_recon in [0, 30, 300]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 1.",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

Submitted batch job 12662836
Submitted batch job 12662837
Submitted batch job 12662838
Submitted batch job 12662839
Submitted batch job 12662840
Submitted batch job 12662841
Submitted batch job 12662842
Submitted batch job 12662843
Submitted batch job 12662844
Submitted batch job 12662845
Submitted batch job 12662846
Submitted batch job 12662847
Submitted batch job 12662848
Submitted batch job 12662849
Submitted batch job 12662850
Submitted batch job 12662851
Submitted batch job 12662852
Submitted batch job 12662853
Submitted batch job 12662854
Submitted batch job 12662855
Submitted batch job 12662856
Submitted batch job 12662857
Submitted batch job 12662858
Submitted batch job 12662859
Submitted batch job 12662860
Submitted batch job 12662861
Submitted batch job 12662862
Submitted batch job 12662863
Submitted batch job 12662864
Submitted batch job 12662865
Submitted batch job 12662866
Submitted batch job 12662867
Submitted batch job 12662868
Submitted batch job 12662869
Submitted batc

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# NicheCompass GP mask keeping all target genes (ratio 1.), higher loss weight
# range: 2 x 2 x 4 = 16 jobs with job ids 89 to 104
job_id = 89
for lambda_edge_recon in [5000000, 50000000]:
    for lambda_gene_expr_recon in [3000, 30000]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 1.",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

##### 2.1.1.3 NicheCompass GP Mask (Weakly Filter Target Genes)

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# NicheCompass GP mask with weakly filtered target genes (ratio 0.1), lower
# loss weight range: 3 x 3 x 4 = 36 jobs with job ids 105 to 140
job_id = 105
for lambda_edge_recon in [0, 50000, 500000]:
    for lambda_gene_expr_recon in [0, 30, 300]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 0.1",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# NicheCompass GP mask with weakly filtered target genes (ratio 0.1), higher
# loss weight range: 2 x 2 x 4 = 16 jobs with job ids 141 to 156
job_id = 141
for lambda_edge_recon in [5000000, 50000000]:
    for lambda_gene_expr_recon in [3000, 30000]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 0.1",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

##### 2.1.1.4 NicheCompass GP Mask (Strongly Filter Target Genes)

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# NicheCompass GP mask with strongly filtered target genes (ratio 0.01), lower
# loss weight range: 3 x 3 x 4 = 36 jobs with job ids 157 to 192
job_id = 157
for lambda_edge_recon in [0, 50000, 500000]:
    for lambda_gene_expr_recon in [0, 30, 300]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 0.01",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

In [None]:
# Settings shared by all jobs of this sweep (independent of the loop variables)
task = "ablation"
ablation_task = "loss_weights"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# NicheCompass GP mask with strongly filtered target genes (ratio 0.01), higher
# loss weight range: 2 x 2 x 4 = 16 jobs with job ids 193 to 208
job_id = 193
for lambda_edge_recon in [5000000, 50000000]:
    for lambda_gene_expr_recon in [3000, 30000]:
        for n_neighbors in [4, 8, 12, 16]:
            # One entry per command line flag; each is emitted with a
            # leading space
            script_args = "".join(f" {flag}" for flag in (
                f"--dataset {dataset}",
                f"--reference_batches {reference_batches}",
                f"--n_neighbors {n_neighbors}",
                "--no-filter_genes",
                "--nichenet_keep_target_genes_ratio 0.01",
                "--nichenet_max_n_target_genes_per_gp 250",
                "--include_mebocost_gps",
                f"--species {species}",
                "--gp_filter_mode subset",
                "--combine_overlap_gps",
                "--overlap_thresh_source_genes 0.9",
                "--overlap_thresh_target_genes 0.9",
                "--overlap_thresh_genes 0.9",
                "--counts_key counts",
                "--spatial_key spatial",
                "--adj_key spatial_connectivities",
                "--mapping_entity_key mapping_entity",
                "--gp_targets_mask_key nichecompass_gp_targets",
                "--gp_sources_mask_key nichecompass_gp_sources",
                "--gp_names_key nichecompass_gp_names",
                f"--model_label {ablation_task}_{task}",
                "--active_gp_names_key nichecompass_active_gp_names",
                "--latent_key nichecompass_latent",
                "--active_gp_thresh_ratio 0.",
                "--gene_expr_recon_dist nb",
                "--log_variational",
                f"--node_label_method {node_label_method}",
                "--n_layers_encoder 1",
                "--n_hidden_encoder None",
                "--conv_layer_encoder gcnconv",
                "--n_epochs 100",
                "--n_epochs_all_gps 25",
                "--lr 0.001",
                f"--lambda_edge_recon {lambda_edge_recon}",
                f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                "--lambda_group_lasso 0.",
                "--lambda_l1_masked 0.",
                f"--edge_batch_size {edge_batch_size}",
                "--node_batch_size None",
                "--n_sampled_neighbors -1",
                f"--timestamp_suffix _{job_id}",
            ))

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000)

            job_id += 1

#### 2.1.2 STARmap PLUS Mouse Central Nervous System

##### 2.1.2.1 Fully Connected GP Mask

In [None]:
# Loss-weight ablation on STARmap PLUS mouse CNS with the fully connected GP
# mask (--add_fc_gps_instead_of_gp_dict_gps): one job per
# (lambda_edge_recon, lambda_gene_expr_recon, n_neighbors) combination, with
# job ids counting up from 1.
job_id = 1

# Settings shared by every job of this sweep.
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Iterate the grid in the same order as three nested loops would
# (edge weight outermost, number of neighbors innermost).
for lambda_edge_recon, lambda_gene_expr_recon, n_neighbors in (
        (edge_weight, gene_expr_weight, n_neighs)
        for edge_weight in [0, 50000, 500000]
        for gene_expr_weight in [0, 30, 300]
        for n_neighs in [4, 8, 12, 16]):
    # Every fragment carries its own leading space, so joining on the empty
    # string yields the final command-line argument string.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --add_fc_gps_instead_of_gp_dict_gps",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {ablation_task}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        " --n_sampled_neighbors -1",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    job_id += 1

In [None]:
# Continuation of the fully connected GP mask loss-weight ablation on STARmap
# PLUS mouse CNS with larger loss weights; job ids count up from 37.
job_id = 37

# Settings shared by every job of this sweep.
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Iterate the grid in the same order as three nested loops would
# (edge weight outermost, number of neighbors innermost).
for lambda_edge_recon, lambda_gene_expr_recon, n_neighbors in (
        (edge_weight, gene_expr_weight, n_neighs)
        for edge_weight in [5000000, 50000000]
        for gene_expr_weight in [3000, 30000]
        for n_neighs in [4, 8, 12, 16]):
    # Every fragment carries its own leading space, so joining on the empty
    # string yields the final command-line argument string.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --add_fc_gps_instead_of_gp_dict_gps",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {ablation_task}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        " --n_sampled_neighbors -1",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    job_id += 1

##### 2.1.2.2 NicheCompass GP Mask (Keep All Target Genes)

In [4]:
# Temporary single-run submission. Job id 74 matches the position of this
# (lambda_edge_recon=50000, lambda_gene_expr_recon=300, n_neighbors=8)
# combination in the full grid that starts at job_id = 53 -- presumably a
# resubmission of that run.
job_id = 74
lambda_edge_recon = 50000
lambda_gene_expr_recon = 300
n_neighbors = 8

task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Every fragment carries its own leading space, so joining on the empty
# string yields the final command-line argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {ablation_task}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    f" --lambda_edge_recon {lambda_edge_recon}",
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    " --n_sampled_neighbors -1",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000)

# Keep the counter one past the last submitted job, as the grid cells do.
job_id += 1

Submitted batch job 12666140


In [5]:
# Temporary single-run submission. Job id 83 matches the position of this
# (lambda_edge_recon=500000, lambda_gene_expr_recon=30, n_neighbors=12)
# combination in the full grid that starts at job_id = 53 -- presumably a
# resubmission of that run.
job_id = 83
lambda_edge_recon = 500000
lambda_gene_expr_recon = 30
n_neighbors = 12

task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Every fragment carries its own leading space, so joining on the empty
# string yields the final command-line argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {ablation_task}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    f" --lambda_edge_recon {lambda_edge_recon}",
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    " --n_sampled_neighbors -1",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000)

# Keep the counter one past the last submitted job, as the grid cells do.
job_id += 1

Submitted batch job 12666141


In [7]:
# Temporary single-run submission. Job id 84 matches the position of this
# (lambda_edge_recon=500000, lambda_gene_expr_recon=30, n_neighbors=16)
# combination in the full grid that starts at job_id = 53 -- presumably a
# resubmission of that run.
job_id = 84
lambda_edge_recon = 500000
lambda_gene_expr_recon = 30
n_neighbors = 16

task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Every fragment carries its own leading space, so joining on the empty
# string yields the final command-line argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {ablation_task}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    f" --lambda_edge_recon {lambda_edge_recon}",
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    " --n_sampled_neighbors -1",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000)

# Keep the counter one past the last submitted job, as the grid cells do.
job_id += 1

Submitted batch job 12666456


In [8]:
# Temporary single-run submission. Job id 85 matches the position of this
# (lambda_edge_recon=500000, lambda_gene_expr_recon=300, n_neighbors=4)
# combination in the full grid that starts at job_id = 53 -- presumably a
# resubmission of that run.
job_id = 85
lambda_edge_recon = 500000
lambda_gene_expr_recon = 300
n_neighbors = 4

task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Every fragment carries its own leading space, so joining on the empty
# string yields the final command-line argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {ablation_task}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --lr 0.001",
    f" --lambda_edge_recon {lambda_edge_recon}",
    f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
    " --lambda_group_lasso 0.",
    " --lambda_l1_masked 0.",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    " --n_sampled_neighbors -1",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000)

# Keep the counter one past the last submitted job, as the grid cells do.
job_id += 1

Submitted batch job 12666548


In [6]:
# Loss-weight ablation on STARmap PLUS mouse CNS with
# --nichenet_keep_target_genes_ratio 1.: one job per
# (lambda_edge_recon, lambda_gene_expr_recon, n_neighbors) combination, with
# job ids counting up from 53.
job_id = 53

# Settings shared by every job of this sweep.
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Iterate the grid in the same order as three nested loops would
# (edge weight outermost, number of neighbors innermost).
for lambda_edge_recon, lambda_gene_expr_recon, n_neighbors in (
        (edge_weight, gene_expr_weight, n_neighs)
        for edge_weight in [0, 50000, 500000]
        for gene_expr_weight in [0, 30, 300]
        for n_neighs in [4, 8, 12, 16]):
    # Every fragment carries its own leading space, so joining on the empty
    # string yields the final command-line argument string.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {ablation_task}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        " --n_sampled_neighbors -1",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    job_id += 1

Submitted batch job 12661274
Submitted batch job 12661275
Submitted batch job 12661276
Submitted batch job 12661277
Submitted batch job 12661278
Submitted batch job 12661279
Submitted batch job 12661280
Submitted batch job 12661281
Submitted batch job 12661282
Submitted batch job 12661283
Submitted batch job 12661284
Submitted batch job 12661285
Submitted batch job 12661286
Submitted batch job 12661287
Submitted batch job 12661288
Submitted batch job 12661289
Submitted batch job 12661290
Submitted batch job 12661291
Submitted batch job 12661292
Submitted batch job 12661293
Submitted batch job 12661294
Submitted batch job 12661295
Submitted batch job 12661296
Submitted batch job 12661297
Submitted batch job 12661298
Submitted batch job 12661299
Submitted batch job 12661300
Submitted batch job 12661301
Submitted batch job 12661302
Submitted batch job 12661303
Submitted batch job 12661304
Submitted batch job 12661305
Submitted batch job 12661306
Submitted batch job 12661307
Submitted batc

In [None]:
# Continuation of the loss-weight ablation on STARmap PLUS mouse CNS
# (--nichenet_keep_target_genes_ratio 1.) with larger loss weights; job ids
# count up from 89.
job_id = 89

# Settings shared by every job of this sweep.
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Iterate the grid in the same order as three nested loops would
# (edge weight outermost, number of neighbors innermost).
for lambda_edge_recon, lambda_gene_expr_recon, n_neighbors in (
        (edge_weight, gene_expr_weight, n_neighs)
        for edge_weight in [5000000, 50000000]
        for gene_expr_weight in [3000, 30000]
        for n_neighs in [4, 8, 12, 16]):
    # Every fragment carries its own leading space, so joining on the empty
    # string yields the final command-line argument string.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {ablation_task}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        " --n_sampled_neighbors -1",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    job_id += 1

##### 2.1.2.3 NicheCompass GP Mask (Weakly Filter Target Genes)

In [None]:
# Loss-weight ablation sweep on the STARmap PLUS mouse CNS dataset with a
# NicheNet target gene keep ratio of 0.1. The 3 x 3 x 4 grid below submits
# 36 jobs with consecutive ids starting at 105.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 105
for lambda_edge_recon in (0, 50000, 500000):
    for lambda_gene_expr_recon in (0, 30, 300):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.1",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

In [None]:
# Loss-weight ablation sweep on the STARmap PLUS mouse CNS dataset with a
# NicheNet target gene keep ratio of 0.1, covering the larger loss weights.
# The 2 x 2 x 4 grid below submits 16 jobs with consecutive ids starting
# at 141.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 141
for lambda_edge_recon in (5000000, 50000000):
    for lambda_gene_expr_recon in (3000, 30000):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.1",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

##### 2.1.2.4 NicheCompass GP Mask (Strongly Filter Target Genes)

In [None]:
# Loss-weight ablation sweep on the STARmap PLUS mouse CNS dataset with a
# NicheNet target gene keep ratio of 0.01. The 3 x 3 x 4 grid below submits
# 36 jobs with consecutive ids starting at 157.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 157
for lambda_edge_recon in (0, 50000, 500000):
    for lambda_gene_expr_recon in (0, 30, 300):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.01",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

In [None]:
# Loss-weight ablation sweep on the STARmap PLUS mouse CNS dataset with a
# NicheNet target gene keep ratio of 0.01, covering the larger loss weights.
# The 2 x 2 x 4 grid below submits 16 jobs with consecutive ids starting
# at 193.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 193
for lambda_edge_recon in (5000000, 50000000):
    for lambda_gene_expr_recon in (3000, 30000):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.01",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

#### 2.1.3 Vizgen MERFISH Human Ovarian Cancer

In [None]:
# tmp
# Temporary single-configuration run on the Vizgen MERFISH human ovarian
# cancer dataset: one job (id 100006) trained for 79 epochs with a NicheNet
# target gene keep ratio of 0.1.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512
n_sampled_neighbors = -1

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 100006
# Each grid axis holds a single value, so exactly one job is submitted
for lambda_edge_recon in (50000000,):
    for lambda_gene_expr_recon in (3000,):
        for n_neighbors in (16,):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.1",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 79",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                f" --n_sampled_neighbors {n_sampled_neighbors}",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

##### 2.1.3.1 Fully Connected GP Mask

In [None]:
# Loss-weight ablation sweep on the Vizgen MERFISH human ovarian cancer
# dataset, passing --add_fc_gps_instead_of_gp_dict_gps to the training
# script. The 3 x 3 x 4 grid below submits 36 jobs with consecutive ids
# starting at 1.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 1
for lambda_edge_recon in (0, 50000, 500000):
    for lambda_gene_expr_recon in (0, 30, 300):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 1.",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --add_fc_gps_instead_of_gp_dict_gps",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

In [None]:
# Loss-weight ablation sweep on the Vizgen MERFISH human ovarian cancer
# dataset, passing --add_fc_gps_instead_of_gp_dict_gps to the training
# script and covering the larger loss weights. The 2 x 2 x 4 grid below
# submits 16 jobs with consecutive ids starting at 37.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 37
for lambda_edge_recon in (5000000, 50000000):
    for lambda_gene_expr_recon in (3000, 30000):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 1.",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --add_fc_gps_instead_of_gp_dict_gps",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

##### 2.1.3.2 NicheCompass GP Mask (Keep All Target Genes)

In [8]:
# Loss-weight ablation sweep on the Vizgen MERFISH human ovarian cancer
# dataset with a NicheNet target gene keep ratio of 1. The 3 x 3 x 4 grid
# below submits 36 jobs with consecutive ids starting at 53.

# Settings shared by every job of the sweep
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 53
for lambda_edge_recon in (0, 50000, 500000):
    for lambda_gene_expr_recon in (0, 30, 300):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so the pieces
            # are joined without a separator.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 1.",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The next grid point gets the next consecutive job id
            job_id += 1

Submitted batch job 12663074
Submitted batch job 12663075
Submitted batch job 12663076
Submitted batch job 12663077
Submitted batch job 12663078
Submitted batch job 12663079
Submitted batch job 12663080
Submitted batch job 12663081
Submitted batch job 12663082
Submitted batch job 12663083
Submitted batch job 12663084
Submitted batch job 12663085
Submitted batch job 12663086
Submitted batch job 12663087
Submitted batch job 12663088
Submitted batch job 12663089
Submitted batch job 12663090
Submitted batch job 12663091
Submitted batch job 12663092
Submitted batch job 12663093
Submitted batch job 12663094
Submitted batch job 12663095
Submitted batch job 12663096
Submitted batch job 12663097
Submitted batch job 12663098
Submitted batch job 12663099
Submitted batch job 12663100
Submitted batch job 12663101
Submitted batch job 12663102
Submitted batch job 12663103
Submitted batch job 12663104
Submitted batch job 12663105
Submitted batch job 12663106
Submitted batch job 12663107
Submitted batc

In [None]:
# missing
# Loss-weights ablation on the Vizgen MERFISH human ovarian cancer dataset with
# "--nichenet_keep_target_genes_ratio 1.". submit_python_script is called once
# per (lambda_edge_recon, lambda_gene_expr_recon, n_neighbors) combination,
# i.e. 2 x 2 x 4 = 16 times, with job ids 89-104.
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

# These values do not depend on the loop variables, so they are set once.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 89
for lambda_edge_recon in (5000000, 50000000):
    for lambda_gene_expr_recon in (3000, 30000):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so plain
            # concatenation produces the final command-line argument string.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 1.",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The id is also embedded in --timestamp_suffix above.
            job_id += 1

##### 2.1.3.3 NicheCompass GP Mask (Weakly Filter Target Genes)

In [None]:
# Loss-weights ablation on the Vizgen MERFISH human ovarian cancer dataset with
# "--nichenet_keep_target_genes_ratio 0.1". submit_python_script is called once
# per (lambda_edge_recon, lambda_gene_expr_recon, n_neighbors) combination,
# i.e. 3 x 3 x 4 = 36 times, with job ids 105-140.
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

# These values do not depend on the loop variables, so they are set once.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 105
for lambda_edge_recon in (0, 50000, 500000):
    for lambda_gene_expr_recon in (0, 30, 300):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so plain
            # concatenation produces the final command-line argument string.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.1",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The id is also embedded in --timestamp_suffix above.
            job_id += 1

In [None]:
# Loss-weights ablation on the Vizgen MERFISH human ovarian cancer dataset with
# "--nichenet_keep_target_genes_ratio 0.1" and the larger loss weights.
# submit_python_script is called once per (lambda_edge_recon,
# lambda_gene_expr_recon, n_neighbors) combination, i.e. 2 x 2 x 4 = 16 times,
# with job ids 141-156.
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

# These values do not depend on the loop variables, so they are set once.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 141
for lambda_edge_recon in (5000000, 50000000):
    for lambda_gene_expr_recon in (3000, 30000):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so plain
            # concatenation produces the final command-line argument string.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.1",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The id is also embedded in --timestamp_suffix above.
            job_id += 1

##### 2.1.3.4 NicheCompass GP Mask (Strongly Filter Target Genes)

In [None]:
# Loss-weights ablation on the Vizgen MERFISH human ovarian cancer dataset with
# "--nichenet_keep_target_genes_ratio 0.01". submit_python_script is called
# once per (lambda_edge_recon, lambda_gene_expr_recon, n_neighbors)
# combination, i.e. 3 x 3 x 4 = 36 times, with job ids 157-192.
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

# These values do not depend on the loop variables, so they are set once.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 157
for lambda_edge_recon in (0, 50000, 500000):
    for lambda_gene_expr_recon in (0, 30, 300):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so plain
            # concatenation produces the final command-line argument string.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.01",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The id is also embedded in --timestamp_suffix above.
            job_id += 1

In [None]:
# Loss-weights ablation on the Vizgen MERFISH human ovarian cancer dataset with
# "--nichenet_keep_target_genes_ratio 0.01" and the larger loss weights.
# submit_python_script is called once per (lambda_edge_recon,
# lambda_gene_expr_recon, n_neighbors) combination, i.e. 2 x 2 x 4 = 16 times,
# with job ids 193-208.
task = "ablation"
ablation_task = "loss_weights"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512

# These values do not depend on the loop variables, so they are set once.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 193
for lambda_edge_recon in (5000000, 50000000):
    for lambda_gene_expr_recon in (3000, 30000):
        for n_neighbors in (4, 8, 12, 16):
            # Every fragment carries its own leading space, so plain
            # concatenation produces the final command-line argument string.
            script_args = "".join([
                f" --dataset {dataset}",
                f" --reference_batches {reference_batches}",
                f" --n_neighbors {n_neighbors}",
                " --no-filter_genes",
                " --nichenet_keep_target_genes_ratio 0.01",
                " --nichenet_max_n_target_genes_per_gp 250",
                " --include_mebocost_gps",
                f" --species {species}",
                " --gp_filter_mode subset",
                " --combine_overlap_gps",
                " --overlap_thresh_source_genes 0.9",
                " --overlap_thresh_target_genes 0.9",
                " --overlap_thresh_genes 0.9",
                " --counts_key counts",
                " --spatial_key spatial",
                " --adj_key spatial_connectivities",
                " --mapping_entity_key mapping_entity",
                " --gp_targets_mask_key nichecompass_gp_targets",
                " --gp_sources_mask_key nichecompass_gp_sources",
                " --gp_names_key nichecompass_gp_names",
                f" --model_label {ablation_task}_{task}",
                " --active_gp_names_key nichecompass_active_gp_names",
                " --latent_key nichecompass_latent",
                " --active_gp_thresh_ratio 0.",
                " --gene_expr_recon_dist nb",
                " --log_variational",
                f" --node_label_method {node_label_method}",
                " --n_layers_encoder 1",
                " --n_hidden_encoder None",
                " --conv_layer_encoder gcnconv",
                " --n_epochs 100",
                " --n_epochs_all_gps 25",
                " --lr 0.001",
                f" --lambda_edge_recon {lambda_edge_recon}",
                f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
                " --lambda_group_lasso 0.",
                " --lambda_l1_masked 0.",
                f" --edge_batch_size {edge_batch_size}",
                " --node_batch_size None",
                " --n_sampled_neighbors -1",
                f" --timestamp_suffix _{job_id}",
            ])

            submit_python_script(
                job_name_prefix=job_name_prefix,
                job_id=job_id,
                job_folder_path=job_folder_path,
                conda_env_name=conda_env_name,
                script_folder_path=script_folder_path,
                script_name=script_name,
                script_args=script_args,
                nice=10000,
            )

            # The id is also embedded in --timestamp_suffix above.
            job_id += 1

#### 2.1.4 Metrics Computation

In [None]:
# tmp
# Metrics computation for the seqFISH mouse organogenesis (imputed) dataset.
# submit_python_script is called once per summary-file index i in 1..12, each
# call running compute_metrics.py on one mlflow summary CSV; job ids are 1-12.
task = "one-hop-norm_reference"
dataset = "seqfish_mouse_organogenesis_imputed"
ablation_task = "loss_weights"
cell_type_keys = "celltype_mapped_refined"
batch_keys = "batch"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi basw bgc bilisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_loss_weights_ablation_{dataset}_metrics_computation"
job_folder_path = "../scripts/ablation/slurm_jobs"
script_folder_path = "/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/ablation"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(1, 13, 1):
    file_name = f"mlflow_summary_loss_weights_ablation_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

In [None]:
# Metrics computation for the Xenium human breast cancer loss-weights ablation.
# submit_python_script is called once per summary-file index i in
# 20, 40, ..., 220 (11 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-11.
task = "ablation"
dataset = "xenium_human_breast_cancer"
ablation_task = "loss_weights"
cell_type_keys = "cell_states"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(20, 240, 20):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

In [11]:
# tmp
# Metrics computation for the Xenium human breast cancer loss-weights ablation.
# submit_python_script is called once per summary-file index i in
# 4, 8, ..., 36 (9 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-9.
task = "ablation"
dataset = "xenium_human_breast_cancer"
ablation_task = "loss_weights"
cell_type_keys = "cell_states"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(4, 40, 4):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

Submitted batch job 12667108
Submitted batch job 12667109
Submitted batch job 12667110
Submitted batch job 12667111
Submitted batch job 12667112
Submitted batch job 12667113
Submitted batch job 12667114
Submitted batch job 12667115
Submitted batch job 12667116


In [None]:
# tmp
# Metrics computation for the Xenium human breast cancer loss-weights ablation.
# submit_python_script is called once per summary-file index i in
# 20, 40, ..., 220 (11 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-11.
task = "ablation"
dataset = "xenium_human_breast_cancer"
ablation_task = "loss_weights"
cell_type_keys = "cell_states"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(20, 240, 20):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

In [9]:
# tmp
# Metrics computation for the STARmap PLUS mouse CNS loss-weights ablation.
# submit_python_script is called once per summary-file index i in
# 4, 8, ..., 32 (8 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-8.
task = "ablation"
dataset = "starmap_plus_mouse_cns"
ablation_task = "loss_weights"
cell_type_keys = "Main_molecular_cell_type"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(4, 36, 4):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

Submitted batch job 12667077
Submitted batch job 12667078
Submitted batch job 12667079
Submitted batch job 12667080
Submitted batch job 12667081
Submitted batch job 12667082
Submitted batch job 12667083
Submitted batch job 12667084


In [None]:
# Metrics computation for the STARmap PLUS mouse CNS loss-weights ablation.
# submit_python_script is called once per summary-file index i in
# 20, 40, ..., 220 (11 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-11.
task = "ablation"
dataset = "starmap_plus_mouse_cns"
ablation_task = "loss_weights"
cell_type_keys = "Main_molecular_cell_type"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(20, 240, 20):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

In [10]:
# tmp
# Metrics computation for the Vizgen MERFISH human ovarian cancer loss-weights
# ablation. submit_python_script is called once per summary-file index i in
# 4, 8, ..., 36 (9 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-9.
task = "ablation"
dataset = "vizgen_merfish_human_ovarian_cancer"
ablation_task = "loss_weights"
cell_type_keys = "cell_type"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(4, 40, 4):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

Submitted batch job 12667085
Submitted batch job 12667086
Submitted batch job 12667087
Submitted batch job 12667088
Submitted batch job 12667089
Submitted batch job 12667090
Submitted batch job 12667091
Submitted batch job 12667092
Submitted batch job 12667093


In [None]:
# Metrics computation for the Vizgen MERFISH human ovarian cancer loss-weights
# ablation. submit_python_script is called once per summary-file index i in
# 20, 40, ..., 220 (11 calls), each running compute_metrics.py on one mlflow
# summary CSV; job ids are 1-11.
task = "ablation"
dataset = "vizgen_merfish_human_ovarian_cancer"
ablation_task = "loss_weights"
cell_type_keys = "cell_type"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

# These values do not depend on the loop variable, so they are set once.
job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

job_id = 1
for i in np.arange(20, 240, 20):
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{i}.csv"
    # Every fragment carries its own leading space.
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
    )

    job_id += 1

### 2.2 Neighbor Sampling

Best parameters from previous ablations:
- lambda_edge_recon = 50,000,000
- lambda_gene_expr_recon = 30,000
- nichenet_keep_target_genes_ratio = 1.0

Ablating:
- Neighbor sampling: no sampling vs. sampling 2, 4, or 8 neighbors

#### 2.2.1 Xenium Human Breast Cancer

In [None]:
# Neighbor-sampling ablation on the Xenium human breast cancer dataset.
# submit_python_script is called once per (n_neighbors, n_sampled_neighbors)
# combination, i.e. 4 x 3 = 12 times, with job ids 1-12.
task = "ablation"
ablation_task = "neighbor_sampling"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 128
# NOTE(review): the markdown introducing this section lists
# lambda_edge_recon = 50,000,000 and lambda_gene_expr_recon = 30,000 as the
# best parameters, whereas this cell uses 5000000 and 3000 - confirm which
# values are intended.
lambda_edge_recon = 5000000
lambda_gene_expr_recon = 3000
nichenet_keep_target_genes_ratio = 1.0

# These values do not depend on the loop variables, so they are set once.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 1
for n_neighbors in (4, 8, 12, 16):
    for n_sampled_neighbors in (2, 4, 8):
        # Every fragment carries its own leading space, so plain concatenation
        # produces the final command-line argument string.
        script_args = "".join([
            f" --dataset {dataset}",
            f" --reference_batches {reference_batches}",
            f" --n_neighbors {n_neighbors}",
            " --no-filter_genes",
            f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}",
            " --nichenet_max_n_target_genes_per_gp 250",
            " --include_mebocost_gps",
            f" --species {species}",
            " --gp_filter_mode subset",
            " --combine_overlap_gps",
            " --overlap_thresh_source_genes 0.9",
            " --overlap_thresh_target_genes 0.9",
            " --overlap_thresh_genes 0.9",
            " --counts_key counts",
            " --spatial_key spatial",
            " --adj_key spatial_connectivities",
            " --mapping_entity_key mapping_entity",
            " --gp_targets_mask_key nichecompass_gp_targets",
            " --gp_sources_mask_key nichecompass_gp_sources",
            " --gp_names_key nichecompass_gp_names",
            f" --model_label {ablation_task}_{task}",
            " --active_gp_names_key nichecompass_active_gp_names",
            " --latent_key nichecompass_latent",
            " --active_gp_thresh_ratio 0.",
            " --gene_expr_recon_dist nb",
            " --log_variational",
            f" --node_label_method {node_label_method}",
            " --n_layers_encoder 1",
            " --n_hidden_encoder None",
            " --conv_layer_encoder gcnconv",
            " --n_epochs 100",
            " --n_epochs_all_gps 25",
            " --lr 0.001",
            f" --lambda_edge_recon {lambda_edge_recon}",
            f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
            " --lambda_group_lasso 0.",
            " --lambda_l1_masked 0.",
            f" --edge_batch_size {edge_batch_size}",
            " --node_batch_size None",
            f" --n_sampled_neighbors {n_sampled_neighbors}",
            f" --timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000,
        )

        # The id is also embedded in --timestamp_suffix above.
        job_id += 1

In [None]:
# Neighbor sampling ablation on the Xenium human breast cancer dataset:
# one training job per (n_neighbors, n_sampled_neighbors) combination, with
# job ids counted upwards from 13.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "neighbor_sampling"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 256
lambda_edge_recon = 5000000
lambda_gene_expr_recon = 3000
nichenet_keep_target_genes_ratio = 1.0

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 13
for n_neighbors in [20, 24, 28, 32]:
    for n_sampled_neighbors in [2, 4, 8]:
        # Command line arguments of the training script; each piece carries
        # its own leading space, so the pieces are joined without separator.
        script_args = "".join([
            f" --dataset {dataset}",
            f" --reference_batches {reference_batches}",
            f" --n_neighbors {n_neighbors}",
            " --no-filter_genes",
            f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}",
            " --nichenet_max_n_target_genes_per_gp 250",
            " --include_mebocost_gps",
            f" --species {species}",
            " --gp_filter_mode subset",
            " --combine_overlap_gps",
            " --overlap_thresh_source_genes 0.9",
            " --overlap_thresh_target_genes 0.9",
            " --overlap_thresh_genes 0.9",
            " --counts_key counts",
            " --spatial_key spatial",
            " --adj_key spatial_connectivities",
            " --mapping_entity_key mapping_entity",
            " --gp_targets_mask_key nichecompass_gp_targets",
            " --gp_sources_mask_key nichecompass_gp_sources",
            " --gp_names_key nichecompass_gp_names",
            f" --model_label {ablation_task}_{task}",
            " --active_gp_names_key nichecompass_active_gp_names",
            " --latent_key nichecompass_latent",
            " --active_gp_thresh_ratio 0.",
            " --gene_expr_recon_dist nb",
            " --log_variational",
            f" --node_label_method {node_label_method}",
            " --n_layers_encoder 1",
            " --n_hidden_encoder None",
            " --conv_layer_encoder gcnconv",
            " --n_epochs 100",
            " --n_epochs_all_gps 25",
            " --lr 0.001",
            f" --lambda_edge_recon {lambda_edge_recon}",
            f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
            " --lambda_group_lasso 0.",
            " --lambda_l1_masked 0.",
            f" --edge_batch_size {edge_batch_size}",
            " --node_batch_size None",
            f" --n_sampled_neighbors {n_sampled_neighbors}",
            f" --timestamp_suffix _{job_id}",
        ])

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

        # The job id is also used as timestamp suffix, so it has to be
        # different for every submitted job.
        job_id += 1

#### 2.2.2 STARmap PLUS Mouse Central Nervous System

In [None]:
# Neighbor sampling ablation on the STARmap PLUS mouse CNS dataset:
# one training job per (n_neighbors, n_sampled_neighbors) combination, with
# job ids counted upwards from 1.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "neighbor_sampling"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 1024
lambda_edge_recon = 500000
lambda_gene_expr_recon = 300
nichenet_keep_target_genes_ratio = 0.1

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 1
for n_neighbors in [4, 8, 12, 16]:
    for n_sampled_neighbors in [2, 4, 8]:
        # Command line arguments of the training script (adjacent string
        # literals are concatenated; each one starts with a space).
        script_args = (
            f" --dataset {dataset}"
            f" --reference_batches {reference_batches}"
            f" --n_neighbors {n_neighbors}"
            " --no-filter_genes"
            f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}"
            " --nichenet_max_n_target_genes_per_gp 250"
            " --include_mebocost_gps"
            f" --species {species}"
            " --gp_filter_mode subset"
            " --combine_overlap_gps"
            " --overlap_thresh_source_genes 0.9"
            " --overlap_thresh_target_genes 0.9"
            " --overlap_thresh_genes 0.9"
            " --counts_key counts"
            " --spatial_key spatial"
            " --adj_key spatial_connectivities"
            " --mapping_entity_key mapping_entity"
            " --gp_targets_mask_key nichecompass_gp_targets"
            " --gp_sources_mask_key nichecompass_gp_sources"
            " --gp_names_key nichecompass_gp_names"
            f" --model_label {ablation_task}_{task}"
            " --active_gp_names_key nichecompass_active_gp_names"
            " --latent_key nichecompass_latent"
            " --active_gp_thresh_ratio 0."
            " --gene_expr_recon_dist nb"
            " --log_variational"
            f" --node_label_method {node_label_method}"
            " --n_layers_encoder 1"
            " --n_hidden_encoder None"
            " --conv_layer_encoder gcnconv"
            " --n_epochs 100"
            " --n_epochs_all_gps 25"
            " --lr 0.001"
            f" --lambda_edge_recon {lambda_edge_recon}"
            f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
            " --lambda_group_lasso 0."
            " --lambda_l1_masked 0."
            f" --edge_batch_size {edge_batch_size}"
            " --node_batch_size None"
            f" --n_sampled_neighbors {n_sampled_neighbors}"
            f" --timestamp_suffix _{job_id}"
        )

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

        # The job id is also used as timestamp suffix, so it has to be
        # different for every submitted job.
        job_id += 1

In [None]:
# Neighbor sampling ablation on the STARmap PLUS mouse CNS dataset for
# n_neighbors in 20..32: one training job per
# (n_neighbors, n_sampled_neighbors) combination, job ids counted from 13.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "neighbor_sampling"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 512
lambda_edge_recon = 50000000
lambda_gene_expr_recon = 30000
nichenet_keep_target_genes_ratio = 0.1

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 13
# Single loop over all combinations; n_sampled_neighbors varies fastest.
for n_neighbors, n_sampled_neighbors in [
        (k, s) for k in [20, 24, 28, 32] for s in [2, 4, 8]]:
    # Command line arguments of the training script; each piece carries its
    # own leading space, so the pieces are joined without separator.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --no-filter_genes",
        f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {ablation_task}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        f" --n_sampled_neighbors {n_sampled_neighbors}",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    # The job id is also used as timestamp suffix, so it has to be different
    # for every submitted job.
    job_id += 1

#### 2.2.3 Vizgen MERFISH Human Ovarian Cancer

In [None]:
# Neighbor sampling ablation on the Vizgen MERFISH human ovarian cancer
# dataset: one training job per (n_neighbors, n_sampled_neighbors)
# combination, with job ids counted upwards from 1.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "neighbor_sampling"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512
nichenet_keep_target_genes_ratio = 1.0

# NOTE(review): the two loss weights were left without a value in this cell
# (a bare `name =` is a SyntaxError). The intended values are not known, so
# they are kept as explicit placeholders and checked before any job is
# submitted instead of being guessed.
lambda_edge_recon = None  # TODO: set the edge reconstruction loss weight
lambda_gene_expr_recon = None  # TODO: set the gene expression reconstruction loss weight
if lambda_edge_recon is None or lambda_gene_expr_recon is None:
    raise ValueError(
        "Set 'lambda_edge_recon' and 'lambda_gene_expr_recon' for the "
        f"'{dataset}' {ablation_task} {task} before submitting jobs.")

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 1
for n_neighbors in [4, 8, 12, 16]:
    for n_sampled_neighbors in [2, 4, 8]:
        # Command line arguments of the training script (adjacent string
        # literals are concatenated; each one starts with a space).
        script_args = (
            f" --dataset {dataset}"
            f" --reference_batches {reference_batches}"
            f" --n_neighbors {n_neighbors}"
            " --no-filter_genes"
            f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}"
            " --nichenet_max_n_target_genes_per_gp 250"
            " --include_mebocost_gps"
            f" --species {species}"
            " --gp_filter_mode subset"
            " --combine_overlap_gps"
            " --overlap_thresh_source_genes 0.9"
            " --overlap_thresh_target_genes 0.9"
            " --overlap_thresh_genes 0.9"
            " --counts_key counts"
            " --spatial_key spatial"
            " --adj_key spatial_connectivities"
            " --mapping_entity_key mapping_entity"
            " --gp_targets_mask_key nichecompass_gp_targets"
            " --gp_sources_mask_key nichecompass_gp_sources"
            " --gp_names_key nichecompass_gp_names"
            f" --model_label {ablation_task}_{task}"
            " --active_gp_names_key nichecompass_active_gp_names"
            " --latent_key nichecompass_latent"
            " --active_gp_thresh_ratio 0."
            " --gene_expr_recon_dist nb"
            " --log_variational"
            f" --node_label_method {node_label_method}"
            " --n_layers_encoder 1"
            " --n_hidden_encoder None"
            " --conv_layer_encoder gcnconv"
            " --n_epochs 100"
            " --n_epochs_all_gps 25"
            " --lr 0.001"
            f" --lambda_edge_recon {lambda_edge_recon}"
            f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
            " --lambda_group_lasso 0."
            " --lambda_l1_masked 0."
            f" --edge_batch_size {edge_batch_size}"
            " --node_batch_size None"
            f" --n_sampled_neighbors {n_sampled_neighbors}"
            f" --timestamp_suffix _{job_id}"
        )

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

        # The job id is also used as timestamp suffix, so it has to be
        # different for every submitted job.
        job_id += 1

In [None]:
# Neighbor sampling ablation on the Vizgen MERFISH human ovarian cancer
# dataset for n_neighbors in 20..32: one training job per
# (n_neighbors, n_sampled_neighbors) combination.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "neighbor_sampling"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 256
nichenet_keep_target_genes_ratio = 1.0

# NOTE(review): the two loss weights were left without a value in this cell
# (a bare `name =` is a SyntaxError). The intended values are not known, so
# they are kept as explicit placeholders and checked before any job is
# submitted instead of being guessed.
lambda_edge_recon = None  # TODO: set the edge reconstruction loss weight
lambda_gene_expr_recon = None  # TODO: set the gene expression reconstruction loss weight
if lambda_edge_recon is None or lambda_gene_expr_recon is None:
    raise ValueError(
        "Set 'lambda_edge_recon' and 'lambda_gene_expr_recon' for the "
        f"'{dataset}' {ablation_task} {task} before submitting jobs.")

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Start at 13: ids 1-12 are used by the n_neighbors 4..16 runs of the same
# dataset, which share this job name prefix. Reusing them would overwrite
# their job/log files and duplicate their timestamp suffixes. The other
# datasets' 20..32 cells start at 13 as well.
job_id = 13
for n_neighbors in [20, 24, 28, 32]:
    for n_sampled_neighbors in [2, 4, 8]:
        # Command line arguments of the training script (adjacent string
        # literals are concatenated; each one starts with a space).
        script_args = (
            f" --dataset {dataset}"
            f" --reference_batches {reference_batches}"
            f" --n_neighbors {n_neighbors}"
            " --no-filter_genes"
            f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}"
            " --nichenet_max_n_target_genes_per_gp 250"
            " --include_mebocost_gps"
            f" --species {species}"
            " --gp_filter_mode subset"
            " --combine_overlap_gps"
            " --overlap_thresh_source_genes 0.9"
            " --overlap_thresh_target_genes 0.9"
            " --overlap_thresh_genes 0.9"
            " --counts_key counts"
            " --spatial_key spatial"
            " --adj_key spatial_connectivities"
            " --mapping_entity_key mapping_entity"
            " --gp_targets_mask_key nichecompass_gp_targets"
            " --gp_sources_mask_key nichecompass_gp_sources"
            " --gp_names_key nichecompass_gp_names"
            f" --model_label {ablation_task}_{task}"
            " --active_gp_names_key nichecompass_active_gp_names"
            " --latent_key nichecompass_latent"
            " --active_gp_thresh_ratio 0."
            " --gene_expr_recon_dist nb"
            " --log_variational"
            f" --node_label_method {node_label_method}"
            " --n_layers_encoder 1"
            " --n_hidden_encoder None"
            " --conv_layer_encoder gcnconv"
            " --n_epochs 100"
            " --n_epochs_all_gps 25"
            " --lr 0.001"
            f" --lambda_edge_recon {lambda_edge_recon}"
            f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
            " --lambda_group_lasso 0."
            " --lambda_l1_masked 0."
            f" --edge_batch_size {edge_batch_size}"
            " --node_batch_size None"
            f" --n_sampled_neighbors {n_sampled_neighbors}"
            f" --timestamp_suffix _{job_id}"
        )

        submit_python_script(
            job_name_prefix=job_name_prefix,
            job_id=job_id,
            job_folder_path=job_folder_path,
            conda_env_name=conda_env_name,
            script_folder_path=script_folder_path,
            script_name=script_name,
            script_args=script_args,
            nice=10000)

        # The job id is also used as timestamp suffix, so it has to be
        # different for every submitted job.
        job_id += 1

#### 2.2.4 Metrics Computation

In [None]:
# Metrics computation for the neighbor sampling ablation on the STARmap PLUS
# mouse CNS dataset: submits one job per job id in 1..3, each with its own
# summary file name.

# Settings shared by all three jobs.
task = "ablation"
dataset = "starmap_plus_mouse_cns"
ablation_task = "neighbor_sampling"
cell_type_keys = "Main_molecular_cell_type"
batch_keys = "None"
metrics = "gcs mlami cas clisis nasw cnmi cari casw clisi"

job_name_prefix = f"nichecompass_{ablation_task}_{task}_{dataset}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

for job_id in range(1, 4):
    # Only the summary file name depends on the job id.
    file_name = f"mlflow_summary_{ablation_task}_{task}_{dataset}_{job_id}.csv"
    script_args = "".join([
        f" --task {ablation_task}_{task}",
        f" --file_name {file_name}",
        f" --datasets {dataset}",
        f" --cell_type_keys {cell_type_keys}",
        f" --batch_keys {batch_keys}",
        f" --metrics {metrics}",
    ])

    # Uses the reproducibility conda environment and the default Slurm
    # settings of 'submit_python_script'.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=reproducibility_conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args)

In [None]:
# Single metrics computation job for the neighbor sampling ablation, writing
# to the "processed" summary file and submitted to the interactive GPU
# partition.
job_id = 1
task = "ablation"
ablation_task = "neighbor_sampling"

datasets = "starmap_plus_mouse_cns"
cell_type_keys = "Main_molecular_cell_type"
# NOTE(review): three batch keys are passed although 'datasets' and
# 'cell_type_keys' each hold a single entry - confirm that
# 'compute_metrics.py' accepts this.
batch_keys = "None None None"
file_name = f"mlflow_summary_{ablation_task}_{task}_processed.csv"

job_name_prefix = f"nichecompass_{ablation_task}_{task}_metrics_computation"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "compute_metrics.py"

# Each piece carries its own leading space, so no separator is needed.
script_args = "".join([
    f" --task {ablation_task}_{task}",
    f" --file_name {file_name}",
    f" --datasets {datasets}",
    f" --cell_type_keys {cell_type_keys}",
    f" --batch_keys {batch_keys}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=reproducibility_conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    t="12:00:00",
    p="interactive_gpu_p",
    gres="gpu:1",
    qos="interactive_gpu",
    nice=9999)

### 2.3 Encoder Architecture

Ablating:
- Encoder Number of Message Passing Layers: 1, 2
- Encoder Type of Message Passing Layers: GCNConv vs GATv2Conv (4 attention heads)

#### 2.3.1 Xenium Human Breast Cancer

In [None]:
# Encoder architecture ablation on the Xenium human breast cancer dataset:
# one training job per combination of number of encoder layers, encoder conv
# layer type, encoder hidden size and n_neighbors, job ids counted from 1.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "encoder_architecture"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 128
lambda_edge_recon = 50000000.
lambda_gene_expr_recon = 3000.
n_sampled_neighbors = 4

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 1
# Single loop over the full grid; n_neighbors varies fastest and the number
# of encoder layers slowest.
for n_layers_encoder, conv_layer_encoder, n_hidden_encoder, n_neighbors in [
        (n_layers, conv_layer, n_hidden, k)
        for n_layers in [1, 2]
        for conv_layer in ["gcnconv", "gatv2conv"]
        for n_hidden in [None]
        for k in [4, 8, 12, 16]]:
    # Command line arguments of the training script; each piece carries its
    # own leading space, so the pieces are joined without separator.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --no-filter_genes",
        " --nichenet_keep_target_genes_ratio 1.",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        f" --model_label {ablation_task}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        " --active_gp_thresh_ratio 0.",
        " --gene_expr_recon_dist nb",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        f" --n_layers_encoder {n_layers_encoder}",
        f" --n_hidden_encoder {n_hidden_encoder}",
        f" --conv_layer_encoder {conv_layer_encoder}",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        " --lambda_group_lasso 0.",
        " --lambda_l1_masked 0.",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        f" --n_sampled_neighbors {n_sampled_neighbors}",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    # The job id is also used as timestamp suffix, so it has to be different
    # for every submitted job.
    job_id += 1

#### 2.3.2 STARmap PLUS Mouse Central Nervous System

In [None]:
# Encoder architecture ablation on the STARmap PLUS mouse CNS dataset:
# one training job per combination of number of encoder layers, encoder conv
# layer type, encoder hidden size and n_neighbors, job ids counted from 1.

# Settings shared by every job of this cell.
task = "ablation"
ablation_task = "encoder_architecture"
dataset = "starmap_plus_mouse_cns"
reference_batches = "batch1"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 256
lambda_edge_recon = 500000.
lambda_gene_expr_recon = 30.
n_sampled_neighbors = 4

# Job and script locations only depend on the settings above.
job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

job_id = 1
# Single loop over the full grid; n_neighbors varies fastest and the number
# of encoder layers slowest.
for n_layers_encoder, conv_layer_encoder, n_hidden_encoder, n_neighbors in [
        (n_layers, conv_layer, n_hidden, k)
        for n_layers in [1, 2]
        for conv_layer in ["gcnconv", "gatv2conv"]
        for n_hidden in [None]
        for k in [4, 8, 12, 16]]:
    # Command line arguments of the training script (adjacent string
    # literals are concatenated; each one starts with a space).
    script_args = (
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        f" --n_neighbors {n_neighbors}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 0.1"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {ablation_task}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0."
        " --gene_expr_recon_dist nb"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        f" --n_layers_encoder {n_layers_encoder}"
        f" --n_hidden_encoder {n_hidden_encoder}"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        f" --lambda_edge_recon {lambda_edge_recon}"
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --edge_batch_size {edge_batch_size}"
        " --node_batch_size None"
        f" --n_sampled_neighbors {n_sampled_neighbors}"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

    # The job id is also used as timestamp suffix, so it has to be different
    # for every submitted job.
    job_id += 1

#### 2.3.3 Vizgen MERFISH Human Ovarian Cancer

In [None]:
# Encoder architecture ablation for the Vizgen MERFISH human ovarian cancer
# dataset. Settings shared by all runs are defined once up front; the nested
# loops then call submit_python_script once per hyperparameter combination.
task = "ablation"
ablation_task = "encoder_architecture"
dataset = "vizgen_merfish_human_ovarian_cancer"
reference_batches = "batch2"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 128
lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# job_id numbers the runs consecutively; it is part of the job name and of
# the timestamp suffix handed to the training script.
job_id = 1
for n_layers_encoder in [1, 2]:
    for conv_layer_encoder in ["gcnconv", "gatv2conv"]:
        for n_hidden_encoder in [None]:
            for n_neighbors in [16]:
                # Command line options in the order they are handed to the
                # training script.
                cli_options = [
                    f"--dataset {dataset}",
                    f"--reference_batches {reference_batches}",
                    f"--n_neighbors {n_neighbors}",
                    "--no-filter_genes",
                    "--nichenet_keep_target_genes_ratio 0.01",
                    "--nichenet_max_n_target_genes_per_gp 250",
                    "--include_mebocost_gps",
                    f"--species {species}",
                    "--gp_filter_mode subset",
                    "--combine_overlap_gps",
                    "--overlap_thresh_source_genes 0.9",
                    "--overlap_thresh_target_genes 0.9",
                    "--overlap_thresh_genes 0.9",
                    "--counts_key counts",
                    "--spatial_key spatial",
                    "--adj_key spatial_connectivities",
                    "--mapping_entity_key mapping_entity",
                    "--gp_targets_mask_key nichecompass_gp_targets",
                    "--gp_sources_mask_key nichecompass_gp_sources",
                    "--gp_names_key nichecompass_gp_names",
                    f"--model_label {ablation_task}_{task}",
                    "--active_gp_names_key nichecompass_active_gp_names",
                    "--latent_key nichecompass_latent",
                    "--active_gp_thresh_ratio 0.",
                    "--gene_expr_recon_dist nb",
                    "--log_variational",
                    f"--node_label_method {node_label_method}",
                    f"--n_layers_encoder {n_layers_encoder}",
                    f"--n_hidden_encoder {n_hidden_encoder}",
                    f"--conv_layer_encoder {conv_layer_encoder}",
                    "--n_epochs 100",
                    "--n_epochs_all_gps 25",
                    "--lr 0.001",
                    f"--lambda_edge_recon {lambda_edge_recon}",
                    f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                    "--lambda_group_lasso 0.",
                    "--lambda_l1_masked 0.",
                    f"--edge_batch_size {edge_batch_size}",
                    "--node_batch_size None",
                    f"--timestamp_suffix _{job_id}",
                ]
                # Each option is preceded by exactly one space.
                script_args = "".join(f" {option}" for option in cli_options)

                submit_python_script(
                    job_name_prefix=job_name_prefix,
                    job_id=job_id,
                    job_folder_path=job_folder_path,
                    conda_env_name=conda_env_name,
                    script_folder_path=script_folder_path,
                    script_name=script_name,
                    script_args=script_args,
                    nice=10000)

                job_id += 1

### 2.3 Gene Program & Gene Selection

Ablating:
- Gene Program Selection: Active GP Threshold Ratio, Group Lasso Regularization
- Gene Selection: L1 Regularization
- Spatial Neighborhood Graph: Number of Neighbors

#### 2.3.1 Xenium Human Breast Cancer

In [None]:
# Gene program & gene selection ablation for the Xenium human breast cancer
# dataset. Settings shared by all runs are defined once up front; the nested
# loops then call submit_python_script once per combination of active GP
# threshold ratio, group lasso weight, L1 weight and number of neighbors.
task = "ablation"
ablation_task = "gene_program_gene_selection"
dataset = "xenium_human_breast_cancer"
reference_batches = "batch1"
species = "human"
node_label_method = "one-hop-norm"
edge_batch_size = 512
lambda_edge_recon = 500000.
lambda_gene_expr_recon = 300.

job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# job_id numbers the runs consecutively; it is part of the job name and of
# the timestamp suffix handed to the training script.
job_id = 1
for active_gp_thresh_ratio in [0., 0.05, 0.1]:
    for lambda_group_lasso in [0., 5., 10.]:
        for lambda_l1_masked in [0., 5., 10., 50.]:
            for n_neighbors in [4, 8, 12, 16]:
                # Command line options in the order they are handed to the
                # training script.
                cli_options = [
                    f"--dataset {dataset}",
                    f"--reference_batches {reference_batches}",
                    f"--n_neighbors {n_neighbors}",
                    "--no-filter_genes",
                    "--nichenet_keep_target_genes_ratio 0.01",
                    "--nichenet_max_n_target_genes_per_gp 250",
                    "--include_mebocost_gps",
                    f"--species {species}",
                    "--gp_filter_mode subset",
                    "--combine_overlap_gps",
                    "--overlap_thresh_source_genes 0.9",
                    "--overlap_thresh_target_genes 0.9",
                    "--overlap_thresh_genes 0.9",
                    "--counts_key counts",
                    "--spatial_key spatial",
                    "--adj_key spatial_connectivities",
                    "--mapping_entity_key mapping_entity",
                    "--gp_targets_mask_key nichecompass_gp_targets",
                    "--gp_sources_mask_key nichecompass_gp_sources",
                    "--gp_names_key nichecompass_gp_names",
                    f"--model_label {ablation_task}_{task}",
                    "--active_gp_names_key nichecompass_active_gp_names",
                    "--latent_key nichecompass_latent",
                    f"--active_gp_thresh_ratio {active_gp_thresh_ratio}",
                    "--gene_expr_recon_dist nb",
                    "--log_variational",
                    f"--node_label_method {node_label_method}",
                    "--n_layers_encoder 1",
                    "--n_hidden_encoder None",
                    "--conv_layer_encoder gcnconv",
                    "--n_epochs 100",
                    "--n_epochs_all_gps 25",
                    "--lr 0.001",
                    f"--lambda_edge_recon {lambda_edge_recon}",
                    f"--lambda_gene_expr_recon {lambda_gene_expr_recon}",
                    f"--lambda_group_lasso {lambda_group_lasso}",
                    f"--lambda_l1_masked {lambda_l1_masked}",
                    f"--edge_batch_size {edge_batch_size}",
                    "--node_batch_size None",
                    f"--timestamp_suffix _{job_id}",
                ]
                # Each option is preceded by exactly one space.
                script_args = "".join(f" {option}" for option in cli_options)

                submit_python_script(
                    job_name_prefix=job_name_prefix,
                    job_id=job_id,
                    job_folder_path=job_folder_path,
                    conda_env_name=conda_env_name,
                    script_folder_path=script_folder_path,
                    script_name=script_name,
                    script_args=script_args,
                    nice=10000)

                job_id += 1

#### 2.3.2 STARmap PLUS Mouse Central Nervous System

In [None]:
# Gene program & gene selection ablation for the STARmap PLUS mouse central
# nervous system dataset. submit_python_script is called once per combination
# of active GP threshold ratio, group lasso weight, L1 weight and number of
# neighbors; job_id numbers the runs consecutively starting at 1.
job_id = 1
for active_gp_thresh_ratio in [0., 0.05, 0.1]:
    for lambda_group_lasso in [0, 5, 10]:
        for lambda_l1_masked in [0., 5., 10., 50., 100.]:
            for n_neighbors in [4, 8, 12, 16]:
                task = "ablation"
                ablation_task = "gene_program_gene_selection"
                dataset = "starmap_plus_mouse_cns"
                reference_batches = "batch1"
                species = "mouse"
                node_label_method = "one-hop-norm"
                edge_batch_size = 1024
                lambda_edge_recon = 500000.
                lambda_gene_expr_recon = 300.

                # job_id is appended to this prefix by submit_python_script
                # to form the job name.
                job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
                job_folder_path = f"../scripts/{task}/slurm_jobs"
                script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
                script_name = "train_nichecompass_reference_model.py"
                script_args = f" --dataset {dataset}" \
                              f" --reference_batches {reference_batches}" \
                              f" --n_neighbors {n_neighbors}" \
                              " --no-filter_genes" \
                              " --nichenet_keep_target_genes_ratio 1." \
                              " --nichenet_max_n_target_genes_per_gp 250" \
                              " --include_mebocost_gps" \
                              f" --species {species}" \
                              " --gp_filter_mode subset" \
                              " --combine_overlap_gps" \
                              " --overlap_thresh_source_genes 0.9" \
                              " --overlap_thresh_target_genes 0.9" \
                              " --overlap_thresh_genes 0.9" \
                              " --counts_key counts" \
                              " --spatial_key spatial" \
                              " --adj_key spatial_connectivities" \
                              " --mapping_entity_key mapping_entity" \
                              " --gp_targets_mask_key nichecompass_gp_targets" \
                              " --gp_sources_mask_key nichecompass_gp_sources" \
                              " --gp_names_key nichecompass_gp_names" \
                              f" --model_label {ablation_task}_{task}" \
                              " --active_gp_names_key nichecompass_active_gp_names" \
                              " --latent_key nichecompass_latent" \
                              f" --active_gp_thresh_ratio {active_gp_thresh_ratio}" \
                              " --gene_expr_recon_dist nb" \
                              " --log_variational" \
                              f" --node_label_method {node_label_method}" \
                              " --n_layers_encoder 1" \
                              " --n_hidden_encoder None" \
                              " --conv_layer_encoder gcnconv" \
                              " --n_epochs 100" \
                              " --n_epochs_all_gps 25" \
                              " --lr 0.001" \
                              f" --lambda_edge_recon {lambda_edge_recon}" \
                              f" --lambda_gene_expr_recon {lambda_gene_expr_recon}" \
                              f" --lambda_group_lasso {lambda_group_lasso}" \
                              f" --lambda_l1_masked {lambda_l1_masked}" \
                              f" --edge_batch_size {edge_batch_size}" \
                              " --node_batch_size None" \
                              f" --timestamp_suffix _{job_id}"
                # The swept L1 weight is passed through above. A fixed
                # "--lambda_l1_masked 0." would submit the same configuration
                # once per value of the lambda_l1_masked loop.

                submit_python_script(
                        job_name_prefix=job_name_prefix,
                        job_id=job_id,
                        job_folder_path=job_folder_path,
                        conda_env_name=conda_env_name,
                        script_folder_path=script_folder_path,
                        script_name=script_name,
                        script_args=script_args,
                        nice=10000)

                job_id += 1

#### 2.3.3 Vizgen MERFISH Human Ovarian Cancer

In [None]:
# Gene program & gene selection ablation for the Vizgen MERFISH human ovarian
# cancer dataset. submit_python_script is called once per combination of
# active GP threshold ratio, group lasso weight, L1 weight and number of
# neighbors; job_id numbers the runs consecutively starting at 1.
job_id = 1
for active_gp_thresh_ratio in [0., 0.05, 0.1]:
    for lambda_group_lasso in [0, 5, 10]:
        for lambda_l1_masked in [0., 5., 10., 50., 100.]:
            for n_neighbors in [4, 8, 12, 16]:
                task = "ablation"
                ablation_task = "gene_program_gene_selection"
                dataset = "vizgen_merfish_human_ovarian_cancer"
                reference_batches = "batch2"
                species = "human"
                node_label_method = "one-hop-norm"
                edge_batch_size = 1024
                lambda_edge_recon = 500000.
                lambda_gene_expr_recon = 300.

                # job_id is appended to this prefix by submit_python_script
                # to form the job name.
                job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
                job_folder_path = f"../scripts/{task}/slurm_jobs"
                script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
                script_name = "train_nichecompass_reference_model.py"
                script_args = f" --dataset {dataset}" \
                              f" --reference_batches {reference_batches}" \
                              f" --n_neighbors {n_neighbors}" \
                              " --no-filter_genes" \
                              " --nichenet_keep_target_genes_ratio 1." \
                              " --nichenet_max_n_target_genes_per_gp 250" \
                              " --include_mebocost_gps" \
                              f" --species {species}" \
                              " --gp_filter_mode subset" \
                              " --combine_overlap_gps" \
                              " --overlap_thresh_source_genes 0.9" \
                              " --overlap_thresh_target_genes 0.9" \
                              " --overlap_thresh_genes 0.9" \
                              " --counts_key counts" \
                              " --spatial_key spatial" \
                              " --adj_key spatial_connectivities" \
                              " --mapping_entity_key mapping_entity" \
                              " --gp_targets_mask_key nichecompass_gp_targets" \
                              " --gp_sources_mask_key nichecompass_gp_sources" \
                              " --gp_names_key nichecompass_gp_names" \
                              f" --model_label {ablation_task}_{task}" \
                              " --active_gp_names_key nichecompass_active_gp_names" \
                              " --latent_key nichecompass_latent" \
                              f" --active_gp_thresh_ratio {active_gp_thresh_ratio}" \
                              " --gene_expr_recon_dist nb" \
                              " --log_variational" \
                              f" --node_label_method {node_label_method}" \
                              " --n_layers_encoder 1" \
                              " --n_hidden_encoder None" \
                              " --conv_layer_encoder gcnconv" \
                              " --n_epochs 100" \
                              " --n_epochs_all_gps 25" \
                              " --lr 0.001" \
                              f" --lambda_edge_recon {lambda_edge_recon}" \
                              f" --lambda_gene_expr_recon {lambda_gene_expr_recon}" \
                              f" --lambda_group_lasso {lambda_group_lasso}" \
                              f" --lambda_l1_masked {lambda_l1_masked}" \
                              f" --edge_batch_size {edge_batch_size}" \
                              " --node_batch_size None" \
                              f" --timestamp_suffix _{job_id}"
                # The swept L1 weight is passed through above. A fixed
                # "--lambda_l1_masked 0." would submit the same configuration
                # once per value of the lambda_l1_masked loop.

                submit_python_script(
                        job_name_prefix=job_name_prefix,
                        job_id=job_id,
                        job_folder_path=job_folder_path,
                        conda_env_name=conda_env_name,
                        script_folder_path=script_folder_path,
                        script_name=script_name,
                        script_args=script_args,
                        nice=10000)

                job_id += 1

### 2.5 Categorical Covariate Embedding & Contrastive Integration

#### 2.5.1 Xenium Human Breast Cancer

In [None]:
# Categorical covariate embedding & contrastive integration ablation for the
# Xenium human breast cancer dataset. submit_python_script is called once per
# combination of the loop variables below; job_id numbers the runs
# consecutively starting at 1.
#
# NOTE(review): cat_covariates_keys, cat_covariates_no_edges,
# cat_covariates_embeds_nums, n_epochs_no_cat_covariates_contrastive,
# contrastive_logits_neg_ratio and lambda_l1_masked are not assigned in this
# cell. They must already exist in the notebook namespace, otherwise building
# script_args raises a NameError. Confirm the intended values and define them
# here.
job_id = 1
for cat_covariates_embeds_injection in ["encoder", "gene_expr_decoder"]:
    # The remaining loops are nested inside the injection loop; a sibling
    # `for` directly after the loop header is not valid Python.
    for lambda_cat_covariates_contrastive in [0., 1, 2]:
        # NOTE(review): these values are graph convolution layer names, not
        # ratios. They look copied from the encoder architecture ablation;
        # confirm the intended positive-logit ratios before submitting.
        for contrastive_logits_pos_ratio in ["gcnconv", "gatv2conv"]:
            for n_hidden_encoder in [None]:
                for n_neighbors in [4, 8, 12, 16]:
                    task = "ablation"
                    # NOTE(review): this label is also used by the encoder
                    # architecture ablation cells, so job names and model
                    # labels may collide with those runs. Confirm.
                    ablation_task = "encoder_architecture"
                    dataset = "xenium_human_breast_cancer"
                    reference_batches = "batch1"
                    species = "human"
                    node_label_method = "one-hop-norm"
                    edge_batch_size = 512
                    lambda_edge_recon = 500000.
                    lambda_gene_expr_recon = 300.
                    active_gp_thresh_ratio = 0.05
                    lambda_group_lasso = 0.
                    n_layers_encoder = 1
                    n_hidden_encoder = None
                    conv_layer_encoder = "gcnconv"

                    job_name_prefix = f"{dataset}_nichecompass_{ablation_task}_{task}"
                    job_folder_path = f"../scripts/{task}/slurm_jobs"
                    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
                    script_name = "train_nichecompass_reference_model.py"
                    script_args = f" --dataset {dataset}" \
                                  f" --reference_batches {reference_batches}" \
                                  f" --n_neighbors {n_neighbors}" \
                                  " --no-filter_genes" \
                                  " --nichenet_keep_target_genes_ratio 1." \
                                  " --nichenet_max_n_target_genes_per_gp 250" \
                                  " --include_mebocost_gps" \
                                  f" --species {species}" \
                                  " --gp_filter_mode subset" \
                                  " --combine_overlap_gps" \
                                  " --overlap_thresh_source_genes 0.9" \
                                  " --overlap_thresh_target_genes 0.9" \
                                  " --overlap_thresh_genes 0.9" \
                                  " --counts_key counts" \
                                  f" --cat_covariates_keys {cat_covariates_keys}" \
                                  f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
                                  " --spatial_key spatial" \
                                  " --adj_key spatial_connectivities" \
                                  " --mapping_entity_key mapping_entity" \
                                  " --gp_targets_mask_key nichecompass_gp_targets" \
                                  " --gp_sources_mask_key nichecompass_gp_sources" \
                                  " --gp_names_key nichecompass_gp_names" \
                                  f" --model_label {ablation_task}_{task}" \
                                  " --active_gp_names_key nichecompass_active_gp_names" \
                                  " --latent_key nichecompass_latent" \
                                  f" --active_gp_thresh_ratio {active_gp_thresh_ratio}" \
                                  " --gene_expr_recon_dist nb" \
                                  f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
                                  f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
                                  " --log_variational" \
                                  f" --node_label_method {node_label_method}" \
                                  f" --n_layers_encoder {n_layers_encoder}" \
                                  f" --n_hidden_encoder {n_hidden_encoder}" \
                                  f" --conv_layer_encoder {conv_layer_encoder}" \
                                  " --n_epochs 100" \
                                  " --n_epochs_all_gps 25" \
                                  f" --n_epochs_no_cat_covariates_contrastive {n_epochs_no_cat_covariates_contrastive}" \
                                  " --lr 0.001" \
                                  f" --lambda_edge_recon {lambda_edge_recon}" \
                                  f" --lambda_gene_expr_recon {lambda_gene_expr_recon}" \
                                  f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
                                  f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
                                  f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
                                  f" --lambda_group_lasso {lambda_group_lasso}" \
                                  f" --lambda_l1_masked {lambda_l1_masked}" \
                                  f" --edge_batch_size {edge_batch_size}" \
                                  " --node_batch_size None" \
                                  f" --timestamp_suffix _{job_id}"

                    submit_python_script(
                            job_name_prefix=job_name_prefix,
                            job_id=job_id,
                            job_folder_path=job_folder_path,
                            conda_env_name=conda_env_name,
                            script_folder_path=script_folder_path,
                            script_name=script_name,
                            script_args=script_args,
                            nice=10000)

                    job_id += 1

## 3. NicheCompass Single Sample Method Benchmarking Models Training

### 3.1 seqFISH Mouse Organogenesis

#### 3.1.1 Spatial Transcriptomics Data

In [None]:
# Single sample method benchmarking on the seqFISH mouse organogenesis
# dataset (embryo 2): builds the command line for the benchmarking training
# script and hands it to submit_python_script as a single job.
task = "single_sample_method_benchmarking"
dataset = "seqfish_mouse_organogenesis_embryo2"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
node_label_method = "one-hop-norm" # one-hop-norm, one-hop-attention
# One entry per run; 16384 is full dataset
edge_batch_size_str = " ".join(["16384"] * 10)

job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line options in the order they are handed to the training script.
cli_options = [
    "--adata_new_name None",
    "--n_neighbors_list 4 4 8 8 12 12 16 16 20 20",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None None None None None None None",
    "--seeds 0 1 2 3 4 5 6 7 8 9",
    "--run_index 1 2 3 4 5 6 7 8 9 10",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    f"--timestamp_suffix _{job_id}",
]
# Each option is preceded by exactly one space.
script_args = "".join(f" {option}" for option in cli_options)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000)

#### 3.1.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Single sample method benchmarking on subsamples of the seqFISH mouse
# organogenesis dataset (embryo 2): submit_python_script is called once per
# subsample percentage. The job id is 1 for every subsample; the job names
# differ through the dataset name in the prefix.
for subsample_pct in [50, 25, 10, 5, 1]:
    task = "single_sample_method_benchmarking"
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct_embryo2"
    cell_type_key = "celltype_mapped_refined"
    species = "mouse"
    node_label_method = "one-hop-attention" # one-hop-norm, one-hop-attention
    # One entry per run; 16384 is full dataset
    edge_batch_size_str = " ".join(["16384"] * 10)

    job_id = 1
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
    job_folder_path = f"../scripts/{task}/slurm_jobs"
    script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
    script_name = "train_nichecompass_benchmarking_models.py"

    # Command line options in the order they are handed to the training
    # script.
    cli_options = [
        "--adata_new_name None",
        "--n_neighbors_list 4 4 8 8 12 12 16 16 20 20",
        f"--edge_batch_size_list {edge_batch_size_str}",
        "--node_batch_size_list None None None None None None None None None None",
        "--seeds 0 1 2 3 4 5 6 7 8 9",
        "--run_index 1 2 3 4 5 6 7 8 9 10",
        f"--cell_type_key {cell_type_key}",
        "--nichenet_keep_target_genes_ratio 1.",
        "--nichenet_max_n_target_genes_per_gp 250",
        "--include_mebocost_gps",
        f"--species {species}",
        "--gp_filter_mode subset",
        "--combine_overlap_gps",
        "--overlap_thresh_source_genes 0.9",
        "--overlap_thresh_target_genes 0.9",
        "--overlap_thresh_genes 0.9",
        f"--dataset {dataset}",
        "--reference_batches None",
        "--counts_key counts",
        "--spatial_key spatial",
        "--adj_key spatial_connectivities",
        "--mapping_entity_key mapping_entity",
        "--no-filter_genes",
        "--gp_targets_mask_key nichecompass_gp_targets",
        "--gp_sources_mask_key nichecompass_gp_sources",
        "--gp_names_key nichecompass_gp_names",
        f"--model_label {node_label_method}_{task}",
        "--active_gp_names_key nichecompass_active_gp_names",
        "--latent_key nichecompass_latent",
        "--active_gp_thresh_ratio 0.05",
        "--gene_expr_recon_dist nb",
        "--log_variational",
        f"--node_label_method {node_label_method}",
        "--n_layers_encoder 1",
        "--n_hidden_encoder None",
        "--conv_layer_encoder gcnconv",
        "--n_epochs 100",
        "--n_epochs_all_gps 25",
        "--lr 0.001",
        "--lambda_edge_recon 500000.",
        "--lambda_gene_expr_recon 300.",
        "--lambda_group_lasso 0.",
        "--lambda_l1_masked 0.",
        f"--timestamp_suffix _{job_id}",
    ]
    # Each option is preceded by exactly one space.
    script_args = "".join(f" {option}" for option in cli_options)

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

### 3.2 STARmap PLUS Mouse Central Nervous System

#### 3.2.1 Spatial Transcriptomics Data

In [None]:
# Single sample method benchmarking on the STARmap PLUS mouse central nervous
# system dataset (batch 1): builds the command line for the benchmarking
# training script and hands it to submit_python_script as a single job.
task = "single_sample_method_benchmarking"
dataset = "starmap_plus_mouse_cns_batch1"
cell_type_key = "Main_molecular_cell_type"
species = "mouse"
node_label_method = "one-hop-norm" # one-hop-norm, one-hop-attention
# One entry per run; 16384 is full dataset
edge_batch_size_str = " ".join(["16384"] * 10)

job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command line options in the order they are handed to the training script.
cli_options = [
    "--adata_new_name None",
    "--n_neighbors_list 4 4 8 8 12 12 16 16 20 20",
    f"--edge_batch_size_list {edge_batch_size_str}",
    "--node_batch_size_list None None None None None None None None None None",
    "--seeds 0 1 2 3 4 5 6 7 8 9",
    "--run_index 1 2 3 4 5 6 7 8 9 10",
    f"--cell_type_key {cell_type_key}",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    f"--dataset {dataset}",
    "--reference_batches None",
    "--counts_key counts",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--no-filter_genes",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    "--lambda_group_lasso 0.",
    "--lambda_l1_masked 0.",
    f"--timestamp_suffix _{job_id}",
]
# Each option is preceded by exactly one space.
script_args = "".join(f" {option}" for option in cli_options)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000)

#### 3.2.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Create one single-sample benchmarking training job per subsample of the
# STARmap PLUS mouse CNS dataset (batch 1). Everything that does not depend
# on the subsample percentage is assigned once, ahead of the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "Main_molecular_cell_type"
species = "mouse"
node_label_method = "one-hop-attention" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50, 25, 10, 5, 1):
    dataset = f"starmap_plus_mouse_cns_subsample_{subsample_pct}pct_batch1"
    # The dataset name carries the subsample percentage, so the job name is
    # distinct per iteration even though job_id is constant.
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Command-line arguments forwarded to the training script. The
    # list-valued options (neighbors, batch sizes, seeds, run indices) carry
    # ten entries each.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file for this subsample via the notebook helper.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.3 NanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

#### 3.3.1 Spatial Transcriptomics Data

In [None]:
# Create the single-sample benchmarking training job for the full
# NanoString CosMx human NSCLC dataset.
task = "single_sample_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
cell_type_key = "cell_type"
# This is a human dataset, so the species forwarded to the training script
# must be "human" (it was "mouse", apparently copied from the mouse cells).
# NOTE(review): presumably the script uses it to pick species-specific prior
# knowledge; confirm against the training script.
species = "human"
node_label_method = "one-hop-norm" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments forwarded to the training script. The list-valued
# options (neighbors, batch sizes, seeds, run indices) carry ten entries each.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None None None"
    " --seeds 0 1 2 3 4 5 6 7 8 9"
    " --run_index 1 2 3 4 5 6 7 8 9 10"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1."
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {node_label_method}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.05"
    " --gene_expr_recon_dist nb"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file for this dataset via the notebook helper.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.3.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Create one single-sample benchmarking training job per subsample of the
# NanoString CosMx human NSCLC dataset. Everything that does not depend on
# the subsample percentage is assigned once, ahead of the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
# This is a human dataset, so the species forwarded to the training script
# must be "human" (it was "mouse", apparently copied from the mouse cells).
# NOTE(review): presumably the script uses it to pick species-specific prior
# knowledge; confirm against the training script.
species = "human"
node_label_method = "one-hop-attention" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50, 25, 10, 5, 1):
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    # The dataset name carries the subsample percentage, so the job name is
    # distinct per iteration even though job_id is constant.
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Command-line arguments forwarded to the training script. The
    # list-valued options (neighbors, batch sizes, seeds, run indices) carry
    # ten entries each.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file for this subsample via the notebook helper.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.4 Vizgen MERFISH Mouse Liver

#### 3.4.1 Spatial Transcriptomics Data

In [None]:
# Create the single-sample benchmarking training job for the full Vizgen
# MERFISH mouse liver dataset.
task = "single_sample_method_benchmarking"
dataset = "vizgen_merfish_mouse_liver"
cell_type_key = "Cell_Type"
species = "mouse"
node_label_method = "one-hop-norm" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments forwarded to the training script. The list-valued
# options (neighbors, batch sizes, seeds, run indices) carry ten entries each.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None None None"
    " --seeds 0 1 2 3 4 5 6 7 8 9"
    " --run_index 1 2 3 4 5 6 7 8 9 10"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1."
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {node_label_method}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.05"
    " --gene_expr_recon_dist nb"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file for this dataset via the notebook helper.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.4.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Create one single-sample benchmarking training job per subsample of the
# Vizgen MERFISH mouse liver dataset. Everything that does not depend on the
# subsample percentage is assigned once, ahead of the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "Cell_Type"
species = "mouse"
node_label_method = "one-hop-attention" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50, 25, 10, 5, 1):
    dataset = f"vizgen_merfish_mouse_liver_subsample_{subsample_pct}pct"
    # The dataset name carries the subsample percentage, so the job name is
    # distinct per iteration even though job_id is constant.
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Command-line arguments forwarded to the training script. The
    # list-valued options (neighbors, batch sizes, seeds, run indices) carry
    # ten entries each.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file for this subsample via the notebook helper.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 3.5 Slide-seqV2 Mouse Hippocampus

#### 3.5.1 Spatial Transcriptomics Data

In [None]:
# Create the single-sample benchmarking training job for the full
# Slide-seqV2 mouse hippocampus dataset.
task = "single_sample_method_benchmarking"
dataset = "slideseqv2_mouse_hippocampus"
cell_type_key = "cell_type"
species = "mouse"
node_label_method = "one-hop-norm" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments forwarded to the training script. The list-valued
# options (neighbors, batch sizes, seeds, run indices) carry ten entries each.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None None None"
    " --seeds 0 1 2 3 4 5 6 7 8 9"
    " --run_index 1 2 3 4 5 6 7 8 9 10"
    f" --cell_type_key {cell_type_key}"
    " --nichenet_keep_target_genes_ratio 1."
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    " --reference_batches None"
    " --counts_key counts"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --mapping_entity_key mapping_entity"
    " --no-filter_genes"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {node_label_method}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.05"
    " --gene_expr_recon_dist nb"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file for this dataset via the notebook helper.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 3.5.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Create one single-sample benchmarking training job per subsample of the
# Slide-seqV2 mouse hippocampus dataset. Everything that does not depend on
# the subsample percentage is assigned once, ahead of the loop.
task = "single_sample_method_benchmarking"
cell_type_key = "cell_type"
species = "mouse"
node_label_method = "one-hop-attention" # options: one-hop-norm, one-hop-attention
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384" # 16384 is full dataset
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50, 25, 10, 5, 1):
    dataset = f"slideseqv2_mouse_hippocampus_subsample_{subsample_pct}pct"
    # The dataset name carries the subsample percentage, so the job name is
    # distinct per iteration even though job_id is constant.
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Command-line arguments forwarded to the training script. The
    # list-valued options (neighbors, batch sizes, seeds, run indices) carry
    # ten entries each.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        " --reference_batches None"
        " --counts_key counts"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --no-filter_genes"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file for this subsample via the notebook helper.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

## 4. NicheCompass Sample Integration Method Benchmarking Models Training

### 4.1 seqFISH Mouse Organogenesis

#### 4.1.1 Spatial Transcriptomics Data

In [None]:
# Create the sample-integration benchmarking training job for the full
# seqFISH mouse organogenesis dataset (six reference batches).
task = "sample_integration_method_benchmarking"
dataset = "seqfish_mouse_organogenesis"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_embeds_nums = "6"
# Floats are rendered through the f-string below, i.e. as "0.0".
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
# NOTE(review): `cat_covariates_no_edges` is read below but never assigned in
# this cell; it must already exist from an earlier cell, otherwise building
# `script_args` raises NameError. Confirm the intended value and set it here.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

# Command-line arguments forwarded to the training script. The list-valued
# options (neighbors, batch sizes, seeds, run indices) carry ten entries each.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None None None"
    " --seeds 0 1 2 3 4 5 6 7 8 9"
    " --run_index 1 2 3 4 5 6 7 8 9 10"
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1."
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {node_label_method}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.05"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --timestamp_suffix _{job_id}"
)

# Write the Slurm job file for this dataset via the notebook helper.
submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 4.1.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Create one sample-integration benchmarking training job per subsample of
# the seqFISH mouse organogenesis dataset. Everything that does not depend on
# the subsample percentage is assigned once, ahead of the loop.
task = "sample_integration_method_benchmarking"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
cell_type_key = "celltype_mapped_refined"
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_embeds_nums = "6"
# Floats are rendered through the f-string below, i.e. as "0.0".
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
# NOTE(review): `cat_covariates_no_edges` is read below but never assigned in
# this cell; it must already exist from an earlier cell, otherwise building
# `script_args` raises NameError. Confirm the intended value and set it here.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_benchmarking_models.py"

for subsample_pct in (50, 25, 10, 5, 1):
    dataset = f"seqfish_mouse_organogenesis_subsample_{subsample_pct}pct"
    # The dataset name carries the subsample percentage, so the job name is
    # distinct per iteration even though job_id is constant.
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Command-line arguments forwarded to the training script. The
    # list-valued options (neighbors, batch sizes, seeds, run indices) carry
    # ten entries each.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    # Write the Slurm job file for this subsample via the notebook helper.
    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 4.2 STARmap PLUS Mouse Central Nervous System

#### 4.2.1 Spatial Transcriptomics Data

In [None]:
# Benchmarking job for the full STARmap PLUS mouse CNS dataset: assemble the
# command-line arguments for the benchmarking training script and pass them
# to `submit_python_script`.
# NOTE: `cat_covariates_no_edges` is not assigned in this cell, so it has to
# be defined by a previously executed cell. `conda_env_name` comes from
# section 1.2.

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "starmap_plus_mouse_cns"
species = "mouse"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "Main_molecular_cell_type"

# Model settings
node_label_method = "one-hop-norm"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384"
cat_covariates_keys = "batch"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# All contrastive settings are set to zero for this job
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job bookkeeping
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

# Adjacent string literals inside the parentheses are concatenated into one
# argument string; every fragment carries its own leading space separator.
# The list-valued options (neighbors, batch sizes, seeds, run indices) each
# hold ten entries.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None None None"
    " --seeds 0 1 2 3 4 5 6 7 8 9"
    " --run_index 1 2 3 4 5 6 7 8 9 10"
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1."
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {node_label_method}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.05"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 4.2.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Benchmarking jobs for the subsampled STARmap PLUS mouse CNS datasets: one
# job per subsample percentage. Settings that do not depend on the
# percentage are declared once up front; only the dataset name, the job name
# prefix and the argument string are rebuilt per iteration.
# NOTE: `cat_covariates_no_edges` is not assigned in this cell, so it has to
# be defined by a previously executed cell. `conda_env_name` comes from
# section 1.2.

# Task and dataset
task = "sample_integration_method_benchmarking"
species = "mouse"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "Main_molecular_cell_type"

# Model settings
node_label_method = "one-hop-norm"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384"
cat_covariates_keys = "batch"
cat_covariates_embeds_nums = "3"
cat_covariates_embeds_injection = "gene_expr_decoder"

# All contrastive settings are set to zero for these jobs
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job bookkeeping; the job id is the same for every subsample, the
# job names differ through the dataset name in the prefix.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"starmap_plus_mouse_cns_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Adjacent string literals inside the parentheses are concatenated into
    # one argument string; every fragment carries its own leading space.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

### 4.3 nanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

#### 4.3.1 Spatial Transcriptomics Data

In [None]:
# Benchmarking job for the full nanoString CosMx human NSCLC dataset:
# assemble the command-line arguments for the benchmarking training script
# and pass them to `submit_python_script`.
# NOTE: `cat_covariates_no_edges` is not assigned in this cell, so it has to
# be defined by a previously executed cell. `conda_env_name` comes from
# section 1.2.

# Task and dataset
task = "sample_integration_method_benchmarking"
dataset = "nanostring_cosmx_human_nsclc"
species = "human"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"

# Model settings; two categorical covariates are passed here ("batch" and
# "fov"), with one embedding size listed per covariate.
node_label_method = "one-hop-norm"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384"
cat_covariates_keys = "batch fov"
cat_covariates_embeds_nums = "3 10"
cat_covariates_embeds_injection = "gene_expr_decoder"

# All contrastive settings are set to zero for this job
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job bookkeeping
job_id = 1
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

# Adjacent string literals inside the parentheses are concatenated into one
# argument string; every fragment carries its own leading space separator.
# The list-valued options (neighbors, batch sizes, seeds, run indices) each
# hold ten entries.
script_args = (
    " --adata_new_name None"
    " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
    f" --edge_batch_size_list {edge_batch_size_str}"
    " --node_batch_size_list None None None None None None None None None None"
    " --seeds 0 1 2 3 4 5 6 7 8 9"
    " --run_index 1 2 3 4 5 6 7 8 9 10"
    f" --cell_type_key {cell_type_key}"
    " --no-filter_genes"
    " --nichenet_keep_target_genes_ratio 1."
    " --nichenet_max_n_target_genes_per_gp 250"
    " --include_mebocost_gps"
    f" --species {species}"
    " --gp_filter_mode subset"
    " --combine_overlap_gps"
    " --overlap_thresh_source_genes 0.9"
    " --overlap_thresh_target_genes 0.9"
    " --overlap_thresh_genes 0.9"
    f" --dataset {dataset}"
    f" --reference_batches {reference_batches}"
    " --counts_key counts"
    f" --cat_covariates_keys {cat_covariates_keys}"
    f" --cat_covariates_no_edges {cat_covariates_no_edges}"
    " --spatial_key spatial"
    " --adj_key spatial_connectivities"
    " --gp_targets_mask_key nichecompass_gp_targets"
    " --gp_sources_mask_key nichecompass_gp_sources"
    " --gp_names_key nichecompass_gp_names"
    f" --model_label {node_label_method}_{task}"
    " --active_gp_names_key nichecompass_active_gp_names"
    " --latent_key nichecompass_latent"
    " --active_gp_thresh_ratio 0.05"
    " --gene_expr_recon_dist nb"
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
    " --log_variational"
    f" --node_label_method {node_label_method}"
    " --n_layers_encoder 1"
    " --n_hidden_encoder None"
    " --conv_layer_encoder gcnconv"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    " --lambda_l1_masked 0."
    f" --timestamp_suffix _{job_id}"
)

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

#### 4.3.2 Spatial Transcriptomics Data Subsamples

In [None]:
# Benchmarking jobs for the subsampled nanoString CosMx human NSCLC
# datasets: one job per subsample percentage. Settings that do not depend on
# the percentage are declared once up front; only the dataset name, the job
# name prefix and the argument string are rebuilt per iteration.
# NOTE: `cat_covariates_no_edges` is not assigned in this cell, so it has to
# be defined by a previously executed cell. `conda_env_name` comes from
# section 1.2.

# Task and dataset
task = "sample_integration_method_benchmarking"
species = "human"
reference_batches = "batch1 batch2 batch3"
cell_type_key = "cell_type"

# Model settings; two categorical covariates are passed here ("batch" and
# "fov"), with one embedding size listed per covariate.
node_label_method = "one-hop-norm"
edge_batch_size_str = "16384 16384 16384 16384 16384 16384 16384 16384 16384 16384"
cat_covariates_keys = "batch fov"
cat_covariates_embeds_nums = "3 10"
cat_covariates_embeds_injection = "gene_expr_decoder"

# All contrastive settings are set to zero for these jobs
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

# Slurm job bookkeeping; the job id is the same for every subsample, the
# job names differ through the dataset name in the prefix.
job_id = 1
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_name = "train_nichecompass_benchmarking_models.py"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"

for subsample_pct in [50, 25, 10, 5, 1]:
    dataset = f"nanostring_cosmx_human_nsclc_subsample_{subsample_pct}pct"
    job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"

    # Adjacent string literals inside the parentheses are concatenated into
    # one argument string; every fragment carries its own leading space.
    script_args = (
        " --adata_new_name None"
        " --n_neighbors_list 4 4 8 8 12 12 16 16 20 20"
        f" --edge_batch_size_list {edge_batch_size_str}"
        " --node_batch_size_list None None None None None None None None None None"
        " --seeds 0 1 2 3 4 5 6 7 8 9"
        " --run_index 1 2 3 4 5 6 7 8 9 10"
        f" --cell_type_key {cell_type_key}"
        " --no-filter_genes"
        " --nichenet_keep_target_genes_ratio 1."
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        " --counts_key counts"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        " --active_gp_thresh_ratio 0.05"
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        " --conv_layer_encoder gcnconv"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        " --lambda_edge_recon 500000."
        " --lambda_gene_expr_recon 300."
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        " --lambda_l1_masked 0."
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

## 5. NicheCompass Reference Model Training

### 5.1 seqFISH Mouse Organogenesis Imputed

In [9]:
# Hyperparameter grid for the seqFISH mouse organogenesis (imputed)
# reference model: one Slurm job is submitted per combination, and each job
# gets a consecutive `job_id` starting at 1.
# The grid is iterated with `itertools.product` instead of seven nested
# loops; `product` varies the right-most factor fastest, which is the same
# order nested `for` loops produce, so the job numbering is unchanged.
from itertools import product

# Settings shared by every job in the grid (independent of the swept values)
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_neighbors = 12
n_sampled_neighbors = -1
n_hvg = 4000
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "512"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# 3 * 3 * 2 * 3 * 2 * 1 * 1 = 108 combinations
hyperparam_grid = product(
    [50000, 500000, 5000000],  # lambda_edge_recon
    [30, 300, 3000],           # lambda_gene_expr_recon
    [0., 0.05],                # active_gp_thresh_ratio
    [0, 5, 10],                # lambda_l1_masked
    [5, 10],                   # lambda_l1_addon
    [1.0],                     # nichenet_keep_target_genes_ratio
    ["gatv2conv"],             # conv_layer_encoder
)

job_id = 1
for (lambda_edge_recon,
     lambda_gene_expr_recon,
     active_gp_thresh_ratio,
     lambda_l1_masked,
     lambda_l1_addon,
     nichenet_keep_target_genes_ratio,
     conv_layer_encoder) in hyperparam_grid:
    # Adjacent string literals inside the parentheses are concatenated into
    # one argument string; every fragment carries its own leading space.
    script_args = (
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        f" --n_neighbors {n_neighbors}"
        " --filter_genes"
        f" --n_hvg {n_hvg}"
        f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        " --counts_key counts"
        " --condition_key batch"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        f" --lambda_edge_recon {lambda_edge_recon}"
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        f" --lambda_l1_masked {lambda_l1_masked}"
        f" --lambda_l1_addon {lambda_l1_addon}"
        f" --edge_batch_size {edge_batch_size}"
        " --node_batch_size None"
        f" --n_sampled_neighbors {n_sampled_neighbors}"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

    # Next combination gets the next job id (used in job name and timestamp
    # suffix).
    job_id += 1

Submitted batch job 12669884
Submitted batch job 12669885
Submitted batch job 12669886
Submitted batch job 12669887
Submitted batch job 12669888
Submitted batch job 12669889
Submitted batch job 12669890
Submitted batch job 12669891
Submitted batch job 12669892
Submitted batch job 12669893
Submitted batch job 12669894
Submitted batch job 12669895
Submitted batch job 12669896
Submitted batch job 12669897
Submitted batch job 12669898
Submitted batch job 12669899
Submitted batch job 12669900
Submitted batch job 12669901
Submitted batch job 12669902
Submitted batch job 12669903
Submitted batch job 12669904
Submitted batch job 12669905
Submitted batch job 12669906
Submitted batch job 12669907
Submitted batch job 12669908
Submitted batch job 12669909
Submitted batch job 12669910
Submitted batch job 12669911
Submitted batch job 12669912
Submitted batch job 12669913
Submitted batch job 12669914
Submitted batch job 12669915
Submitted batch job 12669916
Submitted batch job 12669917
Submitted batc

In [5]:
# tmp: temporary single-configuration run of the seqFISH mouse organogenesis
# (imputed) reference model. Every grid factor holds exactly one value, so
# one job is submitted; job ids start at 2.
# The grid is iterated with `itertools.product` instead of seven nested
# loops; `product` varies the right-most factor fastest, which is the same
# order nested `for` loops produce, so adding values keeps the numbering.
from itertools import product

# Settings shared by every job in the grid (independent of the swept values)
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_neighbors = 4
n_sampled_neighbors = -1
n_hvg = 4000
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "4096"
cat_covariates_embeds_injection = "gene_expr_decoder"  # alternative: "encoder gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

hyperparam_grid = product(
    [500000],     # lambda_edge_recon
    [300],        # lambda_gene_expr_recon
    [0.05],       # active_gp_thresh_ratio
    [5.],         # lambda_l1_masked
    [5.],         # lambda_l1_addon
    [1.0],        # nichenet_keep_target_genes_ratio
    ["gcnconv"],  # conv_layer_encoder
)

job_id = 2
for (lambda_edge_recon,
     lambda_gene_expr_recon,
     active_gp_thresh_ratio,
     lambda_l1_masked,
     lambda_l1_addon,
     nichenet_keep_target_genes_ratio,
     conv_layer_encoder) in hyperparam_grid:
    # Adjacent string literals inside the parentheses are concatenated into
    # one argument string; every fragment carries its own leading space.
    script_args = (
        f" --dataset {dataset}"
        f" --reference_batches {reference_batches}"
        f" --n_neighbors {n_neighbors}"
        " --filter_genes"
        f" --n_hvg {n_hvg}"
        f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}"
        " --nichenet_max_n_target_genes_per_gp 250"
        " --include_mebocost_gps"
        f" --species {species}"
        " --gp_filter_mode subset"
        " --combine_overlap_gps"
        " --overlap_thresh_source_genes 0.9"
        " --overlap_thresh_target_genes 0.9"
        " --overlap_thresh_genes 0.9"
        " --counts_key counts"
        " --condition_key batch"
        f" --cat_covariates_keys {cat_covariates_keys}"
        f" --cat_covariates_no_edges {cat_covariates_no_edges}"
        " --spatial_key spatial"
        " --adj_key spatial_connectivities"
        " --mapping_entity_key mapping_entity"
        " --gp_targets_mask_key nichecompass_gp_targets"
        " --gp_sources_mask_key nichecompass_gp_sources"
        " --gp_names_key nichecompass_gp_names"
        f" --model_label {node_label_method}_{task}"
        " --active_gp_names_key nichecompass_active_gp_names"
        " --latent_key nichecompass_latent"
        f" --active_gp_thresh_ratio {active_gp_thresh_ratio}"
        " --gene_expr_recon_dist nb"
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}"
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}"
        " --log_variational"
        f" --node_label_method {node_label_method}"
        " --n_layers_encoder 1"
        " --n_hidden_encoder None"
        f" --conv_layer_encoder {conv_layer_encoder}"
        " --n_epochs 100"
        " --n_epochs_all_gps 25"
        " --n_epochs_no_cat_covariates_contrastive 0"
        " --lr 0.001"
        f" --lambda_edge_recon {lambda_edge_recon}"
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}"
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
        " --lambda_group_lasso 0."
        f" --lambda_l1_masked {lambda_l1_masked}"
        f" --lambda_l1_addon {lambda_l1_addon}"
        f" --edge_batch_size {edge_batch_size}"
        " --node_batch_size None"
        f" --n_sampled_neighbors {n_sampled_neighbors}"
        f" --timestamp_suffix _{job_id}"
    )

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

    # Next combination gets the next job id (used in job name and timestamp
    # suffix).
    job_id += 1

Submitted batch job 12668637


In [None]:
# Submit reference-model training for the imputed seqFISH mouse organogenesis
# dataset (job 4).

# --- Run identity ---
task = "reference"
dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 4

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6"
n_neighbors = 12
n_hvg = 4000
species = "mouse"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = "4096"
# Alternative injection setting: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"

# --- Loss weights ---
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 30.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--filter_genes",
    f"--n_hvg {n_hvg}",
    "--nichenet_keep_target_genes_ratio 0.1",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    "--condition_key batch",
    f"--cat_covariates_keys {cat_covariates_keys}",
    f"--cat_covariates_no_edges {cat_covariates_no_edges}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 50000000.",
    "--lambda_gene_expr_recon 30000.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.2 STARmap PLUS Mouse Central Nervous System

In [None]:
# Submit reference-model training for the STARmap PLUS mouse CNS dataset
# (job 1, three-batch subset).
task = "reference"
dataset = "starmap_plus_mouse_cns"
job_id = 1
# Only the first three batches are used for this run. (The cell previously
# assigned the full "batch1 ... batch20" list first and overwrote it right
# away, so that assignment never had any effect.)
reference_batches = "batch1 batch2 batch3"
n_neighbors = 12 # 8, 12
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = 2048 # 2048
cat_covariates_embeds_injection = "encoder gene_expr_decoder"
# Defined explicitly so that this cell no longer depends on values left over
# from whichever cell was executed before it (NameError on a fresh kernel,
# silently wrong covariates otherwise).
# NOTE(review): "batch" / "True" mirror the other single-covariate reference
# cells in this notebook -- confirm these are the intended values for this run.
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"
lambda_cat_covariates_contrastive = 0. # 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0. # 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.

# Job name, folder for the generated job file and logs, and the script to run.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"
# Command-line arguments handed to the training script; each fragment starts
# with a space so the fragments concatenate into one argument string.
script_args = f" --dataset {dataset}" \
              f" --reference_batches {reference_batches}" \
              f" --n_neighbors {n_neighbors}" \
              " --no-filter_genes" \
              " --nichenet_keep_target_genes_ratio 1." \
              " --nichenet_max_n_target_genes_per_gp 250" \
              " --include_mebocost_gps" \
              f" --species {species}" \
              " --gp_filter_mode subset" \
              " --combine_overlap_gps" \
              " --overlap_thresh_source_genes 0.9" \
              " --overlap_thresh_target_genes 0.9" \
              " --overlap_thresh_genes 0.9" \
              " --counts_key counts" \
              f" --cat_covariates_keys {cat_covariates_keys}" \
              f" --cat_covariates_no_edges {cat_covariates_no_edges}" \
              " --spatial_key spatial" \
              " --adj_key spatial_connectivities" \
              " --mapping_entity_key mapping_entity" \
              " --gp_targets_mask_key nichecompass_gp_targets" \
              " --gp_sources_mask_key nichecompass_gp_sources" \
              " --gp_names_key nichecompass_gp_names" \
              f" --model_label {node_label_method}_{task}" \
              " --active_gp_names_key nichecompass_active_gp_names" \
              " --latent_key nichecompass_latent" \
              " --active_gp_thresh_ratio 0.05" \
              " --gene_expr_recon_dist nb" \
              f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}" \
              f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}" \
              " --log_variational" \
              f" --node_label_method {node_label_method}" \
              " --n_layers_encoder 1" \
              " --n_hidden_encoder None" \
              " --conv_layer_encoder gcnconv" \
              " --n_epochs 100" \
              " --n_epochs_all_gps 25" \
              " --n_epochs_no_cat_covariates_contrastive 0" \
              " --lr 0.001" \
              " --lambda_edge_recon 500000." \
              " --lambda_gene_expr_recon 300." \
              f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}" \
              f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}" \
              f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}" \
              " --lambda_group_lasso 0." \
              f" --lambda_l1_masked {lambda_l1_masked}" \
              f" --edge_batch_size {edge_batch_size}" \
              " --node_batch_size None" \
              f" --timestamp_suffix _{job_id}"

submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000)

In [None]:
# Submit reference-model training for the STARmap PLUS mouse CNS dataset
# (job 3, all twenty batches, contrastive loss enabled).
# NOTE(review): this cell passes --condition_key and --n_cond_embed, whereas
# the other reference cells in this notebook pass --cat_covariates_* flags --
# confirm the training script still accepts these arguments.

# --- Run identity ---
task = "reference"
dataset = "starmap_plus_mouse_cns"
job_id = 3

# --- Data / graph settings ---
# "batch1 batch2 ... batch20", space separated.
reference_batches = " ".join(f"batch{batch_idx}" for batch_idx in range(1, 21))
n_neighbors = 12  # tried: 8, 12
species = "mouse"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 1024  # tried: 2048
cat_covariates_embeds_injection = "encoder gene_expr_decoder"
n_cond_embed = 20  # tried: 3, 20, None

# --- Loss weights ---
lambda_cat_covariates_contrastive = 250000.0  # tried: 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0.03125  # tried: 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    "--condition_key batch",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--n_cond_embed {n_cond_embed}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.3 NanoString CosMx SMI Human Non-Small-Cell Lung Cancer (NSCLC)

In [None]:
# Submit reference-model training for the filtered CosMx human NSCLC dataset
# (job 1, with neighbor sampling).

# --- Run identity ---
task = "reference"
dataset = "nanostring_cosmx_human_nsclc_filtered"
job_id = 1

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3"
n_neighbors = 20  # tried: 4, 8, 12
n_sampled_neighbors = 4
species = "human"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 2048  # tried: 4096, 2048, 512
# Alternative injection setting: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = "3"

# --- Loss weights ---
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 0.01",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    f"--cat_covariates_keys {cat_covariates_keys}",
    f"--cat_covariates_no_edges {cat_covariates_no_edges}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--n_sampled_neighbors {n_sampled_neighbors}",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# Submit reference-model training for the CosMx human NSCLC dataset
# (job 1, "fov" as the only categorical covariate).

# --- Run identity ---
task = "reference"
dataset = "nanostring_cosmx_human_nsclc"
job_id = 1

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3"
n_neighbors = 12  # tried: 4, 8, 12
species = "human"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 1024  # tried: 4096, 2048, 512
# Alternative injection setting: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_keys = "fov"
cat_covariates_no_edges = "False"
cat_covariates_embeds_nums = "10"

# --- Loss weights ---
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    f"--cat_covariates_keys {cat_covariates_keys}",
    f"--cat_covariates_no_edges {cat_covariates_no_edges}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# Submit reference-model training for the CosMx human NSCLC dataset
# (job 10, "batch" and "fov" covariates, contrastive loss enabled).

# --- Run identity ---
task = "reference"
dataset = "nanostring_cosmx_human_nsclc"
job_id = 10

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3"
n_neighbors = 12  # tried: 4, 8, 12
species = "human"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 1024  # tried: 4096, 2048, 512
# Alternative injection setting: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
# One space-separated entry per covariate in each of the three strings below.
cat_covariates_keys = "batch fov"
cat_covariates_no_edges = "True False"
cat_covariates_embeds_nums = "3 10"

# --- Loss weights ---
lambda_cat_covariates_contrastive = 100000.0
contrastive_logits_pos_ratio = 0.0625
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    f"--cat_covariates_keys {cat_covariates_keys}",
    f"--cat_covariates_no_edges {cat_covariates_no_edges}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# Submit reference-model training for the CosMx human NSCLC dataset
# (job 11, no categorical covariates).

# --- Run identity ---
task = "reference"
dataset = "nanostring_cosmx_human_nsclc"
job_id = 11

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3"
n_neighbors = 12  # tried: 4, 8, 12
species = "human"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 1024  # tried: 4096, 2048, 512
# Alternative injection setting: "encoder gene_expr_decoder"
cat_covariates_embeds_injection = "gene_expr_decoder"
# These are interpolated into the argument string below and therefore reach
# the training script as the literal text "None".
cat_covariates_keys = None
cat_covariates_no_edges = None
cat_covariates_embeds_nums = None

# --- Loss weights ---
lambda_cat_covariates_contrastive = 0.0
contrastive_logits_pos_ratio = 0.0
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    f"--cat_covariates_keys {cat_covariates_keys}",
    f"--cat_covariates_no_edges {cat_covariates_no_edges}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

In [None]:
# Submit reference-model training for the CosMx human NSCLC dataset
# (job 15, eight batches, "patient", "batch" and "fov" covariates).

# --- Run identity ---
task = "reference"
dataset = "nanostring_cosmx_human_nsclc"
job_id = 15

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3 batch4 batch5 batch6 batch7 batch8"
n_neighbors = 12  # tried: 4, 8, 12
species = "human"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 512  # tried: 4096, 2048, 512
cat_covariates_embeds_injection = "encoder gene_expr_decoder"
# One space-separated entry per covariate in each of the two strings below.
cat_covariates_keys = "patient batch fov"
cat_covariates_embeds_nums = "5 8 20"

# --- Loss weights ---
lambda_cat_covariates_contrastive = 250000.0
contrastive_logits_pos_ratio = 0.0625
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one. This run does not pass --cat_covariates_no_edges.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    f"--cat_covariates_keys {cat_covariates_keys}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.4 Vizgen MERFISH Human Ovarian Cancer

In [None]:
# Submit reference-model training for the Vizgen MERFISH human ovarian cancer
# dataset (job 2, "patient" and "batch" covariates).

# --- Run identity ---
task = "reference"
dataset = "vizgen_merfish_human_ovarian_cancer"
job_id = 2

# --- Data / graph settings ---
reference_batches = "batch1 batch2 batch3 batch4"
n_neighbors = 4  # tried: 8, 12
species = "human"

# --- Model settings ---
node_label_method = "one-hop-norm"
edge_batch_size = 4096  # tried: 2048
cat_covariates_embeds_injection = "encoder gene_expr_decoder"
# One space-separated entry per covariate in each of the three strings below.
cat_covariates_keys = "patient batch"
cat_covariates_no_edges = "True True"
cat_covariates_embeds_nums = "2 4"

# --- Loss weights ---
lambda_cat_covariates_contrastive = 0.0  # tried: 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0.0  # tried: 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.0
lambda_l1_masked = 0.0

# --- Job bookkeeping ---
job_name_prefix = "_".join([dataset, "nichecompass", node_label_method, task])
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments; every flag is preceded by exactly one space,
# including the first one.
script_args = " " + " ".join([
    f"--dataset {dataset}",
    f"--reference_batches {reference_batches}",
    f"--n_neighbors {n_neighbors}",
    "--no-filter_genes",
    "--nichenet_keep_target_genes_ratio 1.",
    "--nichenet_max_n_target_genes_per_gp 250",
    "--include_mebocost_gps",
    f"--species {species}",
    "--gp_filter_mode subset",
    "--combine_overlap_gps",
    "--overlap_thresh_source_genes 0.9",
    "--overlap_thresh_target_genes 0.9",
    "--overlap_thresh_genes 0.9",
    "--counts_key counts",
    f"--cat_covariates_keys {cat_covariates_keys}",
    f"--cat_covariates_no_edges {cat_covariates_no_edges}",
    "--spatial_key spatial",
    "--adj_key spatial_connectivities",
    "--mapping_entity_key mapping_entity",
    "--gp_targets_mask_key nichecompass_gp_targets",
    "--gp_sources_mask_key nichecompass_gp_sources",
    "--gp_names_key nichecompass_gp_names",
    f"--model_label {node_label_method}_{task}",
    "--active_gp_names_key nichecompass_active_gp_names",
    "--latent_key nichecompass_latent",
    "--active_gp_thresh_ratio 0.05",
    "--gene_expr_recon_dist nb",
    f"--cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f"--cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    "--log_variational",
    f"--node_label_method {node_label_method}",
    "--n_layers_encoder 1",
    "--n_hidden_encoder None",
    "--conv_layer_encoder gcnconv",
    "--n_epochs 100",
    "--n_epochs_all_gps 25",
    "--n_epochs_no_cat_covariates_contrastive 0",
    "--lr 0.001",
    "--lambda_edge_recon 500000.",
    "--lambda_gene_expr_recon 300.",
    f"--lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f"--contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f"--contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    "--lambda_group_lasso 0.",
    f"--lambda_l1_masked {lambda_l1_masked}",
    f"--edge_batch_size {edge_batch_size}",
    "--node_batch_size None",
    f"--timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.4 Vizgen MERFISH Human Lung Cancer

In [None]:
# Vizgen MERFISH human lung cancer: configure one reference-model training run
# and hand it to `submit_python_script`.
# NOTE(review): `cat_covariates_keys` and `cat_covariates_no_edges` are read
# below but never assigned in this cell; they come from whatever an earlier
# cell left in the notebook namespace -- confirm the intended values.
task = "reference"
dataset = "vizgen_merfish_human_lung_cancer"
job_id = 1

reference_batches = "batch1 batch2"
species = "human"
n_neighbors = 4 # 8, 12
node_label_method = "one-hop-norm"
edge_batch_size = 4096 # 2048

cat_covariates_embeds_injection = "encoder gene_expr_decoder"
cat_covariates_embeds_nums = "2"

lambda_cat_covariates_contrastive = 250000. # 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0.125 # 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.

# Job naming and script locations.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments for the training script; every fragment carries its
# own leading space, so plain concatenation yields the final argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {node_label_method}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    f" --lambda_l1_masked {lambda_l1_masked}",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

### 5.5 Spatial ATAC-RNA-Seq Mouse Embryo & Brain

In [12]:
# Spatial ATAC-RNA-seq mouse brain (batch 2): submit one reference-model
# training job per hyperparameter combination. The grid below currently holds
# a single combination.
import itertools

# Settings shared by every job; they do not depend on the swept values, so
# they are assigned once instead of on every iteration.
task = "reference"
dataset = "spatial_atac_rna_seq_mouse_brain_batch2"
reference_batches = "None"
n_neighbors = 8
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "4096"
n_sampled_neighbors = 4
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
cat_covariates_embeds_injection = "None"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = 0
n_hvg = 3000

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Swept values, one list per hyperparameter. `itertools.product` visits the
# combinations in the same order as the equivalent nested `for` loops
# (rightmost list varies fastest), so job ids map to the same combinations.
# NOTE(review): `lambda_l1_addon` is swept but never forwarded to the training
# script below; giving it more than one value would only submit duplicate
# jobs -- confirm whether a matching script option should be passed.
hyperparam_grid = itertools.product(
    [500000],  # lambda_edge_recon
    [300],  # lambda_gene_expr_recon
    [300],  # lambda_chrom_access_recon # [100, 300, 1000, 3000, 10000, 30000]
    [0.],  # active_gp_thresh_ratio # [0., 0.03, 0.05, 0.1]
    [5.],  # lambda_l1_masked # [0, 5, 10, 50]
    [5.],  # lambda_l1_addon # [0, 5, 10, 50]
    [1.0],  # nichenet_keep_target_genes_ratio # [1.0, 0.1]
)

job_id = 100001
for (
    lambda_edge_recon,
    lambda_gene_expr_recon,
    lambda_chrom_access_recon,
    active_gp_thresh_ratio,
    lambda_l1_masked,
    lambda_l1_addon,
    nichenet_keep_target_genes_ratio,
) in hyperparam_grid:
    # Every fragment carries its own leading space, so plain concatenation
    # yields the final argument string.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --filter_genes",
        f" --n_hvg {n_hvg}",
        f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        " --include_collectri_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --condition_key batch",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        " --include_atac_modality",
        " --filter_peaks",
        " --min_cell_peak_thresh_ratio 0.0005",
        f" --model_label {node_label_method}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        f" --active_gp_thresh_ratio {active_gp_thresh_ratio}",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f" --lambda_chrom_access_recon {lambda_chrom_access_recon}",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        f" --lambda_l1_masked {lambda_l1_masked}",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        f" --n_sampled_neighbors {n_sampled_neighbors}",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

    # Each combination gets its own job id (used in the job name and the
    # timestamp suffix above).
    job_id += 1

Submitted batch job 12640711


In [11]:
# Spatial ATAC-RNA-seq mouse brain (batch 2): hyperparameter sweep that
# submits one reference-model training job per combination of the values
# listed in the grid below (3 * 3 * 3 * 3 * 2 * 1 * 1 = 162 combinations).
import itertools

# Settings shared by every job; they do not depend on the swept values, so
# they are assigned once instead of on every iteration.
task = "reference"
dataset = "spatial_atac_rna_seq_mouse_brain_batch2"
reference_batches = "None"
n_neighbors = 8
species = "mouse"
node_label_method = "one-hop-norm"
edge_batch_size = "4096"
n_sampled_neighbors = 4
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
cat_covariates_embeds_injection = "None"
cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_nums = 0
n_hvg = 3000

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Swept values, one list per hyperparameter. `itertools.product` visits the
# combinations in the same order as the equivalent nested `for` loops
# (rightmost list varies fastest), so job ids map to the same combinations.
# NOTE(review): `lambda_l1_addon` is swept but never forwarded to the training
# script below; giving it more than one value would only submit duplicate
# jobs -- confirm whether a matching script option should be passed.
hyperparam_grid = itertools.product(
    [500000, 5000000, 50000000],  # lambda_edge_recon
    [300, 3000, 30000],  # lambda_gene_expr_recon
    [300, 3000, 30000],  # lambda_chrom_access_recon # [100, 300, 1000, 3000, 10000, 30000]
    [0., 0.05, 0.1],  # active_gp_thresh_ratio # [0., 0.03, 0.05, 0.1]
    [0, 5],  # lambda_l1_masked # [0, 5, 10, 50]
    [5.],  # lambda_l1_addon # [0, 5, 10, 50]
    [0.1],  # nichenet_keep_target_genes_ratio # [1.0, 0.1]
)

job_id = 1
for (
    lambda_edge_recon,
    lambda_gene_expr_recon,
    lambda_chrom_access_recon,
    active_gp_thresh_ratio,
    lambda_l1_masked,
    lambda_l1_addon,
    nichenet_keep_target_genes_ratio,
) in hyperparam_grid:
    # Every fragment carries its own leading space, so plain concatenation
    # yields the final argument string.
    script_args = "".join([
        f" --dataset {dataset}",
        f" --reference_batches {reference_batches}",
        f" --n_neighbors {n_neighbors}",
        " --filter_genes",
        f" --n_hvg {n_hvg}",
        f" --nichenet_keep_target_genes_ratio {nichenet_keep_target_genes_ratio}",
        " --nichenet_max_n_target_genes_per_gp 250",
        " --include_mebocost_gps",
        " --include_collectri_gps",
        f" --species {species}",
        " --gp_filter_mode subset",
        " --combine_overlap_gps",
        " --overlap_thresh_source_genes 0.9",
        " --overlap_thresh_target_genes 0.9",
        " --overlap_thresh_genes 0.9",
        " --counts_key counts",
        " --condition_key batch",
        f" --cat_covariates_keys {cat_covariates_keys}",
        f" --cat_covariates_no_edges {cat_covariates_no_edges}",
        " --spatial_key spatial",
        " --adj_key spatial_connectivities",
        " --mapping_entity_key mapping_entity",
        " --gp_targets_mask_key nichecompass_gp_targets",
        " --gp_sources_mask_key nichecompass_gp_sources",
        " --gp_names_key nichecompass_gp_names",
        " --include_atac_modality",
        " --filter_peaks",
        " --min_cell_peak_thresh_ratio 0.0005",
        f" --model_label {node_label_method}_{task}",
        " --active_gp_names_key nichecompass_active_gp_names",
        " --latent_key nichecompass_latent",
        f" --active_gp_thresh_ratio {active_gp_thresh_ratio}",
        " --gene_expr_recon_dist nb",
        f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
        f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
        " --log_variational",
        f" --node_label_method {node_label_method}",
        " --n_layers_encoder 1",
        " --n_hidden_encoder None",
        " --conv_layer_encoder gcnconv",
        " --n_epochs 100",
        " --n_epochs_all_gps 25",
        " --n_epochs_no_cat_covariates_contrastive 0",
        " --lr 0.001",
        f" --lambda_edge_recon {lambda_edge_recon}",
        f" --lambda_gene_expr_recon {lambda_gene_expr_recon}",
        f" --lambda_chrom_access_recon {lambda_chrom_access_recon}",
        f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
        f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
        f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
        " --lambda_group_lasso 0.",
        f" --lambda_l1_masked {lambda_l1_masked}",
        f" --edge_batch_size {edge_batch_size}",
        " --node_batch_size None",
        f" --n_sampled_neighbors {n_sampled_neighbors}",
        f" --timestamp_suffix _{job_id}",
    ])

    submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        nice=10000,
    )

    # Each combination gets its own job id (used in the job name and the
    # timestamp suffix above).
    job_id += 1

Submitted batch job 12663365
Submitted batch job 12663366
Submitted batch job 12663367
Submitted batch job 12663368
Submitted batch job 12663369
Submitted batch job 12663370
Submitted batch job 12663371
Submitted batch job 12663372
Submitted batch job 12663373
Submitted batch job 12663374
Submitted batch job 12663375
Submitted batch job 12663376
Submitted batch job 12663377
Submitted batch job 12663378
Submitted batch job 12663379
Submitted batch job 12663380
Submitted batch job 12663381
Submitted batch job 12663382
Submitted batch job 12663383
Submitted batch job 12663384
Submitted batch job 12663385
Submitted batch job 12663386
Submitted batch job 12663387
Submitted batch job 12663388
Submitted batch job 12663389
Submitted batch job 12663390
Submitted batch job 12663391
Submitted batch job 12663392
Submitted batch job 12663393
Submitted batch job 12663394
Submitted batch job 12663395
Submitted batch job 12663396
Submitted batch job 12663397
Submitted batch job 12663398
Submitted batc

### 5.6 Xenium Human Breast Cancer

In [None]:
# Xenium human breast cancer: configure one reference-model training run and
# hand it to `submit_python_script`.
# NOTE(review): `cat_covariates_keys` and `cat_covariates_no_edges` are read
# below but never assigned in this cell; they come from whatever an earlier
# cell left in the notebook namespace -- confirm the intended values.
task = "reference"
dataset = "xenium_human_breast_cancer"
job_id = 2

reference_batches = "batch1 batch2"
species = "human"
n_neighbors = 4 # 4, 8, 12
node_label_method = "one-hop-attention"
edge_batch_size = 4096 # 4096 (4, 8), 1024 (12)

cat_covariates_embeds_injection = "encoder gene_expr_decoder"
cat_covariates_embeds_nums = "2"

lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0. # 0., 5., 10.

# Job naming and script locations.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments for the training script; every fragment carries its
# own leading space, so plain concatenation yields the final argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 1.",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {node_label_method}_{task}",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    f" --lambda_l1_masked {lambda_l1_masked}",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=10000,
)

## 6. NicheCompass Reference Query Mapping

### 6.1 seqFISH Mouse Organogenesis Imputed

#### 6.1.1 Reference Model Training

In [None]:
# seqFISH mouse organogenesis (imputed): configure the reference-only model
# training run on batches 1-4 and hand it to `submit_python_script` with the
# `interactive_gpu_p` partition settings passed below.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 1

reference_batches = "batch1 batch2 batch3 batch4"
species = "mouse"
n_neighbors = 12
n_hvg = 3000
node_label_method = "one-hop-norm"
edge_batch_size = 4096

cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder" # "encoder gene_expr_decoder"
cat_covariates_embeds_nums = "3"

lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 5.

# Job naming and script locations.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_reference_only"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments for the training script; every fragment carries its
# own leading space, so plain concatenation yields the final argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --filter_genes",
    f" --n_hvg {n_hvg}",
    " --nichenet_keep_target_genes_ratio 0.1",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    " --condition_key batch",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {node_label_method}_{task}_reference_only",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.05",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    f" --lambda_l1_masked {lambda_l1_masked}",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    t="12:00:00",
    p="interactive_gpu_p",
    gres="gpu:1",
    qos="interactive_gpu",
    nice=9999,
)

#### 6.1.2 Query Mapping

In [None]:
# seqFISH mouse organogenesis (imputed): map the query batches (5 and 6) onto
# the reference model selected by `load_timestamp` and hand the run to
# `submit_python_script` with the `interactive_gpu_p` partition settings
# passed below.
task = "reference_query"
dataset = "seqfish_mouse_organogenesis_imputed"
job_id = 1
query_batches = "batch5 batch6"
n_neighbors = 12
node_label_method = "one-hop-norm"
load_timestamp = "01072023_165203_1"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 5.
edge_batch_size = 4096

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Command-line arguments for the mapping script. The fragments are joined from
# a list rather than chained with trailing backslashes: the previous form
# ended in a dangling `\` and only parsed because a blank line followed it.
# Every fragment carries its own leading space, so plain concatenation yields
# the same argument string as before.
script_args = "".join([
    f" --dataset {dataset}",
    f" --query_batches {query_batches}",
    f" --n_neighbors {n_neighbors}",
    " --spatial_key spatial",
    " --mapping_entity_key mapping_entity",
    " --gp_names_key nichecompass_gp_names",
    f" --reference_model_label {node_label_method}_{task}_reference_only",
    f" --load_timestamp {load_timestamp}",
    f" --query_model_label {node_label_method}_{task}_query_only",
    f" --reference_query_model_label {node_label_method}_{task}_query_mapping",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    f" --lambda_l1_masked {lambda_l1_masked}",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    t="12:00:00",
    p="interactive_gpu_p",
    gres="gpu:1",
    qos="interactive_gpu",
    nice=9999,
)

### 6.2 STARmap PLUS Mouse Central Nervous System

#### 6.2.1 Reference Model Training

In [None]:
# STARmap PLUS mouse CNS: configure the reference-only model training run on
# batches 1 and 2 and hand it to `submit_python_script`.
task = "reference_query"
dataset = "starmap_plus_mouse_cns"
job_id = 4

reference_batches = "batch1 batch2"
species = "mouse"
n_neighbors = 8 # 8, 12
n_sampled_neighbors = 4
node_label_method = "one-hop-norm"
edge_batch_size = 1024

cat_covariates_keys = "batch"
cat_covariates_no_edges = "True"
cat_covariates_embeds_injection = "gene_expr_decoder"
cat_covariates_embeds_nums = "3"

lambda_cat_covariates_contrastive = 250000. # 0., 10000, 100000, 500000
contrastive_logits_pos_ratio = 0.0625 # 0., 0.125, 0.0625, 0.03125
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 0.

# Job naming and script locations.
job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "train_nichecompass_reference_model.py"

# Command-line arguments for the training script; every fragment carries its
# own leading space, so plain concatenation yields the final argument string.
script_args = "".join([
    f" --dataset {dataset}",
    f" --reference_batches {reference_batches}",
    f" --n_neighbors {n_neighbors}",
    " --no-filter_genes",
    " --nichenet_keep_target_genes_ratio 0.1",
    " --nichenet_max_n_target_genes_per_gp 250",
    " --include_mebocost_gps",
    f" --species {species}",
    " --gp_filter_mode subset",
    " --combine_overlap_gps",
    " --overlap_thresh_source_genes 0.9",
    " --overlap_thresh_target_genes 0.9",
    " --overlap_thresh_genes 0.9",
    " --counts_key counts",
    f" --cat_covariates_keys {cat_covariates_keys}",
    f" --cat_covariates_no_edges {cat_covariates_no_edges}",
    " --spatial_key spatial",
    " --adj_key spatial_connectivities",
    " --mapping_entity_key mapping_entity",
    " --gp_targets_mask_key nichecompass_gp_targets",
    " --gp_sources_mask_key nichecompass_gp_sources",
    " --gp_names_key nichecompass_gp_names",
    f" --model_label {node_label_method}_{task}_reference_only",
    " --active_gp_names_key nichecompass_active_gp_names",
    " --latent_key nichecompass_latent",
    " --active_gp_thresh_ratio 0.",
    " --gene_expr_recon_dist nb",
    f" --cat_covariates_embeds_injection {cat_covariates_embeds_injection}",
    f" --cat_covariates_embeds_nums {cat_covariates_embeds_nums}",
    " --log_variational",
    f" --node_label_method {node_label_method}",
    " --n_layers_encoder 1",
    " --n_hidden_encoder None",
    " --conv_layer_encoder gcnconv",
    " --n_epochs 100",
    " --n_epochs_all_gps 25",
    " --n_epochs_no_cat_covariates_contrastive 0",
    " --lr 0.001",
    " --lambda_edge_recon 500000.",
    " --lambda_gene_expr_recon 300.",
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}",
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}",
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}",
    " --lambda_group_lasso 0.",
    f" --lambda_l1_masked {lambda_l1_masked}",
    f" --edge_batch_size {edge_batch_size}",
    " --node_batch_size None",
    f" --n_sampled_neighbors {n_sampled_neighbors}",
    f" --timestamp_suffix _{job_id}",
])

submit_python_script(
    job_name_prefix=job_name_prefix,
    job_id=job_id,
    job_folder_path=job_folder_path,
    conda_env_name=conda_env_name,
    script_folder_path=script_folder_path,
    script_name=script_name,
    script_args=script_args,
    nice=9999,
)

#### 6.2.2 Query Mapping

In [None]:
# Configure and submit the Slurm job that runs the query mapping script for
# the `reference_query` task.
task = "reference_query"
dataset = "starmap_plus_mouse_cns"
job_id = 1
query_batches = "batch3"
n_neighbors = 12
node_label_method = "one-hop-norm"
# NOTE(review): presumably this has to match the timestamp (including the
# `_<job_id>` suffix) of the reference training run whose model should be
# loaded -- verify before submitting.
load_timestamp = "06072023_114143_1"
lambda_cat_covariates_contrastive = 0.
contrastive_logits_pos_ratio = 0.
contrastive_logits_neg_ratio = 0.
lambda_l1_masked = 5.
edge_batch_size = 1024

job_name_prefix = f"{dataset}_nichecompass_{node_label_method}_{task}_query_mapping"
job_folder_path = f"../scripts/{task}/slurm_jobs"
script_folder_path = f"/home/aih/sebastian.birk/workspace/projects/nichecompass-reproducibility/scripts/{task}"
script_name = "map_query_on_nichecompass_reference_model.py"

# Adjacent string literals inside the parentheses are concatenated by the
# parser. This replaces the backslash continuations, the last of which was
# dangling and only worked because a blank line happened to follow it.
script_args = (
    f" --dataset {dataset}"
    f" --query_batches {query_batches}"
    f" --n_neighbors {n_neighbors}"
    " --spatial_key spatial"
    " --mapping_entity_key mapping_entity"
    " --gp_names_key nichecompass_gp_names"
    f" --reference_model_label {node_label_method}_{task}_reference_only"
    f" --load_timestamp {load_timestamp}"
    f" --query_model_label {node_label_method}_{task}_query_only"
    f" --reference_query_model_label {node_label_method}_{task}_query_mapping"
    " --n_epochs 100"
    " --n_epochs_all_gps 25"
    " --n_epochs_no_cat_covariates_contrastive 0"
    " --lr 0.001"
    " --lambda_edge_recon 500000."
    " --lambda_gene_expr_recon 300."
    f" --lambda_cat_covariates_contrastive {lambda_cat_covariates_contrastive}"
    f" --contrastive_logits_pos_ratio {contrastive_logits_pos_ratio}"
    f" --contrastive_logits_neg_ratio {contrastive_logits_neg_ratio}"
    " --lambda_group_lasso 0."
    f" --lambda_l1_masked {lambda_l1_masked}"
    f" --edge_batch_size {edge_batch_size}"
    " --node_batch_size None"
)

# Time limit, partition, gres and qos are passed explicitly instead of
# relying on the defaults of `submit_python_script`.
submit_python_script(
        job_name_prefix=job_name_prefix,
        job_id=job_id,
        job_folder_path=job_folder_path,
        conda_env_name=conda_env_name,
        script_folder_path=script_folder_path,
        script_name=script_name,
        script_args=script_args,
        t="12:00:00",
        p="interactive_gpu_p",
        gres="gpu:1",
        qos="interactive_gpu",
        nice=9999)