## TODO: rerun other simulations later: we updated some functions.

# Simulation code for Root Cause Discovery

We perform 3 simulations: the first two are in a low-dimensional setting, while the last mimics our real data analysis (a high-dimensional setting). All simulations use the following model:

+ $X \leftarrow \mu_x + BX + \text{error} + \delta$,
+ $X = (I-B)^{-1}(\mu_x + \text{error} + \delta)$,

with different kinds of B.

In [2]:
import os
import subprocess
from math import ceil

def submit(command, ncores, total_mem, joblog_dir, jobname="submit"):
    """Write a throwaway SLURM batch script for ``command`` and pass it to ``sbatch``.

    The script is written to ``<jobname>.sh`` in the current working
    directory, submitted with ``sbatch``, and deleted again afterwards.

    Parameters
    ----------
    command : str
        Shell command line the job should run; it is written verbatim into
        the script (once inside an ``echo`` and once as the actual command).
    ncores : int
        Value used for ``--cpus-per-task``.
    total_mem : int or float
        Total memory for the job in GB. SLURM is asked for
        ``ceil(total_mem / ncores)`` GB via ``--mem-per-cpu``.
    joblog_dir : str
        Directory that receives the ``slurm-%j.out`` log file. It is created
        if it does not exist yet.
    jobname : str, optional
        SLURM job name; also used as the stem of the temporary script file.

    Returns
    -------
    None
    """
    mem = ceil(total_mem / ncores)  # memory per core, rounded up to whole GB
    filename = f"{jobname}.sh"

    # Make sure the log directory exists before the job is queued, so the
    # job does not fail just because its --output file cannot be opened.
    if joblog_dir:
        os.makedirs(joblog_dir, exist_ok=True)

    # NOTE: the job id is read from $SLURM_JOB_ID. The former $JOB_ID is the
    # SGE variable and is not set by SLURM, so the log lines showed no id.
    script_lines = [
        "#!/bin/bash",
        "#",
        f"#SBATCH --job-name={jobname}",
        "#",
        "#SBATCH --time=48:00:00",
        f"#SBATCH --cpus-per-task={ncores}",
        f"#SBATCH --mem-per-cpu={mem}G",
        "#SBATCH --partition=candes,normal,hns",
        f"#SBATCH --output={os.path.join(joblog_dir, 'slurm-%j.out')}",
        "",
        "#save job info on joblog:",
        "echo \"Job $SLURM_JOB_ID started on:   \" `hostname -s`",
        "echo \"Job $SLURM_JOB_ID started on:   \" `date `",
        "",
        "# load the job environment:",
        "module load python/3.6.1",
        "",
        "# run code",
        f"echo \"{command}\"",
        f"{command}",
        "",
        "#echo job info on joblog:",
        "echo \"Job $SLURM_JOB_ID ended on:   \" `hostname -s`",
        "echo \"Job $SLURM_JOB_ID ended on:   \" `date `",
        "#echo \" \"",
    ]
    with open(filename, "w") as io:
        io.write("\n".join(script_lines) + "\n")

    try:
        # submit job; a non-zero sbatch exit status is deliberately not
        # turned into an exception, so a batch of submissions keeps going
        subprocess.run(["sbatch", filename])
    finally:
        # clean up the temporary script even if launching sbatch raised
        os.remove(filename)
    return None

## Simulation 1: hub DAG - high-dim

In [None]:
# Submit the HighDimHubDAGn100 runs: one SLURM job for every combination of
# (s_B, int_mean, seedB, seedm).
python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250601/HighDimHubDAGn100/simu_script.py"
outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250601/HighDimHubDAGn100/results"
# The log directory is the same for every job, so it is set once up front.
joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250601/HighDimHubDAGn100/joblogs"

dag_type = "hub"
dimreduce_method = "cv"
ncore = 8

# Parameter grid: every value in the two lists is crossed with
# seeds 0..seedB_all-1 and 0..seedm_all-1.
s_B_vec = [0.2]
int_mean_vec = [15]
seedB_all = 5
seedm_all = 20

for s_B, int_mean, seedB, seedm in (
    (b, m, sb, sm)
    for b in s_B_vec
    for m in int_mean_vec
    for sb in range(seedB_all)
    for sm in range(seedm_all)
):
    # Positional arguments are passed to the simulation script in this order.
    cmd = " ".join(
        str(part)
        for part in (
            "python3", python_exe, s_B, int_mean, dag_type,
            dimreduce_method, ncore, seedB, seedm, outdir,
        )
    )
    jobname = (
        f"hd_{dag_type}_{dimreduce_method}"
        f"_s{int(s_B*10)}_int{int_mean}"
        f"_seedB{seedB}_seedm{seedm}"
    )
    # request `ncore` CPUs and 12 GB of total memory
    submit(cmd, ncore, 12, joblog_dir, jobname)

Submitted batch job 47641150
Submitted batch job 47641151
Submitted batch job 47641159
Submitted batch job 47641160
Submitted batch job 47641161


In [None]:
# Submit the HighDimHubDAGn200 runs: one SLURM job for every combination of
# (s_B, int_mean, seedB, seedm).
#
# All paths are derived from a single base directory. Previously the job logs
# of this run were written into the HighDimHubDAGn100 joblogs directory
# (copy-paste leftover), mixing the logs of the two experiments.
base_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250601/HighDimHubDAGn200"
python_exe = os.path.join(base_dir, "simu_script.py")
outdir = os.path.join(base_dir, "results")
joblog_dir = os.path.join(base_dir, "joblogs")

# The log directory must exist before the jobs start writing to it.
os.makedirs(joblog_dir, exist_ok=True)

s_B_vec = [0.2]
int_mean_vec = [15]
dag_type = "hub"
dimreduce_method = "cv"
ncore = 8
seedB_all = 5
seedm_all = 20

for s_B in s_B_vec:
    for int_mean in int_mean_vec:
        for seedB in range(seedB_all):
            for seedm in range(seedm_all):
                # Positional arguments are passed to the simulation script in this order.
                cmd = f"python3 {python_exe} {s_B} {int_mean} {dag_type} {dimreduce_method} {ncore} {seedB} {seedm} {outdir}"

                jobname = f"hd_{dag_type}_{dimreduce_method}_s{int(s_B*10)}_int{int_mean}_seedB{seedB}_seedm{seedm}"
                # request `ncore` CPUs and 12 GB of total memory
                submit(cmd, ncore, 12, joblog_dir, jobname)