# Simulation code for Root Cause Discovery

We perform 5 simulations: 2 in the low-dimensional setting, 2 in the low-dimensional latent setting, and 1 that mimics our real data analysis (high-dimensional setting). All simulations use the following model:

+ $X \leftarrow \mu_x + BX + \text{error} + \delta$,
+ $X = (I-B)^{-1}(\mu_x + \text{error} + \delta)$,

with different kinds of B.

In [24]:
import os
import subprocess
from math import ceil

def submit(command, ncores, total_mem, joblog_dir, jobname="submit",
           time_limit="4:00:00", partition="candes,owners,normal,hns"):
    """Write a throw-away SLURM batch script for ``command`` and pass it to ``sbatch``.

    The script is written to ``<jobname>.sh`` in the current working
    directory, submitted with ``sbatch``, and removed again afterwards,
    even when writing or submitting raises.

    Parameters
    ----------
    command : str
        Shell command line the job should run.
    ncores : int
        Value for ``--cpus-per-task``.
    total_mem : int or float
        Total memory in GB; it is divided by ``ncores`` and rounded up to
        give ``--mem-per-cpu``.
    joblog_dir : str
        Directory that receives ``slurm-<jobid>.out``. It is created if it
        does not exist yet.
    jobname : str, optional
        SLURM job name; also the stem of the temporary script file.
    time_limit : str, optional
        Value for ``--time``. Defaults to the previously hard-coded 4 hours.
    partition : str, optional
        Value for ``--partition``. Defaults to the previously hard-coded list.

    Returns
    -------
    None

    Notes
    -----
    The exit status of ``sbatch`` is deliberately not checked, so a single
    rejected submission does not abort a loop that submits many jobs.
    """
    import shlex  # local import: only needed to quote the echoed command

    mem = ceil(total_mem / ncores)  # memory per core, in whole GB
    filename = f"{jobname}.sh"
    log_pattern = os.path.join(joblog_dir, "slurm-%j.out")

    script_lines = [
        "#!/bin/bash",
        "#",
        f"#SBATCH --job-name={jobname}",
        "#",
        f"#SBATCH --time={time_limit}",
        f"#SBATCH --cpus-per-task={ncores}",
        f"#SBATCH --mem-per-cpu={mem}G",
        f"#SBATCH --partition={partition}",
        f"#SBATCH --output={log_pattern}",
        "",
        "#save job info on joblog:",
        # SLURM exports the id as SLURM_JOB_ID; $JOB_ID is unset under SLURM.
        "echo \"Job $SLURM_JOB_ID started on:   \" `hostname -s`",
        "echo \"Job $SLURM_JOB_ID started on:   \" `date `",
        "",
        "# load the job environment:",
        "module load gcc/10.1.0 python/3.6.1 openblas/0.3.10",
        "",
        "# run code",
        # Quote the command so the echo prints it verbatim even when it
        # contains double quotes or shell metacharacters.
        f"echo {shlex.quote(command)}",
        f"{command}",
        "",
        "#echo job info on joblog:",
        "echo \"Job $SLURM_JOB_ID ended on:   \" `hostname -s`",
        "echo \"Job $SLURM_JOB_ID ended on:   \" `date `",
        "#echo \" \"",
    ]

    # sbatch does not create the directory of --output; make sure it exists.
    if joblog_dir:
        os.makedirs(joblog_dir, exist_ok=True)

    try:
        with open(filename, "w") as io:
            io.write("\n".join(script_lines) + "\n")
        # submit job
        subprocess.run(["sbatch", filename])
    finally:
        # clean up the temporary script, whether or not submission worked
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass
    return None

# Simulation 1: low-dim random DAG

In [37]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/1.RandomDAG/simu_script.py"

# n_vec = [200]
# s_B_vec = [0.8]
# delta_r_vec = [12]
# dag_type = "random"
# seedB_all = 20
# seedm_all = 50
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/1.RandomDAG/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in range(seedB_all):
#             #for seedB in [15]:
#                 for seedm in range(seedm_all):
#                 #for seedm in [48,49]:
#                     cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {outdir}"

#                     # 1 core, 5 GB
#                     joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/1.RandomDAG/joblogs"
#                     jobname = f"{dag_type}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                     submit(cmd, 1, 5, joblog_dir, jobname)

Submitted batch job 51506327
Submitted batch job 51506328


# Simulation 2: low-dim hub DAG

In [71]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/2.HubDAG/simu_script.py"

# n_vec = [200]
# s_B_vec = [0.6]
# delta_r_vec = [12]
# dag_type = "hub"
# seedB_all = 20
# seedm_all = 50
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/2.HubDAG/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             #for seedB in range(seedB_all):
#             for seedB in [17]:
#                 #for seedm in range(seedm_all):
#                 for seedm in [0,1,2]:
#                     cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {outdir}"

#                     # 1 core, 5 GB
#                     joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/2.HubDAG/joblogs"
#                     jobname = f"{dag_type}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                     submit(cmd, 1, 5, joblog_dir, jobname)

Submitted batch job 51559613
Submitted batch job 51559614
Submitted batch job 51559615


# Simulation 3: high-dim hub DAG

In [100]:
# # change time to 24h !!
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/3.hdHubDAG/simu_script.py"

# n_vec = [400]
# s_B_vec = [0.2]
# delta_r_vec = [12]
# dag_type = "hub"
# seedB_all = range(15,20)
# seedm_all = range(50)
# ncores = 12 # < 20 would be good
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/3.hdHubDAG/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in seedB_all:
#                 for seedm in seedm_all:
#                     resultname = f"{dag_type}hd_n{n}s{int(s_B*10)}intv{delta_r}seedB{seedB}seedm{seedm}.npz"
#                     file_path = f"{outdir}/{resultname}"
#                     if not os.path.isfile(file_path):
#                         cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {ncores} {outdir}"

#                         # ncores cores, 5 GB total (submit() rounds the per-core share up to whole GB)
#                         joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/3.hdHubDAG/joblogs"
#                         jobname = f"{dag_type}hd_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"

#                         # add  one line of code submit only if no result file exist!
#                         submit(cmd, ncores, 5, joblog_dir, jobname)

## Simulation 4: Latent random DAG

In [63]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/4.RandomDAGlatent/simu_script.py"

# s_B_vec = [0.8]
# n_vec = [200]
# latent_prop = 0.3
# delta_r_vec = [12]
# dag_type = "random"
# seedB_all = range(20)
# seedm_all = range(50)
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/4.RandomDAGlatent/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in seedB_all:
#                 for seedm in seedm_all:
#                     resultname = f"{dag_type}_lat{int(latent_prop*10)}_n{n}s{int(s_B*10)}intv{delta_r}seedB{seedB}seedm{seedm}.npz"
#                     file_path = f"{outdir}/{resultname}"
#                     if not os.path.isfile(file_path):
#                         cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {latent_prop} {outdir}"

#                         # 1 core, 5 GB
#                         joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/4.RandomDAGlatent/joblogs"
#                         jobname = f"{dag_type}_lat{int(latent_prop*10)}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                         submit(cmd, 1, 5, joblog_dir, jobname)

## Simulation 5: Latent hub DAG

In [108]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/5.HubDAGlatent/simu_script.py"

# s_B_vec = [0.8]
# latent_prop = 0.3
# n_vec = [200]
# delta_r_vec = [12]
# dag_type = "hub"
# seedB_all = range(20)
# seedm_all = range(50)
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/5.HubDAGlatent/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in seedB_all:
#                 for seedm in seedm_all:
#                     resultname = f"{dag_type}_lat{int(latent_prop*10)}_n{n}s{int(s_B*10)}intv{delta_r}seedB{seedB}seedm{seedm}.npz"
#                     file_path = f"{outdir}/{resultname}"
#                     if not os.path.isfile(file_path):
#                         cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {latent_prop} {outdir}"

#                         # 1 core, 5 GB
#                         joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/5.HubDAGlatent/joblogs"
#                         jobname = f"{dag_type}_lat{int(latent_prop*10)}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                         submit(cmd, 1, 5, joblog_dir, jobname)

## Simulation 6: Add LiNGAM, random DAG + Unif error

In [3]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/6.RandomDAGLiNGAMunif/simu_script.py"

# n_vec = [200]
# s_B_vec = [0.2]
# delta_r_vec = [8]
# dag_type = "random"
# error_type = "Uniform"
# seedB_all = range(20)
# seedm_all = range(50)
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/6.RandomDAGLiNGAMunif/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in seedB_all:
#                 for seedm in seedm_all:
#                     cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {error_type} {outdir}"

#                     # 1 core, 5 GB
#                     joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/6.RandomDAGLiNGAMunif/joblogs"
#                     jobname = f"{dag_type}{error_type}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                     submit(cmd, 1, 5, joblog_dir, jobname)

## Simulation 7: LiNGAM, random DAG + Gaussian error

In [20]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/8.RandomDAGLiNGAMgaussian/simu_script.py"

# n_vec = [400]
# s_B_vec = [0.2]
# delta_r_vec = [12]
# dag_type = "random"
# error_type = "Gaussian"
# seedB_all = range(10,20)
# seedm_all = range(50)
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/8.RandomDAGLiNGAMgaussian/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in seedB_all:
#                 for seedm in seedm_all:
#                     cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {error_type} {outdir}"

#                     # 1 core, 5 GB
#                     joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/8.RandomDAGLiNGAMgaussian/joblogs"
#                     jobname = f"{dag_type}{error_type}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                     submit(cmd, 1, 5, joblog_dir, jobname)

## Simulation 8: LiNGAM, hub DAG + Uniform error

In [None]:
# python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/7.HubDAGLiNGAMunif/simu_script.py"

# n_vec = [100]
# s_B_vec = [0.2]
# delta_r_vec = [12]
# dag_type = "hub"
# error_type = "Uniform"
# seedB_all = range(20)
# seedm_all = range(50)
# outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/7.HubDAGLiNGAMunif/results"
# for n in n_vec:
#     for s_B in s_B_vec:
#         for delta_r in delta_r_vec:
#             for seedB in seedB_all:
#                 for seedm in seedm_all:
#                     cmd = f"python3 {python_exe} {n} {s_B} {delta_r} {dag_type} {seedB} {seedm} {error_type} {outdir}"

#                     # 1 core, 5 GB
#                     joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/7.HubDAGLiNGAMunif/joblogs"
#                     jobname = f"{dag_type}{error_type}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
#                     submit(cmd, 1, 5, joblog_dir, jobname)

## Simulation 9: LiNGAM, hub DAG + Gaussian error

In [None]:
# Simulation 9 driver: submit one SLURM job per
# (n, s_B, delta_r, seedB, seedm) combination of the grid below.

# locations of the simulation script, its results and the SLURM logs
python_exe = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/9.HubDAGLiNGAMgaussian/simu_script.py"
outdir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/9.HubDAGLiNGAMgaussian/results"
joblog_dir = "/home/users/jinzhoul/Simulations/RootCauseDiscovery/20250815/9.HubDAGLiNGAMgaussian/joblogs"

# settings shared by every job
dag_type = "hub"
error_type = "Gaussian"

# parameter grid
n_vec = [100]
s_B_vec = [0.2]
delta_r_vec = [20]
seedB_all = range(14,20)
seedm_all = range(50)

# Flattened view of the grid; iteration order matches nested loops with
# n outermost and seedm innermost.
grid = (
    (n, s_B, delta_r, seedB, seedm)
    for n in n_vec
    for s_B in s_B_vec
    for delta_r in delta_r_vec
    for seedB in seedB_all
    for seedm in seedm_all
)

for n, s_B, delta_r, seedB, seedm in grid:
    jobname = f"{dag_type}{error_type}_n{n}_s{int(s_B*10)}_int{delta_r}_seedB{seedB}_seedm{seedm}"
    # positional arguments, in the order they are passed to simu_script.py
    cli_args = (python_exe, n, s_B, delta_r, dag_type, seedB, seedm, error_type, outdir)
    cmd = " ".join(["python3", *map(str, cli_args)])

    # 1 core, 5 GB
    submit(cmd, ncores=1, total_mem=5, joblog_dir=joblog_dir, jobname=jobname)

Submitted batch job 51980436
Submitted batch job 51980437
Submitted batch job 51980439
Submitted batch job 51980440
Submitted batch job 51980441
Submitted batch job 51980442
Submitted batch job 51980443
Submitted batch job 51980444
Submitted batch job 51980445
Submitted batch job 51980446
Submitted batch job 51980447
Submitted batch job 51980448
Submitted batch job 51980449
Submitted batch job 51980450
Submitted batch job 51980451
Submitted batch job 51980456
Submitted batch job 51980457
Submitted batch job 51980459
Submitted batch job 51980462
Submitted batch job 51980463
Submitted batch job 51980464
Submitted batch job 51980465
Submitted batch job 51980466
Submitted batch job 51980467
Submitted batch job 51980468
Submitted batch job 51980469
Submitted batch job 51980470
Submitted batch job 51980471
Submitted batch job 51980474
Submitted batch job 51980476
Submitted batch job 51980478
Submitted batch job 51980480
Submitted batch job 51980481
Submitted batch job 51980482
Submitted batc

Submitted batch job 51982816
Submitted batch job 51982825
Submitted batch job 51982827
Submitted batch job 51982833
Submitted batch job 51982834
Submitted batch job 51982835
Submitted batch job 51982836
Submitted batch job 51982849
Submitted batch job 51982850
Submitted batch job 51982872
Submitted batch job 51982883
Submitted batch job 51982884
Submitted batch job 51982885
Submitted batch job 51982886
Submitted batch job 51982894
Submitted batch job 51982898
Submitted batch job 51982900
Submitted batch job 51982906
Submitted batch job 51982907
Submitted batch job 51982908
Submitted batch job 51982910
Submitted batch job 51982934
Submitted batch job 51982936
Submitted batch job 51982940
Submitted batch job 51982942
Submitted batch job 51982947
Submitted batch job 51982948
Submitted batch job 51982949
Submitted batch job 51982950
Submitted batch job 51982956
Submitted batch job 51982958
Submitted batch job 51982959
Submitted batch job 51982961
Submitted batch job 51982986
Submitted batc