# In [None]:
import os, math, subprocess, threading, optuna

# Identity of the shared Optuna study: a name plus the storage URL
# (a local SQLite database file) it is persisted in.
STUDY_NAME = "catboost_cat_study_v1"
STUDY_DB = "sqlite:///catboost_optuna_v1.db"

# Make sure the study exists before anything else runs. With
# load_if_exists=True an already-existing study of this name is reused
# instead of raising, so this statement is safe to execute repeatedly.
# The objective value is minimized.
optuna.create_study(
    direction="minimize",
    storage=STUDY_DB,
    study_name=STUDY_NAME,
    load_if_exists=True,
)

# Interpreter used to run the worker script.
PYTHON = "/gpfs/milgram/project/christakis/vs479/conda_envs/pykan-env/bin/python"

# GPU indices to use; one worker process is started per entry.
GPUS = [0, 1, 2, 3]

# Per-worker limits: thread count and wall-clock budget in seconds (2 hours).
THREADS_PER_WORKER = 8
SECONDS_PER_WORKER = 7200

# Overall trial budget, divided evenly across the workers. The share is
# rounded up (ceiling division) and never drops below one trial per worker.
TRIALS_TOTAL = 40
TRIALS_PER_WORKER = max(1, -(-TRIALS_TOTAL // len(GPUS)))

# Command-line arguments appended to every worker invocation, one option per
# line. The meaning of each flag is defined by the worker script, which is
# not part of this file.
# NOTE(review): --writer_id is "0" for every worker — confirm that sharing
# one writer id across all workers is intended.
common_args = [
    "--storage", STUDY_DB,
    "--study_name", STUDY_NAME,
    "--n_trials", f"{TRIALS_PER_WORKER}",
    "--timeout", f"{SECONDS_PER_WORKER}",
    "--threads", f"{THREADS_PER_WORKER}",
    "--gpu_ram_part", "0.30",
    "--stratify_regime",  # bare switch: takes no value
    "--cv_splits", "5",
    "--sigmoid_cut", "7.0",
    "--epsilon", "1e-4",
    "--writer_id", "0",
]

# Environment template for the workers: a copy of the current environment
# with the TBB / OpenMP / MKL / NumExpr thread-count variables all set to
# the per-worker thread count.
base_env = dict(os.environ)
base_env.update(
    dict.fromkeys(
        (
            "TBB_NUM_THREADS",
            "OMP_NUM_THREADS",
            "MKL_NUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ),
        str(THREADS_PER_WORKER),
    )
)

def launch_worker(worker_id, gpu_id):
    """Start one tuning worker restricted to a single GPU and echo its output.

    The child process only sees ``gpu_id`` (through ``CUDA_VISIBLE_DEVICES``).
    CUDA renumbers the visible devices starting at 0, which is why the
    script is always given ``--gpus 0`` regardless of the physical index.

    The child's stdout and stderr are merged and forwarded line by line to
    this process's stdout by a background daemon thread, each line prefixed
    with ``[W<worker_id>/GPU<gpu_id>]``.

    Args:
        worker_id: Index passed to the script as ``--worker_id`` and used in
            the output prefix.
        gpu_id: GPU index exposed to the child process.

    Returns:
        The running ``subprocess.Popen`` handle. Its ``stdout`` is owned by
        the forwarding thread and is closed once the child's output ends, so
        callers should only ``wait()``/``poll()`` on the handle.
    """
    env = base_env.copy()
    env["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    cmd = [PYTHON, "catboost_optuna_tuning-stratified-CV.py",
           "--gpus", "0",
           "--worker_id", str(worker_id)] + common_args
    # bufsize=1 with text=True gives line-buffered reads on the pipe.
    p = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, text=True, bufsize=1)

    def pipe():
        # Close the pipe when the child reaches EOF (or forwarding fails) so
        # the file descriptor is released instead of leaking per launch.
        with p.stdout:
            for line in p.stdout:
                print(f"[W{worker_id}/GPU{gpu_id}] {line}", end="")

    threading.Thread(target=pipe, daemon=True).start()
    return p

# Launch one worker per configured GPU, wait for all of them, then report.
procs = []
try:
    for wid, g in enumerate(GPUS):
        print(f"Starting worker {wid} on GPU {g} with {THREADS_PER_WORKER} threads and {TRIALS_PER_WORKER} trials...")
        procs.append(launch_worker(wid, g))

    for p in procs:
        p.wait()
finally:
    # On the normal path every worker has already exited and this is a no-op.
    # It only acts when we leave early (interrupt, failed launch): stop the
    # workers that are still running so they do not keep occupying the GPUs.
    for p in procs:
        if p.poll() is None:
            p.terminate()

# A worker that crashed would otherwise be indistinguishable from one that
# completed; surface every non-zero exit status.
for wid, p in enumerate(procs):
    if p.returncode != 0:
        print(f"Worker {wid} on GPU {GPUS[wid]} exited with code {p.returncode}.")

print("All workers finished.")
