From ffdd867e2d28b08ee7bfae337d9a63b58a9fb442 Mon Sep 17 00:00:00 2001 From: Connor Robertson Date: Fri, 15 Dec 2023 13:55:50 -0800 Subject: [PATCH 1/2] add support for specifying slurm output file --- src/slurm.jl | 30 ++++++++++++++++++++++++------ test/runtests.jl | 7 ++++++- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/slurm.jl b/src/slurm.jl index 02015d3..97ac6bf 100644 --- a/src/slurm.jl +++ b/src/slurm.jl @@ -51,19 +51,37 @@ function launch(manager::SlurmManager, params::Dict, instances_arr::Array, mkdir(job_file_loc) end + # Check for given output file name + jobname = "julia-$(getpid())" + has_output_name = ("-o" in srunargs) | ("--output" in srunargs) + if has_output_name + loc = findfirst(x-> x == "-o", srunargs) + if isnothing(loc) + loc = findfirst(x-> x == "--output", srunargs) + end + job_output_name = srunargs[loc+1] + job_output_template = joinpath(job_file_loc, job_output_name) + srunargs[loc+1] = job_output_template + else + job_output_name = "$(jobname)-$(trunc(Int, Base.time() * 10))" + make_job_output_path(task_num) = joinpath(job_file_loc, "$(job_output_name)-$(task_num).out") + job_output_template = make_job_output_path("%4t") + append!(srunargs, "-o", job_output_template) + end + np = manager.np - jobname = "julia-$(getpid())" - job_output_name = "$(jobname)-$(trunc(Int, Base.time() * 10))" - make_job_output_path(task_num) = joinpath(job_file_loc, "$(job_output_name)-$(task_num).out") - job_output_template = make_job_output_path("%4t") - srun_cmd = `srun -J $jobname -n $np -o "$(job_output_template)" -D $exehome $(srunargs) $exename $exeflags $(worker_arg())` + srun_cmd = `srun -J $jobname -n $np -D $exehome $(srunargs) $exename $exeflags $(worker_arg())` srun_proc = open(srun_cmd) slurm_spec_regex = r"([\w]+):([\d]+)#(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})" retry_delays = manager.retry_delays for i = 0:np - 1 println("connecting to worker $(i + 1) out of $np") slurm_spec_match = nothing - fn = make_job_output_path(lpad(i, 4, "0")) + if has_output_name + fn = job_output_template + else + fn = make_job_output_path(lpad(i, 4, "0")) + end t0 = time() for retry_delay in retry_delays # Wait for output log to be created and populated, then parse diff --git a/test/runtests.jl b/test/runtests.jl index b9a0a39..30ec4ba 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,7 +20,8 @@ end if "slurm" in ARGS @testset "Slurm" begin - p = addprocs_slurm(1) + out_file = "my_slurm_job.out" + p = addprocs_slurm(1; o=out_file) @test nprocs() == 2 @test workers() == p @test fetch(@spawnat :any myid()) == p[1] @@ -28,6 +29,10 @@ if "slurm" in ARGS rmprocs(p) @test nprocs() == 1 @test workers() == [1] + + # Check output file creation + @test isfile(out_file) + rm(out_file) end end From 8f1dee92f8e36cc82d5890abb868456acd1f32c8 Mon Sep 17 00:00:00 2001 From: Connor Robertson Date: Tue, 19 Dec 2023 13:25:50 -0800 Subject: [PATCH 2/2] Condense output flag check Co-authored-by: Kevin Bonham --- src/slurm.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/slurm.jl b/src/slurm.jl index 97ac6bf..90fb873 100644 --- a/src/slurm.jl +++ b/src/slurm.jl @@ -55,10 +55,7 @@ function launch(manager::SlurmManager, params::Dict, instances_arr::Array, jobname = "julia-$(getpid())" has_output_name = ("-o" in srunargs) | ("--output" in srunargs) if has_output_name - loc = findfirst(x-> x == "-o", srunargs) - if isnothing(loc) - loc = findfirst(x-> x == "--output", srunargs) - end + loc = findfirst(x-> x == "-o" || x == "--output", srunargs) job_output_name = srunargs[loc+1] job_output_template = joinpath(job_file_loc, job_output_name) srunargs[loc+1] = job_output_template