-
Notifications
You must be signed in to change notification settings - Fork 8
Open
Description
I have the following Slurm batch script:
#!/bin/bash -l
#SBATCH --job-name=floquet-dpt
#SBATCH --output=output_eiger7.out
#SBATCH --time=24:00:00
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=128
#SBATCH --cpus-per-task=1
#SBATCH --mem=0
#SBATCH --partition=normal
#SBATCH --hint=nomultithread
#SBATCH --exclusive
#SBATCH --uenv=julia/25.5:v1
#SBATCH --view=juliaup
export JULIA_NUM_THREADS=$SLURM_CPUS_PER_TASK
# Instantiate Julia environment
echo "Instantiating Julia environment..."
julia --project -e 'using Pkg; Pkg.instantiate()'
echo "Julia environment instantiated."
# Run Julia script
echo "Running Julia script..."
stdbuf -oL -eL julia --project 1ph_kerr.jl "[7]" "[80]"
echo "Julia script finished."
and the following beginning of a Julia file:
using Distributed
using SlurmClusterManager
const SLURM_CPUS_PER_TASK = parse(Int, get(ENV, "SLURM_CPUS_PER_TASK", "1"))
const SCRATCH = get(ENV, "SCRATCH", "~")
exeflags = ["--project", "--threads=$(SLURM_CPUS_PER_TASK)"]
addprocs(SlurmManager(), exeflags=exeflags)
println("################")
println("Hello! You have $(nworkers()) workers with $(remotecall_fetch(Threads.nthreads, 2)) threads each.")
println("----------------")
It works most of the time, but roughly one run in five fails with the following error:
nested task error: launch_timeout exceeded
Stacktrace:
[1] try_yieldto(undo::typeof(Base.ensure_rescheduled))
@ Base ./task.jl:1128
[2] wait()
@ Base ./task.jl:1200
[3] wait(c::Base.GenericCondition{Base.Threads.SpinLock}; first::Bool)
@ Base ./condition.jl:141
[4] wait
@ ./condition.jl:136 [inlined]
[5] copyuntil(out::IOBuffer, x::Base.PipeEndpoint, c::UInt8; keep::Bool)
@ Base ./stream.jl:1046
[6] copyuntil(out::IOBuffer, io::Base.Process, arg::UInt8; kw::@Kwargs{keep::Bool})
@ Base ./io.jl:464
[7] copyuntil
@ ./io.jl:464 [inlined]
[8] copyline(out::IOBuffer, s::Base.Process; keep::Bool)
@ Base ./iobuffer.jl:944
[9] copyline
@ ./iobuffer.jl:920 [inlined]
[10] readline(s::Base.Process; keep::Bool)
@ Base ./io.jl:619
[11] (::SlurmClusterManager.var"#8#9"{SlurmManager, Vector{WorkerConfig}, Condition})()
@ SlurmClusterManager /capstor/scratch/cscs/amercuri/.julia/mc/juliaup/depot/packages/SlurmClusterManager/5cccZ/src/slurmmanager.jl:205
in expression starting at /users/amercuri/GitHub/Research/2025/Floquet-Dissipative-Phase-Transitions/definitive_figures/cluster/1ph_kerr.jl:8
Metadata
Metadata
Assignees
Labels
No labels