Skip to content

Commit

Permalink
add Lmod hook to set $OMPI_MCA_btl to '^smcuda' when loading OpenMPI …
Browse files Browse the repository at this point in the history
…module
  • Loading branch information
boegel committed Feb 12, 2024
1 parent 1045ef0 commit c4dadde
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions create_lmodrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,28 @@
end
end
local function openmpi_load_hook(t)
    -- Disable the smcuda BTL when an OpenMPI module is loaded on
    -- aarch64/neoverse_v1, to work around a hang/crash due to a bug in OpenMPI;
    -- see https://gitlab.com/eessi/support/-/issues/41
    --
    -- t: table passed by Lmod to the load hook; only t.modFullName is read.
    -- Side effects: may print a message via LmodMessage and set $OMPI_MCA_btl.

    -- module name is the part of the full name before the first "/"
    local moduleName = string.match(t.modFullName, "(.-)/")
    local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""

    -- the workaround must only kick in for the affected CPU target
    if (moduleName ~= "OpenMPI") or (cpuTarget ~= "aarch64/neoverse_v1") then
        return
    end

    local ompiMcaBtl = os.getenv("OMPI_MCA_btl")
    local newValue
    if ompiMcaBtl == nil or ompiMcaBtl == "" then
        -- unset or empty: avoid producing a value with a leading comma
        newValue = "^smcuda"
    elseif string.find(ompiMcaBtl, "^smcuda", 1, true) then
        -- plain-text search: '^smcuda' is already there (e.g. the module was
        -- loaded before), so do not append it a second time
        return
    else
        newValue = ompiMcaBtl .. ",^smcuda"
    end

    local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI"
    LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
    setenv("OMPI_MCA_btl", newValue)
end
-- Only one function is kept per hook name when hook.register is called
-- without an explicit action: a second registration for "load" replaces the
-- first one. Combine both load hooks into a single function so that each of
-- them runs on every module load.
local function combined_load_hook(t)
    cuda_enabled_load_hook(t)
    openmpi_load_hook(t)
end
hook.register("load", combined_load_hook)
"""

def error(msg):
Expand Down

0 comments on commit c4dadde

Please sign in to comment.