From cebf53e9e9f206b763a059947218a568a2fb0ef5 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 15:15:32 +0200 Subject: [PATCH 01/10] Fix tarball naming for CPU-only tarballs --- bot/build.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index a1e103a4..4d6b0cda 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -271,7 +271,11 @@ source $software_layer_dir/init/eessi_defaults # append the project (subdirectory) name to the end tarball name. This is information # then used at the ingestion stage. If ${EESSI_DEV_PROJECT} is not defined, nothing is # appended -export TGZ=$(printf "eessi-%s-software-%s-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_ACCELERATOR_TARGET_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp}) +if [[ -z ${EESSI_ACCELERATOR_TARGET_OVERRIDE} ]]; + export TGZ=$(printf "eessi-%s-software-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp}) +else + export TGZ=$(printf "eessi-%s-software-%s-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_ACCELERATOR_TARGET_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp}) +fi # Export EESSI_DEV_PROJECT to use it (if needed) when making tarball echo "bot/build.sh: EESSI_DEV_PROJECT='${EESSI_DEV_PROJECT}'" From f85e74ef05570803d49f7255093ec11b4f0a0f9f Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 15:16:22 +0200 Subject: [PATCH 02/10] Add dummy change to prove functionality of the code --- eb_hooks.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index bdf8f49b..d9554069 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -1,5 +1,6 @@ # Hooks to customize how EasyBuild installs software in EESSI # see 
https://docs.easybuild.io/en/latest/Hooks.html +# DUMMY CHANGE, DONT MERGE import ast import datetime import glob @@ -151,7 +152,7 @@ def parse_list_of_dicts_env(var_name): if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', var_name): raise ValueError(f"Invalid environment variable name: {var_name}") list_string = os.getenv(var_name, '[]') - + list_of_dicts = [] try: # Try JSON format first @@ -162,7 +163,7 @@ def parse_list_of_dicts_env(var_name): list_of_dicts = ast.literal_eval(list_string) except (ValueError, SyntaxError): raise ValueError(f"Environment variable '{var_name}' does not contain a valid list of dictionaries.") - + return list_of_dicts @@ -211,7 +212,7 @@ def post_ready_hook(self, *args, **kwargs): parallel = self.parallel else: parallel = self.cfg['parallel'] - + if parallel == 1: return # no need to limit if already using 1 core @@ -733,7 +734,7 @@ def pre_configure_hook_score_p(self, *args, **kwargs): def pre_configure_hook_vsearch(self, *args, **kwargs): """ Pre-configure hook for VSEARCH - - Workaround for a Zlib macro being renamed in Gentoo, see https://bugs.gentoo.org/383179 + - Workaround for a Zlib macro being renamed in Gentoo, see https://bugs.gentoo.org/383179 (solves "expected initializer before 'OF'" errors) """ if self.name == 'VSEARCH': From 33393f54f1898c0a25e03fe64be3f2a1c7a1a103 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 16:08:22 +0200 Subject: [PATCH 03/10] These settings were introduced in EB 5.1.0, so the check should be >= 5.1 --- EESSI-extend-easybuild.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EESSI-extend-easybuild.eb b/EESSI-extend-easybuild.eb index 0abb6ce9..547710ac 100644 --- a/EESSI-extend-easybuild.eb +++ b/EESSI-extend-easybuild.eb @@ -212,7 +212,7 @@ easybuild_version = os.getenv("EBVERSIONEASYBUILD") or easybuild_version eessi_version = os.getenv("EESSI_VERSION") or "2023.06" -- Set environment variables that are EasyBuild version specific -if 
convertToCanonical(easybuild_version) > convertToCanonical("4") then +if convertToCanonical(easybuild_version) >= convertToCanonical("5.1") then setenv ("EASYBUILD_STRICT_RPATH_SANITY_CHECK", "1") setenv ("EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS", "1") setenv ("EASYBUILD_FAIL_ON_MOD_FILES_GCCCORE", "1") From d668ada856e24971ab5169298c3047a455694d95 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 16:16:19 +0200 Subject: [PATCH 04/10] Move installation of CUDA SDK back down, reverting the move from #54. This will make sure the rebuild of EESSI-extend is done before building the CUDA in host-injections. That's essential, as the fix in EESSI-extend is needed to make the CUDA in host-injections step pass --- EESSI-install-software.sh | 47 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 48e03f94..d462cad2 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -247,29 +247,6 @@ if [ ! -f ${_lmod_sitepackage_file} ]; then python3 ${TOPDIR}/create_lmodsitepackage.py ${_eessi_software_path} fi -# Install full CUDA SDK and cu* libraries in host_injections -# (This is done *before* configuring EasyBuild as it may rely on an older EB version) -# Hardcode this for now, see if it works -# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install -# Allow skipping CUDA SDK install in e.g. CI environments -echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary" -temp_install_storage=${TMPDIR}/temp_install_storage -mkdir -p ${temp_install_storage} -if [ -z "${skip_cuda_install}" ] || [ ! 
"${skip_cuda_install}" ]; then - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ - -t ${temp_install_storage} \ - --accept-cuda-eula \ - --accept-cudnn-eula -else - echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" -fi - -# Install NVIDIA drivers in host_injections (if they exist) -if nvidia_gpu_available; then - echo "Installing NVIDIA drivers for use in prefix shell..." - ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh -fi - echo ">> Configuring EasyBuild..." # Make sure EESSI-extend is not loaded, and configure location variables for a @@ -316,6 +293,30 @@ echo "DEBUG: before loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_I source $TOPDIR/load_eessi_extend_module.sh ${EESSI_VERSION} echo "DEBUG: after loading EESSI-extend // EASYBUILD_INSTALLPATH='${EASYBUILD_INSTALLPATH}'" +# Install full CUDA SDK and cu* libraries in host_injections +# (This is done *before* configuring EasyBuild as it may rely on an older EB version) +# Hardcode this for now, see if it works +# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install +# Allow skipping CUDA SDK install in e.g. CI environments +echo "Going to install full CUDA SDK and cu* libraries under host_injections if necessary" +temp_install_storage=${TMPDIR}/temp_install_storage +mkdir -p ${temp_install_storage} +if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then + ${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh \ + -t ${temp_install_storage} \ + --accept-cuda-eula \ + --accept-cudnn-eula +else + echo "Skipping installation of CUDA SDK and cu* libraries in host_injections, since the --skip-cuda-install flag was passed" +fi + +# Install NVIDIA drivers in host_injections (if they exist) +if nvidia_gpu_available; then + echo "Installing NVIDIA drivers for use in prefix shell..." 
+ ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh +fi + + if [ ! -z "${shared_fs_path}" ]; then shared_eb_sourcepath=${shared_fs_path}/easybuild/sources echo ">> Using ${shared_eb_sourcepath} as shared EasyBuild source path" From 1e0a49a4ac4957e712dda60d48499705a347795b Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 16:41:13 +0200 Subject: [PATCH 05/10] Add debugging output --- scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index 5123a7c1..04d49b8a 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -115,6 +115,7 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do unset EESSI_PROJECT_INSTALL unset EESSI_USER_INSTALL export EESSI_SITE_INSTALL=1 + echo "BEFORE UNLOADING EESSI-EXTEND, EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS = $EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS" module unload EESSI-extend ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out # need to use --ignore_cache to avoid the case that the module was removed (to be @@ -127,7 +128,9 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n" fatal_error "${error}" fi + echo "BEFORE RELOADING EESSI-EXTEND, EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS = $EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS" module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} + echo "AFTER RELOADING EESSI-EXTEND, EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS = $EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS" unset EESSI_EXTEND_VERSION # If there is a GPU on the node, the installation path will by default 
have an From e080044083850bdb50d85d20077cc0827cf23319 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 17:02:43 +0200 Subject: [PATCH 06/10] Fix that we unset EB config vars unconditionally upon unload --- EESSI-extend-easybuild.eb | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/EESSI-extend-easybuild.eb b/EESSI-extend-easybuild.eb index 547710ac..bc3695e6 100644 --- a/EESSI-extend-easybuild.eb +++ b/EESSI-extend-easybuild.eb @@ -211,8 +211,23 @@ end easybuild_version = os.getenv("EBVERSIONEASYBUILD") or easybuild_version eessi_version = os.getenv("EESSI_VERSION") or "2023.06" +if (mode() == "unload") then + -- unload unconditionally, so that even if EB versions were switched in the meantime, this gets unset + -- This avoids issues where EESSI-extend is first loaded with EB => 5.1 (which set these vars) + -- but then EB is swapped for a version < 5.1 and then EESSI-extend is unloaded (which would not unset + -- these vars if we did it conditional on the EB version) + setenv ("EASYBUILD_STRICT_RPATH_SANITY_CHECK", "1") + setenv ("EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS", "1") + setenv ("EASYBUILD_FAIL_ON_MOD_FILES_GCCCORE", "1") + setenv ("EASYBUILD_LOCAL_VAR_NAMING_CHECK", "error") + -- This can still be conditional, eessi_version is always set + if convertToCanonical(eessi_version) > convertToCanonical("2023.06") then + setenv ("EASYBUILD_PREFER_PYTHON_SEARCH_PATH", "EBPYTHONPREFIXES") + setenv ("EASYBUILD_MODULE_SEARCH_PATH_HEADERS", "include_paths") + setenv ("EASYBUILD_SEARCH_PATH_CPP_HEADERS", "include_paths") + end -- Set environment variables that are EasyBuild version specific -if convertToCanonical(easybuild_version) >= convertToCanonical("5.1") then +elseif convertToCanonical(easybuild_version) >= convertToCanonical("5.1") then setenv ("EASYBUILD_STRICT_RPATH_SANITY_CHECK", "1") setenv ("EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS", "1") setenv 
("EASYBUILD_FAIL_ON_MOD_FILES_GCCCORE", "1") From 454f7576514bfdd64750daf488d6e9a3f34da535 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 17:14:36 +0200 Subject: [PATCH 07/10] Do the same thing with less duplication --- EESSI-extend-easybuild.eb | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/EESSI-extend-easybuild.eb b/EESSI-extend-easybuild.eb index bc3695e6..eaf59540 100644 --- a/EESSI-extend-easybuild.eb +++ b/EESSI-extend-easybuild.eb @@ -211,23 +211,12 @@ end easybuild_version = os.getenv("EBVERSIONEASYBUILD") or easybuild_version eessi_version = os.getenv("EESSI_VERSION") or "2023.06" -if (mode() == "unload") then - -- unload unconditionally, so that even if EB versions were switched in the meantime, this gets unset - -- This avoids issues where EESSI-extend is first loaded with EB => 5.1 (which set these vars) - -- but then EB is swapped for a version < 5.1 and then EESSI-extend is unloaded (which would not unset - -- these vars if we did it conditional on the EB version) - setenv ("EASYBUILD_STRICT_RPATH_SANITY_CHECK", "1") - setenv ("EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS", "1") - setenv ("EASYBUILD_FAIL_ON_MOD_FILES_GCCCORE", "1") - setenv ("EASYBUILD_LOCAL_VAR_NAMING_CHECK", "error") - -- This can still be conditional, eessi_version is always set - if convertToCanonical(eessi_version) > convertToCanonical("2023.06") then - setenv ("EASYBUILD_PREFER_PYTHON_SEARCH_PATH", "EBPYTHONPREFIXES") - setenv ("EASYBUILD_MODULE_SEARCH_PATH_HEADERS", "include_paths") - setenv ("EASYBUILD_SEARCH_PATH_CPP_HEADERS", "include_paths") - end -- Set environment variables that are EasyBuild version specific -elseif convertToCanonical(easybuild_version) >= convertToCanonical("5.1") then +-- Do unload unconditionally, so that even if EB versions were switched in the meantime, this gets unset +-- This avoids issues where EESSI-extend is first loaded with EB >= 5.1 (which set these vars) +-- but then 
EB is swapped for a version < 5.1 and then EESSI-extend is unloaded (which would not unset +-- these vars if we did it conditional on the EB version) +if convertToCanonical(easybuild_version) >= convertToCanonical("5.1") or mode() == "unload" then setenv ("EASYBUILD_STRICT_RPATH_SANITY_CHECK", "1") setenv ("EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS", "1") setenv ("EASYBUILD_FAIL_ON_MOD_FILES_GCCCORE", "1") From 728c638403efe8c316b2546a3ec402c6e106301f Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 19 Aug 2025 17:20:32 +0200 Subject: [PATCH 08/10] Remove debugging output --- scripts/gpu_support/nvidia/install_cuda_and_libraries.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh index 04d49b8a..5123a7c1 100755 --- a/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh +++ b/scripts/gpu_support/nvidia/install_cuda_and_libraries.sh @@ -115,7 +115,6 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do unset EESSI_PROJECT_INSTALL unset EESSI_USER_INSTALL export EESSI_SITE_INSTALL=1 - echo "BEFORE UNLOADING EESSI-EXTEND, EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS = $EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS" module unload EESSI-extend ml_av_eessi_extend_out=${tmpdir}/ml_av_eessi_extend.out # need to use --ignore_cache to avoid the case that the module was removed (to be @@ -128,9 +127,7 @@ for EASYSTACK_FILE in ${TOPDIR}/easystacks/eessi-*CUDA*.yml; do error="\nNo module for EESSI-extend/${EESSI_EXTEND_VERSION} found\nwhile EESSI has been initialised to use software under ${EESSI_SOFTWARE_PATH}\n" fatal_error "${error}" fi - echo "BEFORE RELOADING EESSI-EXTEND, EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS = $EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS" module --ignore_cache load EESSI-extend/${EESSI_EXTEND_VERSION} - echo "AFTER RELOADING EESSI-EXTEND, 
EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS = $EASYBUILD_CUDA_SANITY_CHECK_ERROR_ON_FAILED_CHECKS" unset EESSI_EXTEND_VERSION # If there is a GPU on the node, the installation path will by default have an From 1beac4556d7633bec52eb4257eb2cf836b3f553b Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 19 Aug 2025 20:54:24 +0200 Subject: [PATCH 09/10] Revert "Add dummy change to prove functionality of the code" This reverts commit f85e74ef05570803d49f7255093ec11b4f0a0f9f. --- eb_hooks.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/eb_hooks.py b/eb_hooks.py index d9554069..bdf8f49b 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -1,6 +1,5 @@ # Hooks to customize how EasyBuild installs software in EESSI # see https://docs.easybuild.io/en/latest/Hooks.html -# DUMMY CHANGE, DONT MERGE import ast import datetime import glob @@ -152,7 +151,7 @@ def parse_list_of_dicts_env(var_name): if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', var_name): raise ValueError(f"Invalid environment variable name: {var_name}") list_string = os.getenv(var_name, '[]') - + list_of_dicts = [] try: # Try JSON format first @@ -163,7 +162,7 @@ def parse_list_of_dicts_env(var_name): list_of_dicts = ast.literal_eval(list_string) except (ValueError, SyntaxError): raise ValueError(f"Environment variable '{var_name}' does not contain a valid list of dictionaries.") - + return list_of_dicts @@ -212,7 +211,7 @@ def post_ready_hook(self, *args, **kwargs): parallel = self.parallel else: parallel = self.cfg['parallel'] - + if parallel == 1: return # no need to limit if already using 1 core @@ -734,7 +733,7 @@ def pre_configure_hook_score_p(self, *args, **kwargs): def pre_configure_hook_vsearch(self, *args, **kwargs): """ Pre-configure hook for VSEARCH - - Workaround for a Zlib macro being renamed in Gentoo, see https://bugs.gentoo.org/383179 + - Workaround for a Zlib macro being renamed in Gentoo, see https://bugs.gentoo.org/383179 (solves "expected initializer before 
'OF'" errors) """ if self.name == 'VSEARCH': From 305f3db92cc8a8b99e93eba60d42ef43d8ceb465 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 19 Aug 2025 21:28:05 +0200 Subject: [PATCH 10/10] add missing `then` in condition to determine tarball filename in `bot/build.sh` --- bot/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/build.sh b/bot/build.sh index 4d6b0cda..eb7c68c5 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -271,7 +271,7 @@ source $software_layer_dir/init/eessi_defaults # append the project (subdirectory) name to the end tarball name. This is information # then used at the ingestion stage. If ${EESSI_DEV_PROJECT} is not defined, nothing is # appended -if [[ -z ${EESSI_ACCELERATOR_TARGET_OVERRIDE} ]]; +if [[ -z ${EESSI_ACCELERATOR_TARGET_OVERRIDE} ]]; then export TGZ=$(printf "eessi-%s-software-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp}) else export TGZ=$(printf "eessi-%s-software-%s-%s-%s-%b%d.tar.gz" ${EESSI_VERSION} ${EESSI_OS_TYPE} ${EESSI_SOFTWARE_SUBDIR_OVERRIDE//\//-} ${EESSI_ACCELERATOR_TARGET_OVERRIDE//\//-} ${EESSI_DEV_PROJECT:+$EESSI_DEV_PROJECT-} ${timestamp})