From 9d5a4a808b9051db7a73dad63076cefa6d7e4b36 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 12:53:35 -0500 Subject: [PATCH 01/11] Re-enable tests Tests were disabled in #5796 --- libs/core/async_cuda/tests/performance/synchronize.cu | 9 --------- libs/core/async_cuda/tests/unit/transform_stream.cu | 9 --------- 2 files changed, 18 deletions(-) diff --git a/libs/core/async_cuda/tests/performance/synchronize.cu b/libs/core/async_cuda/tests/performance/synchronize.cu index 359a59568f42..adb5a9799c20 100644 --- a/libs/core/async_cuda/tests/performance/synchronize.cu +++ b/libs/core/async_cuda/tests/performance/synchronize.cu @@ -6,9 +6,6 @@ #include -// NVCC fails unceremoniously with this test at least until V11.5 -#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1105) - #include #include #include @@ -269,9 +266,3 @@ int main(int argc, char* argv[]) return hpx::local::init(hpx_main, argc, argv, init_args); } -#else -int main(int, char*[]) -{ - return 0; -} -#endif diff --git a/libs/core/async_cuda/tests/unit/transform_stream.cu b/libs/core/async_cuda/tests/unit/transform_stream.cu index b27f197822c6..657f6d8e52cb 100644 --- a/libs/core/async_cuda/tests/unit/transform_stream.cu +++ b/libs/core/async_cuda/tests/unit/transform_stream.cu @@ -6,9 +6,6 @@ #include -// NVCC fails unceremoniously with this test at least until V11.5 -#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1105) - #include #include #include @@ -302,9 +299,3 @@ int main(int argc, char* argv[]) return hpx::util::report_errors(); } -#else -int main(int, char*[]) -{ - return 0; -} -#endif From 07dcf71e7fc0e22fcbaabe400c3a39514de9b52e Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 13:00:02 -0500 Subject: [PATCH 02/11] Fix compute architecture and drop kepler Kepler support is removed in CUDA 12 so it's time to remove it here as well. 
--- .jenkins/lsu/env-gcc-10-cuda-11.sh | 2 +- .jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/lsu/env-gcc-10-cuda-11.sh b/.jenkins/lsu/env-gcc-10-cuda-11.sh index 39aa9b3539bc..78695b5827bf 100644 --- a/.jenkins/lsu/env-gcc-10-cuda-11.sh +++ b/.jenkins/lsu/env-gcc-10-cuda-11.sh @@ -23,4 +23,4 @@ configure_extra_options+=" -DHPX_WITH_CUDA=ON" configure_extra_options+=" -DHPX_WITH_NETWORKING=OFF" configure_extra_options+=" -DHPX_WITH_DISTRIBUTED_RUNTIME=OFF" configure_extra_options+=" -DHPX_WITH_ASYNC_MPI=ON" -configure_extra_options+=" -DCMAKE_CUDA_ARCHITECTURES='37;70'" +configure_extra_options+=" -DCMAKE_CUDA_ARCHITECTURES='70;80'" diff --git a/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh b/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh index 3dc9e8a74287..f2c6938e4437 100644 --- a/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh +++ b/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh @@ -4,5 +4,5 @@ # Distributed under the Boost Software License, Version 1.0. (See accompanying # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -configuration_slurm_partition="jenkins-cuda" +configuration_slurm_partition="cuda-V100,cuda-A100" configuration_slurm_num_nodes="1" From e201cf9cbce32742448fe78dbd2e80ec06e68f3c Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 14:55:12 -0500 Subject: [PATCH 03/11] Revert "Re-enable tests" This reverts commit 9d5a4a808b9051db7a73dad63076cefa6d7e4b36. 
Still not working unfortunately (tested with gcc/12 and cuda 12.1) --- libs/core/async_cuda/tests/performance/synchronize.cu | 9 +++++++++ libs/core/async_cuda/tests/unit/transform_stream.cu | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/libs/core/async_cuda/tests/performance/synchronize.cu b/libs/core/async_cuda/tests/performance/synchronize.cu index adb5a9799c20..359a59568f42 100644 --- a/libs/core/async_cuda/tests/performance/synchronize.cu +++ b/libs/core/async_cuda/tests/performance/synchronize.cu @@ -6,6 +6,9 @@ #include +// NVCC fails unceremoniously with this test at least until V11.5 +#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1105) + #include #include #include @@ -266,3 +269,9 @@ int main(int argc, char* argv[]) return hpx::local::init(hpx_main, argc, argv, init_args); } +#else +int main(int, char*[]) +{ + return 0; +} +#endif diff --git a/libs/core/async_cuda/tests/unit/transform_stream.cu b/libs/core/async_cuda/tests/unit/transform_stream.cu index 657f6d8e52cb..b27f197822c6 100644 --- a/libs/core/async_cuda/tests/unit/transform_stream.cu +++ b/libs/core/async_cuda/tests/unit/transform_stream.cu @@ -6,6 +6,9 @@ #include +// NVCC fails unceremoniously with this test at least until V11.5 +#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1105) + #include #include #include @@ -299,3 +302,9 @@ int main(int argc, char* argv[]) return hpx::util::report_errors(); } +#else +int main(int, char*[]) +{ + return 0; +} +#endif From be7eb3e462385a63c48ae322eee29960489fb2b7 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 15:38:09 -0500 Subject: [PATCH 04/11] Disable troublesome tests for current cuda version --- libs/core/async_cuda/tests/performance/synchronize.cu | 4 ++-- libs/core/async_cuda/tests/unit/transform_stream.cu | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/core/async_cuda/tests/performance/synchronize.cu b/libs/core/async_cuda/tests/performance/synchronize.cu index 
359a59568f42..d206638da99a 100644 --- a/libs/core/async_cuda/tests/performance/synchronize.cu +++ b/libs/core/async_cuda/tests/performance/synchronize.cu @@ -6,8 +6,8 @@ #include -// NVCC fails unceremoniously with this test at least until V11.5 -#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1105) +// NVCC fails unceremoniously with this test at least until V12.1 +#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1201) #include #include diff --git a/libs/core/async_cuda/tests/unit/transform_stream.cu b/libs/core/async_cuda/tests/unit/transform_stream.cu index b27f197822c6..d8ad49613be9 100644 --- a/libs/core/async_cuda/tests/unit/transform_stream.cu +++ b/libs/core/async_cuda/tests/unit/transform_stream.cu @@ -6,8 +6,8 @@ #include -// NVCC fails unceremoniously with this test at least until V11.5 -#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1105) +// NVCC fails unceremoniously with this test at least until V12.1 +#if !defined(HPX_CUDA_VERSION) || (HPX_CUDA_VERSION > 1201) #include #include From 20b37160868578de7ce73b0d948e1f1283952b28 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 15:39:01 -0500 Subject: [PATCH 05/11] Allow jenkins hip builds with warnings --- .jenkins/lsu/env-hipcc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lsu/env-hipcc.sh b/.jenkins/lsu/env-hipcc.sh index abc09f74b40e..0977332d1105 100644 --- a/.jenkins/lsu/env-hipcc.sh +++ b/.jenkins/lsu/env-hipcc.sh @@ -17,4 +17,4 @@ configure_extra_options+=" -DHPX_WITH_FETCH_ASIO=ON" configure_extra_options+=" -DHPX_WITH_MAX_CPU_COUNT=128" configure_extra_options+=" -DHPX_WITH_DEPRECATION_WARNINGS=OFF" configure_extra_options+=" -DHPX_WITH_COMPILER_WARNINGS=ON" -configure_extra_options+=" -DHPX_WITH_COMPILER_WARNINGS_AS_ERRORS=ON" +configure_extra_options+=" -DHPX_WITH_COMPILER_WARNINGS_AS_ERRORS=OFF" From 7cbf1694427e28ae9a60d020c97f9b23f2bfd073 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 17:10:59 -0500 
Subject: [PATCH 06/11] Remove broken node for testing --- .jenkins/lsu/entry.sh | 1 + .jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.jenkins/lsu/entry.sh b/.jenkins/lsu/entry.sh index 878e4aa9a4e8..9554137a7fa9 100755 --- a/.jenkins/lsu/entry.sh +++ b/.jenkins/lsu/entry.sh @@ -46,6 +46,7 @@ sbatch \ --job-name="${job_name}" \ --nodes="${configuration_slurm_num_nodes}" \ --partition="${configuration_slurm_partition}" \ + --exclude="bahram" \ --time="03:00:00" \ --output="jenkins-hpx-${configuration_name_with_build_type}.out" \ --error="jenkins-hpx-${configuration_name_with_build_type}.err" \ diff --git a/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh b/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh index f2c6938e4437..3dc9e8a74287 100644 --- a/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh +++ b/.jenkins/lsu/slurm-configuration-gcc-10-cuda-11.sh @@ -4,5 +4,5 @@ # Distributed under the Boost Software License, Version 1.0. (See accompanying # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -configuration_slurm_partition="cuda-V100,cuda-A100" +configuration_slurm_partition="jenkins-cuda" configuration_slurm_num_nodes="1" From 71c8ac34e334cabdd8bf4b9f872eaa79b8f7f7eb Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Thu, 18 May 2023 19:04:02 -0500 Subject: [PATCH 07/11] Temporarily switch to rocm/4 As all rocm/5 modules are currently broken on the machine --- .jenkins/lsu/env-hipcc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lsu/env-hipcc.sh b/.jenkins/lsu/env-hipcc.sh index 0977332d1105..fd46ce039f8b 100644 --- a/.jenkins/lsu/env-hipcc.sh +++ b/.jenkins/lsu/env-hipcc.sh @@ -4,7 +4,7 @@ # Distributed under the Boost Software License, Version 1.0. 
(See accompanying # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -module load rocm +module load rocm/4 module load boost export CXX=hipcc From 1e84947cca6522221caf05f4d5982c10fbe66643 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Tue, 23 May 2023 14:25:16 -0500 Subject: [PATCH 08/11] Revert "Temporarily switch to rocm/4" This reverts commit 71c8ac34e334cabdd8bf4b9f872eaa79b8f7f7eb. --- .jenkins/lsu/env-hipcc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lsu/env-hipcc.sh b/.jenkins/lsu/env-hipcc.sh index fd46ce039f8b..0977332d1105 100644 --- a/.jenkins/lsu/env-hipcc.sh +++ b/.jenkins/lsu/env-hipcc.sh @@ -4,7 +4,7 @@ # Distributed under the Boost Software License, Version 1.0. (See accompanying # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -module load rocm/4 +module load rocm module load boost export CXX=hipcc From 087074ac3429e67004b70efcd74e74531250825e Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Tue, 23 May 2023 23:46:18 -0500 Subject: [PATCH 09/11] Increase test timeout --- .jenkins/lsu-perftests/ctest.cmake | 2 +- .jenkins/lsu/ctest.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/lsu-perftests/ctest.cmake b/.jenkins/lsu-perftests/ctest.cmake index f8cf66b3301f..3acd5888f32c 100644 --- a/.jenkins/lsu-perftests/ctest.cmake +++ b/.jenkins/lsu-perftests/ctest.cmake @@ -8,7 +8,7 @@ # This is a dummy file to trigger the upload of the perftests reports cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -set(CTEST_TEST_TIMEOUT 300) +set(CTEST_TEST_TIMEOUT 900) set(CTEST_BUILD_PARALLELISM 20) set(CTEST_TEST_PARALLELISM 4) set(CTEST_CMAKE_GENERATOR Ninja) diff --git a/.jenkins/lsu/ctest.cmake b/.jenkins/lsu/ctest.cmake index 135f07c12025..f5be5696d025 100644 --- a/.jenkins/lsu/ctest.cmake +++ b/.jenkins/lsu/ctest.cmake @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -set(CTEST_TEST_TIMEOUT 300) +set(CTEST_TEST_TIMEOUT 900) 
set(CTEST_BUILD_PARALLELISM 20) set(CTEST_TEST_PARALLELISM 4) set(CTEST_CMAKE_GENERATOR Ninja) From b4b3b314ffa18a7118abf3275b0a2047ab4ae712 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Fri, 26 May 2023 23:43:10 -0500 Subject: [PATCH 10/11] Revert "Increase test timeout" This reverts commit 087074ac3429e67004b70efcd74e74531250825e. --- .jenkins/lsu-perftests/ctest.cmake | 2 +- .jenkins/lsu/ctest.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/lsu-perftests/ctest.cmake b/.jenkins/lsu-perftests/ctest.cmake index 3acd5888f32c..f8cf66b3301f 100644 --- a/.jenkins/lsu-perftests/ctest.cmake +++ b/.jenkins/lsu-perftests/ctest.cmake @@ -8,7 +8,7 @@ # This is a dummy file to trigger the upload of the perftests reports cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -set(CTEST_TEST_TIMEOUT 900) +set(CTEST_TEST_TIMEOUT 300) set(CTEST_BUILD_PARALLELISM 20) set(CTEST_TEST_PARALLELISM 4) set(CTEST_CMAKE_GENERATOR Ninja) diff --git a/.jenkins/lsu/ctest.cmake b/.jenkins/lsu/ctest.cmake index f5be5696d025..135f07c12025 100644 --- a/.jenkins/lsu/ctest.cmake +++ b/.jenkins/lsu/ctest.cmake @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -set(CTEST_TEST_TIMEOUT 900) +set(CTEST_TEST_TIMEOUT 300) set(CTEST_BUILD_PARALLELISM 20) set(CTEST_TEST_PARALLELISM 4) set(CTEST_CMAKE_GENERATOR Ninja) From 6e28071d2b50ccc2b02b316332fdffe81f3fccb8 Mon Sep 17 00:00:00 2001 From: Gregor Daiss Date: Fri, 26 May 2023 23:50:53 -0500 Subject: [PATCH 11/11] Test excluding troublesome hip algorithms test --- .jenkins/lsu/batch.sh | 2 +- .jenkins/lsu/env-common.sh | 2 ++ .jenkins/lsu/env-hipcc.sh | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.jenkins/lsu/batch.sh b/.jenkins/lsu/batch.sh index 1b24d132975f..64e4985ef554 100755 --- a/.jenkins/lsu/batch.sh +++ b/.jenkins/lsu/batch.sh @@ -21,7 +21,7 @@ ulimit -l unlimited set +e ctest \ - --verbose \ + ${ctest_extra_args} \ -S ${src_dir}/.jenkins/lsu/ctest.cmake \ 
-DCTEST_CONFIGURE_EXTRA_OPTIONS="${configure_extra_options}" \ -DCTEST_BUILD_CONFIGURATION_NAME="${configuration_name_with_build_type}" \ diff --git a/.jenkins/lsu/env-common.sh b/.jenkins/lsu/env-common.sh index 2d797c2acaf0..f0bd7e5dd00b 100644 --- a/.jenkins/lsu/env-common.sh +++ b/.jenkins/lsu/env-common.sh @@ -11,5 +11,7 @@ if [ "${build_type}" = "Debug" ]; then configure_extra_options+=" -DLCI_DEBUG=ON" fi +ctest_extra_args+=" --verbose " + hostname module avail diff --git a/.jenkins/lsu/env-hipcc.sh b/.jenkins/lsu/env-hipcc.sh index 0977332d1105..e31a25c4fa26 100644 --- a/.jenkins/lsu/env-hipcc.sh +++ b/.jenkins/lsu/env-hipcc.sh @@ -18,3 +18,5 @@ configure_extra_options+=" -DHPX_WITH_MAX_CPU_COUNT=128" configure_extra_options+=" -DHPX_WITH_DEPRECATION_WARNINGS=OFF" configure_extra_options+=" -DHPX_WITH_COMPILER_WARNINGS=ON" configure_extra_options+=" -DHPX_WITH_COMPILER_WARNINGS_AS_ERRORS=OFF" + +ctest_extra_args+=" -E tests.unit.modules.algorithms.detail "