Merge pull request #4431 from ye-luo/remove-legacy-cuda

Remove Legacy CUDA and disable QMC_CUDA build option
QMCPACK · Feb 1, 2023 · ba09a93 · ba09a93
2 parents b41ecf6 + 3f1360b
commit ba09a93
Show file tree

Hide file tree

Showing 162 changed files with 206 additions and 40,594 deletions.
diff --git a/.github/workflows/ci-github-actions-self-hosted.yaml b/.github/workflows/ci-github-actions-self-hosted.yaml
@@ -115,10 +115,6 @@ jobs:
       fail-fast: false
       matrix:
         jobname: [
-            GCC9-NoMPI-Legacy-CUDA-Real-Mixed, # mixed precision
-            GCC9-NoMPI-Legacy-CUDA-Complex-Mixed,
-            GCC9-NoMPI-Legacy-CUDA-Real, # full precision
-            GCC9-NoMPI-Legacy-CUDA-Complex,
             Clang15-MPI-CUDA-AFQMC-Offload-Real-Mixed, # auxiliary field, offload
             Clang15-MPI-CUDA-AFQMC-Offload-Real,
             Clang15-MPI-CUDA-AFQMC-Offload-Complex-Mixed,
@@ -222,10 +218,6 @@ jobs:
             ROCm-Clang13-NoMPI-CUDA2HIP-Real,
             ROCm-Clang13-NoMPI-CUDA2HIP-Complex-Mixed,
             ROCm-Clang13-NoMPI-CUDA2HIP-Complex,
-            ROCm-Clang13-MPI-Legacy-CUDA2HIP-Real-Mixed,
-            ROCm-Clang13-MPI-Legacy-CUDA2HIP-Real,
-            ROCm-Clang13-MPI-Legacy-CUDA2HIP-Complex-Mixed,
-            ROCm-Clang13-MPI-Legacy-CUDA2HIP-Complex,
           ]
 
     steps:

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -72,22 +72,17 @@ option(ENABLE_SYCL "Enable SYCL offload" OFF)
 # So the dependency must be explicitly specified with USE_OBJECT_TARGET.
 cmake_dependent_option(USE_OBJECT_TARGET "Use CMake object library target" OFF "ENABLE_OFFLOAD" OFF)
 
-if(ENABLE_CUDA AND QMC_CUDA)
-  message(FATAL_ERROR "ENABLE_CUDA=ON and QMC_CUDA=ON can not be set together!")
-endif(ENABLE_CUDA AND QMC_CUDA)
-
-if(ENABLE_OFFLOAD AND QMC_CUDA)
-  message(FATAL_ERROR "ENABLE_OFFLOAD=ON doesn't work together with QMC_CUDA=ON! Did you mean ENABLE_CUDA=ON?")
-endif()
+if(QMC_CUDA)
+  message(FATAL_ERROR "Legacy GPU implementation using CUDA has been removed! QMC_CUDA cannot be set ON. "
+                      "For NVIDIA GPU support use ENABLE_OFFLOAD with ENABLE_CUDA. Use v3.16.0 or earlier if you need QMC_CUDA.")
+endif(QMC_CUDA)
 
 if(ENABLE_CUDA AND ENABLE_SYCL)
   message(FATAL_ERROR "ENABLE_CUDA=ON and ENABLE_SYCL=ON can not be set together!")
 endif(ENABLE_CUDA AND ENABLE_SYCL)
 
 # set CMAKE_CUDA_ARCHITECTURES early such that offload compilers may take advantage of it
-if(ENABLE_CUDA
-   OR QMC_CUDA
-   AND NOT QMC_CUDA2HIP)
+if(ENABLE_CUDA AND NOT QMC_CUDA2HIP)
   if(DEFINED CUDA_ARCH)
     unset(CUDA_ARCH CACHE)
     message(
@@ -126,30 +121,15 @@ endif()
 #--------------------------------------------------------------------
 message(STATUS "Defining the float point precision")
 #--------------------------------------------------------------------
-if(QMC_CUDA)
-  option(QMC_MIXED_PRECISION "Enable/disable mixed precision" ON)
+option(QMC_MIXED_PRECISION "Enable/disable mixed precision" OFF)
+if(QMC_MIXED_PRECISION)
+  set(OHMMS_PRECISION float)
+  set(MIXED_PRECISION ON)
+else(QMC_MIXED_PRECISION)
   set(OHMMS_PRECISION double)
-  set(CUDA_PRECISION_FULL double)
-  if(QMC_MIXED_PRECISION)
-    set(CUDA_PRECISION float)
-  else(QMC_MIXED_PRECISION)
-    set(CUDA_PRECISION double)
-  endif(QMC_MIXED_PRECISION)
-  message("   Base precision = ${OHMMS_PRECISION}")
-  message("   Full precision = ${OHMMS_PRECISION_FULL}")
-  message("   CUDA base precision = ${CUDA_PRECISION}")
-  message("   CUDA full precision = ${CUDA_PRECISION_FULL}")
-else(QMC_CUDA)
-  option(QMC_MIXED_PRECISION "Enable/disable mixed precision" OFF)
-  if(QMC_MIXED_PRECISION)
-    set(OHMMS_PRECISION float)
-    set(MIXED_PRECISION ON)
-  else(QMC_MIXED_PRECISION)
-    set(OHMMS_PRECISION double)
-  endif(QMC_MIXED_PRECISION)
-  message("   Base precision = ${OHMMS_PRECISION}")
-  message("   Full precision = ${OHMMS_PRECISION_FULL}")
-endif(QMC_CUDA)
+endif(QMC_MIXED_PRECISION)
+message("   Base precision = ${OHMMS_PRECISION}")
+message("   Full precision = ${OHMMS_PRECISION_FULL}")
 
 #--------------------------------------------------------------------
 # Set debug printout
@@ -175,10 +155,6 @@ option(ENABLE_GCOV "Enable code coverage" OFF)
 option(BUILD_UNIT_TESTS "Build unit tests" ON)
 option(BUILD_MICRO_BENCHMARKS "Build micro benchmarks" ON)
 option(BUILD_LMYENGINE_INTERFACE "Build LMY engine" ON)
-if(QMC_CUDA AND BUILD_LMYENGINE_INTERFACE)
-  message(STATUS "LMY engine is not compatible with CUDA build! Disabling LMY engine")
-  set(BUILD_LMYENGINE_INTERFACE OFF)
-endif()
 if(MIXED_PRECISION AND BUILD_LMYENGINE_INTERFACE)
   message(STATUS "LMY engine is not compatible with CPU mixed precision build! Disabling LMY engine")
   set(BUILD_LMYENGINE_INTERFACE OFF)
@@ -197,7 +173,7 @@ option(USE_VTUNE_API "Enable use of VTune ittnotify APIs" OFF)
 cmake_dependent_option(USE_VTUNE_TASKS "USE VTune ittnotify task annotation" OFF "ENABLE_TIMERS AND USE_VTUNE_API" OFF)
 # CMake note - complex conditionals in cmake_dependent_option must have spaces around parentheses
 cmake_dependent_option(USE_NVTX_API "Enable/disable NVTX regions in CUDA code." OFF
-                       "ENABLE_TIMERS AND ( QMC_CUDA OR ENABLE_CUDA )" OFF)
+                       "ENABLE_TIMERS AND ENABLE_CUDA" OFF)
 set(HAVE_EINSPLINE 1) # to be removed
 option(QMC_EXP_THREADING "Experimental non openmp threading models" OFF)
 mark_as_advanced(QMC_EXP_THREADING)
@@ -738,15 +714,15 @@ endif()
 #  set up CUDA compiler options and libraries
 #-------------------------------------------------------------------
 set(HAVE_CUDA 0)
-if(QMC_CUDA OR ENABLE_CUDA)
+if(ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
     if(CMAKE_VERSION VERSION_LESS 3.21.0)
       message(FATAL_ERROR "ENABLE_ROCM or QMC_CUDA2HIP require CMake 3.21.0 or later")
     endif()
   else(QMC_CUDA2HIP)
     if(CMAKE_VERSION VERSION_LESS 3.18.0)
-      message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later")
+      message(FATAL_ERROR "ENABLE_CUDA require CMake 3.18.0 or later")
     endif()
     if(NOT DEFINED CMAKE_CUDA_STANDARD)
       set(CMAKE_CUDA_STANDARD 17)
@@ -764,7 +740,7 @@ if(QMC_CUDA OR ENABLE_CUDA)
     endif()
 
     enable_language(CUDA)
-    if(QMC_CUDA)
+    if(ENABLE_CUDA)
       include(TestCUDAHostCompatibility)
     endif()
     find_package(CUDAToolkit 11.0 REQUIRED)
@@ -783,12 +759,12 @@ if(QMC_CUDA OR ENABLE_CUDA)
     set(HAVE_CUDA 1)
     message("Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
   endif()
-else(QMC_CUDA OR ENABLE_CUDA)
+else(ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(FATAL_ERROR "QMC_CUDA2HIP requires QMC_CUDA=ON or ENABLE_CUDA=ON.")
   endif()
   message(STATUS "CUDA disabled")
-endif(QMC_CUDA OR ENABLE_CUDA)
+endif(ENABLE_CUDA)
 
 if(USE_NVTX_API)
   message(STATUS "Enabling use of CUDA NVTX APIs")

diff --git a/src/Estimators/tests/CMakeLists.txt b/src/Estimators/tests/CMakeLists.txt
@@ -33,13 +33,11 @@ set(SRCS
     test_InputSection.cpp
     test_EstimatorManagerInput.cpp
     test_ScalarEstimatorInputs.cpp
-    )
-
-# Tests incompatible with DiracDeterminantCUDA
-# DiracDeterminantsCUDA cannot be copied
-if(NOT QMC_CUDA)
-  set(SRCS ${SRCS} test_MomentumDistribution.cpp test_OneBodyDensityMatricesInput.cpp test_OneBodyDensityMatrices.cpp test_PerParticleHamiltonianLogger.cpp test_EstimatorManagerCrowd.cpp)
-endif()
+    test_MomentumDistribution.cpp
+    test_OneBodyDensityMatricesInput.cpp
+    test_OneBodyDensityMatrices.cpp
+    test_PerParticleHamiltonianLogger.cpp
+    test_EstimatorManagerCrowd.cpp)
 
 add_executable(${UTEST_EXE} ${SRCS})
 use_fake_rng(${UTEST_EXE})

diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt
@@ -67,15 +67,6 @@ target_compile_options(qmcparticle_omptarget PRIVATE "$<$<BOOL:${ENABLE_OFFLOAD_
 
 target_link_libraries(qmcparticle PUBLIC qmcparticle_omptarget)
 
-if(QMC_CUDA)
-  add_library(qmcparticle_cuda accept_kernel.cu)
-  if(QMC_CUDA2HIP)
-    set_source_files_properties(accept_kernel.cu PROPERTIES LANGUAGE HIP)
-  endif()
-  target_link_libraries(qmcparticle_cuda PRIVATE platform_runtime)
-  target_link_libraries(qmcparticle PRIVATE qmcparticle_cuda)
-endif(QMC_CUDA)
-
 if(BUILD_UNIT_TESTS)
   add_subdirectory(tests)
   add_subdirectory(Lattice/tests)

diff --git a/src/Particle/InitMolecularSystem.cpp b/src/Particle/InitMolecularSystem.cpp
@@ -61,10 +61,8 @@ bool InitMolecularSystem::put(xmlNodePtr cur)
   else
     initMolecule(ions, els);
 
-#if !defined(QMC_CUDA)
   makeUniformRandom(els->spins);
   els->spins *= 2 * M_PI;
-#endif
 
   app_log() << "</init>" << std::endl;
   app_log().flush();