Skip to content

Commit

Permalink
Merge pull request #2559 from ye-luo/remove-workaround
Browse files Browse the repository at this point in the history
Remove Clang bug workaround and remove hard-coded CUDA_PROPAGATE_HOST_FLAGS for Clang
  • Loading branch information
ye-luo committed Jun 30, 2020
2 parents 108879d + 50101ea commit 6543398
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 7 deletions.
8 changes: 3 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -686,11 +686,9 @@ ENDIF()
#-------------------------------------------------------------------
SET(HAVE_CUDA 0)
IF(QMC_CUDA OR ENABLE_CUDA)
IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(CUDA_PROPAGATE_HOST_FLAGS ON)
ELSE(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
ENDIF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# FindCUDA defaults CUDA_PROPAGATE_HOST_FLAGS to ON, but we prefer OFF:
# propagating -ffast-math from the host flags has caused numerical issues in CUDA kernels.
OPTION(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
FIND_PACKAGE(CUDA REQUIRED)
SET(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
#set(CUDA_NVCC_FLAGS
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/BsplineFactory/SplineC2ROMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ void SplineC2ROMP<ST>::evaluateDetRatios(const VirtualParticleSet& VP,
ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);

TT sum(0);
PRAGMA_OFFLOAD("omp parallel firstprivate(first, last, first_cplx, last_cplx, first_real, last_real, offload_scratch_iat_ptr, psi_iat_ptr, pos_scratch, psiinv_ptr)")
PRAGMA_OFFLOAD("omp parallel")
{
spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, a, b, c, offload_scratch_iat_ptr + first, first,
last);
Expand Down Expand Up @@ -499,7 +499,7 @@ void SplineC2ROMP<ST>::mw_evaluateDetRatios(const RefVector<SPOSet>& spo_list,
ST(pos_scratch[iat * 6 + 5]), ix, iy, iz, a, b, c);

TT sum(0);
PRAGMA_OFFLOAD("omp parallel firstprivate(first, last, first_cplx, last_cplx, first_real, last_real, offload_scratch_iat_ptr, psi_iat_ptr, pos_scratch, psiinv_ptr)")
PRAGMA_OFFLOAD("omp parallel")
{
spline2offload::evaluate_v_impl_v2(spline_ptr, ix, iy, iz, a, b, c, offload_scratch_iat_ptr + first, first,
last);
Expand Down

0 comments on commit 6543398

Please sign in to comment.