diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index c553c1ed0b54a..e4d388aaf40db 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -49,6 +49,13 @@ if(NOT WIN32 AND WITH_GPU) endif() endif() +if(WITH_ROCM) + set(WARPCTC_PATHCH_ROCM_COMMAND + patch -p1 < + ${PADDLE_SOURCE_DIR}/patches/warpctc/CMakeLists.txt.rocm.patch && patch + -p1 < ${PADDLE_SOURCE_DIR}/patches/warpctc/devicetypes.cuh.patch) +endif() + set(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) @@ -100,7 +107,10 @@ ExternalProject_Add( SOURCE_DIR ${SOURCE_DIR} PREFIX ${WARPCTC_PREFIX_DIR} UPDATE_COMMAND "" - PATCH_COMMAND ${WARPCTC_PATCH_COMMAND} ${WARPCTC_PATCH_CUDA_COMMAND} + PATCH_COMMAND + COMMAND ${WARPCTC_PATCH_COMMAND} + COMMAND ${WARPCTC_PATCH_CUDA_COMMAND} + COMMAND ${WARPCTC_PATHCH_ROCM_COMMAND} #BUILD_ALWAYS 1 CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} diff --git a/cmake/external/warprnnt.cmake b/cmake/external/warprnnt.cmake index 0e000eafe1552..29ef5c12d90db 100644 --- a/cmake/external/warprnnt.cmake +++ b/cmake/external/warprnnt.cmake @@ -35,7 +35,11 @@ else() ${SOURCE_DIR} < ${PADDLE_SOURCE_DIR}/patches/warprnnt/CMakeLists.txt.cuda.patch) endif() - +if(WITH_ROCM) + set(WARPRNNT_PATCH_ROCM_COMMAND + patch -p1 < + ${PADDLE_SOURCE_DIR}/patches/warprnnt/CMakeLists.txt.rocm.patch) +endif() if(NOT WIN32 AND WITH_GPU) if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 12.0) @@ -99,7 +103,9 @@ ExternalProject_Add( SOURCE_DIR ${SOURCE_DIR} PREFIX ${WARPRNNT_PREFIX_DIR} UPDATE_COMMAND "" - PATCH_COMMAND ${WARPCTC_PATCH_CUDA_COMMAND} + PATCH_COMMAND + COMMAND ${WARPCTC_PATCH_CUDA_COMMAND} + COMMAND ${WARPRNNT_PATCH_ROCM_COMMAND} #BUILD_ALWAYS 1 CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 8279f83369ca8..563a3181b0114 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -92,8 +92,13 @@ macro(safe_set_nvflag flag_name) check_c_compiler_flag(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name}) set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name}) if(${safe_name}) - set(SAFE_GPU_COMMON_FLAGS - "${SAFE_GPU_COMMON_FLAGS} -Xcompiler=\"${flag_name}\"") + if(WITH_ROCM) + set(SAFE_GPU_COMMON_FLAGS + "${SAFE_GPU_COMMON_FLAGS} -Xcompiler \"${flag_name}\"") + else() + set(SAFE_GPU_COMMON_FLAGS + "${SAFE_GPU_COMMON_FLAGS} -Xcompiler=\"${flag_name}\"") + endif() endif() endmacro() @@ -279,6 +284,7 @@ endif() # Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex if(WITH_ROCM) + string(REPLACE "-Werror" "-Wno-error" HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS}) string(REPLACE "-Werror" "-Wno-error" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) endif() diff --git a/cmake/hip.cmake b/cmake/hip.cmake index fa62f5798b15a..6efed5b468576 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -24,6 +24,7 @@ else() CACHE PATH "Path to which clang has been installed") endif() set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) +set(CMAKE_PREFIX_PATH "${ROCM_PATH}" ${CMAKE_PREFIX_PATH}) find_package(HIP REQUIRED) include_directories(${ROCM_PATH}/include) @@ -123,6 +124,17 @@ list(APPEND HIP_CXX_FLAGS -Wno-switch) list(APPEND HIP_CXX_FLAGS -Wno-literal-conversion) list(APPEND HIP_CXX_FLAGS -Wno-constant-conversion) list(APPEND HIP_CXX_FLAGS -Wno-defaulted-function-deleted) +list(APPEND HIP_CXX_FLAGS -Wno-sign-compare) +list(APPEND HIP_CXX_FLAGS -Wno-bitwise-instead-of-logical) +list(APPEND HIP_CXX_FLAGS -Wno-unknown-warning-option) +list(APPEND HIP_CXX_FLAGS -Wno-unused-lambda-capture) +list(APPEND HIP_CXX_FLAGS -Wno-unused-variable) +list(APPEND HIP_CXX_FLAGS -Wno-unused-but-set-variable) +list(APPEND HIP_CXX_FLAGS -Wno-reorder-ctor) +list(APPEND HIP_CXX_FLAGS -Wno-deprecated-copy-with-user-provided-copy) +list(APPEND HIP_CXX_FLAGS -Wno-unused-local-typedef) +list(APPEND HIP_CXX_FLAGS -Wno-missing-braces) +list(APPEND HIP_CXX_FLAGS -Wno-sometimes-uninitialized) if(WITH_CINN) list(APPEND HIP_CXX_FLAGS -std=c++14) diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index 3751118915e9a..7041b7d63f736 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -37,7 +37,7 @@ #ifdef PADDLE_WITH_HIP #include #ifdef PADDLE_WITH_RCCL -#include +#include #endif #endif diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index c3e51e508b103..67ad8d5793d83 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -33,10 +33,13 @@ if(WITH_GPU OR WITH_ROCM) list(APPEND ALLOCATOR_DEPS cuda_device_guard gpu_info dynload_cuda) endif() -if(WITH_GPU) - list(APPEND ALLOCATOR_DEPS phi common) +if(WITH_ROCM) + list(APPEND ALLOCATOR_DEPS ${ROCM_HIPRTC_LIB}) endif() +if(WITH_GPU OR WITH_ROCM) + list(APPEND ALLOCATOR_DEPS phi common) +endif() if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2) list(APPEND ALLOCATOR_SRCS cuda_virtual_mem_allocator.cc) endif() diff --git a/paddle/fluid/platform/dynload/rccl.h b/paddle/fluid/platform/dynload/rccl.h index 4d988e4fb08a0..90d998972da0d 100644 --- a/paddle/fluid/platform/dynload/rccl.h +++ b/paddle/fluid/platform/dynload/rccl.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include +#include #include // NOLINT diff --git a/paddle/fluid/platform/dynload/rocblas.h b/paddle/fluid/platform/dynload/rocblas.h index 5cec6fb48798b..ec3f2cadb7eb7 100644 --- a/paddle/fluid/platform/dynload/rocblas.h +++ b/paddle/fluid/platform/dynload/rocblas.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include +#include #include // NOLINT #include diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 03467d175c78f..12da6f72eb030 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -39,9 +39,9 @@ limitations under the License. */ #endif // PADDLE_WITH_CUDA #ifdef PADDLE_WITH_HIP -#include +#include #include -#include +#include #include #include // NOLINT #endif diff --git a/paddle/phi/backends/dynload/hipfft.h b/paddle/phi/backends/dynload/hipfft.h index 45e5a2a473d2a..84a8edba0ae01 100644 --- a/paddle/phi/backends/dynload/hipfft.h +++ b/paddle/phi/backends/dynload/hipfft.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once #ifdef PADDLE_WITH_HIP -#include +#include #include // NOLINT diff --git a/paddle/phi/backends/dynload/hiprand.h b/paddle/phi/backends/dynload/hiprand.h index 038b01eb7de5f..f7780a6a6287a 100644 --- a/paddle/phi/backends/dynload/hiprand.h +++ b/paddle/phi/backends/dynload/hiprand.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include +#include #include // NOLINT diff --git a/paddle/phi/backends/dynload/rccl.h b/paddle/phi/backends/dynload/rccl.h index 9d3a49bce9624..1c75ae89817b8 100644 --- a/paddle/phi/backends/dynload/rccl.h +++ b/paddle/phi/backends/dynload/rccl.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include +#include #include // NOLINT diff --git a/paddle/phi/backends/dynload/rocblas.h b/paddle/phi/backends/dynload/rocblas.h index 19df156b086a0..d9e928c3174df 100644 --- a/paddle/phi/backends/dynload/rocblas.h +++ b/paddle/phi/backends/dynload/rocblas.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include +#include #include // NOLINT #include diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h index 8ffeb74896ec6..70cc8213956ca 100644 --- a/paddle/phi/core/enforce.h +++ b/paddle/phi/core/enforce.h @@ -24,9 +24,9 @@ limitations under the License. */ #endif // PADDLE_WITH_CUDA #ifdef PADDLE_WITH_HIP -#include +#include #include -#include +#include #include #include // NOLINT #endif diff --git a/patches/warpctc/CMakeLists.txt.rocm.patch b/patches/warpctc/CMakeLists.txt.rocm.patch new file mode 100644 index 0000000000000..9fc35f2c14afc --- /dev/null +++ b/patches/warpctc/CMakeLists.txt.rocm.patch @@ -0,0 +1,10 @@ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -24,6 +24,7 @@ option(BUILD_SHARED "build warp-ctc shared library." ON) + option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF) + + if(WITH_ROCM) ++ list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) + add_definitions(-DWARPCTC_WITH_HIP) + include(hip) + endif(WITH_ROCM) \ No newline at end of file diff --git a/patches/warpctc/devicetypes.cuh.patch b/patches/warpctc/devicetypes.cuh.patch new file mode 100644 index 0000000000000..137da63b0fb04 --- /dev/null +++ b/patches/warpctc/devicetypes.cuh.patch @@ -0,0 +1,13 @@ +--- a/include/contrib/moderngpu/include/device/devicetypes.cuh ++++ b/include/contrib/moderngpu/include/device/devicetypes.cuh +@@ -207,10 +207,6 @@ MGPU_HOST_DEVICE int2& operator+=(int2& a, int2 b) { + MGPU_HOST_DEVICE int2 operator*(int2 a, int2 b) { + return make_int2(a.x * b.x, a.y * b.y); + } +-MGPU_HOST_DEVICE int2& operator*=(int2& a, int2 b) { +- a = a * b; +- return a; +-} + + template + MGPU_HOST_DEVICE T max(T a, T b) { \ No newline at end of file diff --git a/patches/warprnnt/CMakeLists.txt.rocm.patch b/patches/warprnnt/CMakeLists.txt.rocm.patch new file mode 100644 index 0000000000000..b5efd34aede42 --- /dev/null +++ b/patches/warprnnt/CMakeLists.txt.rocm.patch @@ -0,0 +1,10 @@ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -39,6 +39,7 @@ option(BUILD_SHARED "build warp-rnnt shared library." ON) + option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF) + + if(WITH_ROCM) ++ list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) + add_definitions(-DWARPRNNT_WITH_HIP) + include(hip) + endif(WITH_ROCM)