Skip to content

Commit

Permalink
5.5 cherry pick (#317)
Browse files Browse the repository at this point in the history
* HIP SDK compatibility (#306)

* Adding HIP SDK compatibility

* Update cmake_path function in rmake.py

* Fix typo

* Update copyright year

* Copy googlebenchmark DLL to rocRAND benchmark build folder (#302)

* Copy googlebenchmark DLL to rocRAND benchmark build folder

* Changing googlebenchmark build to static library to avoid DLL copy on Windows

* Remove manual DLL copy of googlebenchmark

* Fix toolchain-windows.cmake for HIP SDK (#313)

* Adding HIP SDK compatibility

* Update cmake_path function in rmake.py

* Fix typo

* Update copyright year

* Update toolchain-windows.cmake for HIP SDK support

* Take python3 by default

* Update changelog for 5.5 cherry picks

* Workaround for ROCm 5.5 performance regression (#311)

This is a fix for a performance regression observed when compiling rocRAND
legacy benchmarks using a recent version of rocm-llvm. This regression is
caused by commit ba0d079c7aa52bc0ae860d16dd4a33b0dc5cfff7 in upstream llvm,
and just so happens to cause problems for this kernel. This workaround reverts
to the old way that the stride is computed, however, this should in theory
be less efficient than using gridDim and blockDim.

---------

Co-authored-by: Robin Voetter <robin@streamhpc.com>
  • Loading branch information
stanleytsang-amd and Snektron committed Mar 6, 2023
1 parent 88e3bb2 commit c6a084e
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 28 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -9,6 +9,8 @@ Full documentation for rocRAND is available at [https://rocrand.readthedocs.io/e
- ThreeFry pseudorandom number generator based on Salmon et al., 2011, "Parallel random numbers: as easy as 1, 2, 3".
### Changed
- Python 2.7 is no longer officially supported.
### Fixed
- Windows HIP SDK support

## rocRAND-2.10.16 for ROCm 5.4.0
### Added
Expand Down
2 changes: 1 addition & 1 deletion benchmark/CMakeLists.txt
Expand Up @@ -96,7 +96,7 @@ function(add_rocrand_benchmark benchmark_src legacy)
# for now adding in all .dll as dependency chain is not cmake based on win32
file( GLOB third_party_dlls
LIST_DIRECTORIES ON
CONFIGURE_DEPENDS
CONFIGURE_DEPENDS
${HIP_DIR}/bin/*.dll
${CMAKE_SOURCE_DIR}/rtest.*
)
Expand Down
13 changes: 11 additions & 2 deletions benchmark/benchmark_rocrand_kernel.cpp
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -94,7 +94,16 @@ void generate_kernel(GeneratorState * states,
const Extra extra)
{
const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int stride = gridDim.x * blockDim.x;

// Using gridDim.x * blockDim.x should actually a performance improvement, however, this kernel
// just so happen to use an unfortunate amount of registers that the changes introduced in
// https://github.com/llvm/llvm-project/commit/ba0d079c7aa52bc0ae860d16dd4a33b0dc5cfff7,
// cause adverse code generation that degrades performance.
#ifdef USE_HIP_CPU
const unsigned int stride = gridDim.x * blockDim.x;
#else
const unsigned int stride = hipGridDim_x * hipBlockDim_x;
#endif

GeneratorState state = states[state_id];
unsigned int index = state_id;
Expand Down
4 changes: 2 additions & 2 deletions cmake/Dependencies.cmake
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -171,7 +171,7 @@ if(BUILD_BENCHMARK)
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG v1.6.1
INSTALL_DIR ${GOOGLEBENCHMARK_ROOT}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_SHARED_LIBS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE}
LOG_DOWNLOAD TRUE
LOG_CONFIGURE TRUE
LOG_BUILD TRUE
Expand Down
11 changes: 9 additions & 2 deletions rmake.py
@@ -1,5 +1,5 @@
#!/usr/bin/python3
"""Copyright 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
"""Copyright 2020-2023 Advanced Micro Devices, Inc. All rights reserved.
Manage build and installation"""

import re
Expand Down Expand Up @@ -75,6 +75,12 @@ def delete_dir(dir_path) :
#print( linux_path )
run_cmd( "rm" , f"-rf {linux_path}")

def cmake_path(os_path):
if OS_info["ID"] == "windows":
return os_path.replace("\\", "/")
else:
return os.path.realpath(os_path)

def config_cmd():
global args
global OS_info
Expand All @@ -88,7 +94,8 @@ def config_cmd():

if (OS_info["ID"] == 'windows'):
# we don't have ROCM on windows but have hip, ROCM can be downloaded if required
rocm_path = os.getenv( 'ROCM_CMAKE_PATH', "C:/github/rocm-cmake-master") #C:/hip") # rocm/Utils/cmake-rocm4.2.0"
raw_rocm_path = cmake_path(os.getenv('HIP_PATH', "C:/hip"))
rocm_path = f'"{raw_rocm_path}"' # guard against spaces in path
cmake_executable = "cmake.exe"
toolchain = os.path.join( src_path, "toolchain-windows.cmake" )
#set CPACK_PACKAGING_INSTALL_PREFIX= defined as blank as it is appended to end of path for archive creation
Expand Down
31 changes: 10 additions & 21 deletions toolchain-windows.cmake
Expand Up @@ -3,47 +3,36 @@
# Ninja doesn't support platform
#set(CMAKE_GENERATOR_PLATFORM x64)

if (DEFINED ENV{HIP_DIR})
if (DEFINED ENV{HIP_PATH})
file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
elseif (DEFINED ENV{HIP_DIR})
file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
else()
set(HIP_DIR "C:/hip")
set(rocm_bin "C:/hip/bin")
endif()

#set(CMAKE_CXX_COMPILER "${rocm_bin}/hipcc.bat")
#set(CMAKE_C_COMPILER "${rocm_bin}/hipcc.bat")
set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe")
set(CMAKE_C_COMPILER "${rocm_bin}/clang.exe")

#set(CMAKE_CXX_LINKER "${rocm_bin}/hipcc.bat" )

# TODO remove, just to speed up slow cmake
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
#
if (NOT python)
set(python "python3") # take default for windows
endif()

#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -IC:/hip/include -IC:/hip/lib/clang/12.0.0 -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
# our usage flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS")

# flags for clang direct use
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility")
# -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility -Wno-ignored-attributes")

# flags for clang direct use with hip
# -x hip causes linker error
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -x hip -IC:/hip/include/hip -D__HIP_PLATFORM_HCC__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_PATH}/include/hip -D__HIP_PLATFORM_HCC__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")

if (DEFINED ENV{VCPKG_PATH})
file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH)
else()
set(VCPKG_PATH "C:/github/vcpkg")
endif()
include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake")
# set(GTEST_DIR "C:/rocm/Utils/GTestMSVC")
# set(GTEST_INCLUDE_DIR "${GTEST_DIR}/include")
# set(GTEST_LIBRARY "${GTEST_DIR}/lib/Release/gtest.lib")
# set(GTEST_MAIN_LIBRARY "${GTEST_DIR}/lib/Release/gtest_main.lib")
# set(GTEST_LIBRARIES "${GTEST_DIR}/lib/Release/gtest.lib;${GTEST_DIR}/lib/Release/gtest_main.lib")

0 comments on commit c6a084e

Please sign in to comment.