Skip to content

Commit

Permalink
RCCL Testing
Browse files Browse the repository at this point in the history
- updated tests to use configuration files
- many tests generate a configuration file
- tests now have GPU option
- enable ncclCommCount, disable ncclGetVersion
- add testing for RCCLP via rccl-tests
- working directory of tests is PROJECT_BINARY_DIR
- add nccl/rccl functions to get_whole_function_names
- some clang compiler fixes
  • Loading branch information
jrmadsen committed Jul 25, 2022
1 parent 20c28e3 commit e71c467
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 33 deletions.
1 change: 1 addition & 0 deletions .cmake-format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ parse:
NAME: '*'
TARGET: '*'
MPI: '*'
GPU: '*'
NUM_PROCS: '*'
REWRITE_TIMEOUT: '*'
RUNTIME_TIMEOUT: '*'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/opensuse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,6 @@ jobs:
with:
name: data-${{ github.job }}-files
path: |
build/omnitrace-tests-config/*.cfg
build/omnitrace-tests-output/**/*.txt
build/omnitrace-tests-output/**/*-instr*.json
1 change: 1 addition & 0 deletions .github/workflows/ubuntu-bionic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,6 @@ jobs:
with:
name: data-${{ github.job }}-files
path: |
build/omnitrace-tests-config/*.cfg
build/omnitrace-tests-output/**/*.txt
build/omnitrace-tests-output/**/*-instr*.json
3 changes: 3 additions & 0 deletions .github/workflows/ubuntu-focal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ jobs:
with:
name: data-${{ github.job }}-files
path: |
build/omnitrace-tests-config/*.cfg
build/omnitrace-tests-output/**/*.txt
build/omnitrace-tests-output/**/*-instr*.json
Expand Down Expand Up @@ -322,6 +323,7 @@ jobs:
name: data-${{ github.job }}-files
path: |
omnitrace-tests-output/**/*.txt
build/omnitrace-tests-config/*.cfg
build/omnitrace-tests-output/**/*.txt
build/omnitrace-tests-output/**/*-instr*.json
Expand Down Expand Up @@ -474,5 +476,6 @@ jobs:
with:
name: data-${{ github.job }}-files
path: |
${{ github.workspace }}/build/omnitrace-tests-config/*.cfg
${{ github.workspace }}/build/omnitrace-tests-output/**/*.txt
${{ github.workspace }}/build/omnitrace-tests-output/**/*-instr*.json
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ omnitrace_add_option(OMNITRACE_USE_ROCPROFILER "Enable rocprofiler support"
omnitrace_add_option(
OMNITRACE_USE_ROCM_SMI "Enable rocm-smi support for power/temp/etc. sampling"
${OMNITRACE_USE_HIP})
omnitrace_add_option(OMNITRACE_USE_RCCL "Enable RCCL support" ${OMNITRACE_USE_HIP})
omnitrace_add_option(OMNITRACE_USE_RCCL "Enable RCCL support" ON)
omnitrace_add_option(OMNITRACE_USE_MPI_HEADERS
"Enable wrapping MPI functions w/o enabling MPI dependency" ON)
omnitrace_add_option(OMNITRACE_USE_OMPT "Enable OpenMP tools support" ON)
Expand Down
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ add_subdirectory(openmp)
add_subdirectory(mpi)
add_subdirectory(python)
add_subdirectory(lulesh)
add_subdirectory(rccl)
61 changes: 61 additions & 0 deletions examples/rccl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Example project that fetches and builds the rccl-tests benchmarks.
# It declares its own minimum CMake version and project() so that it is
# self-contained as a CMake project.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

project(omnitrace-rccl-example LANGUAGES CXX)

# Neither package is REQUIRED: the rest of this file checks rccl_FOUND and
# hip_FOUND and skips the example with a warning when either is missing.
find_package(rccl)
# ROCmVersion_DIR is not set in this file -- presumably it is provided by the
# enclosing project or by the user (TODO confirm). When empty, the HINTS/PATHS
# arguments simply contribute no extra search locations.
find_package(hip HINTS ${ROCmVersion_DIR} PATHS ${ROCmVersion_DIR})

# rccl_message(<args>...)
#
# Thin logging shim. All arguments are forwarded unchanged to
# omnitrace_message() (defined outside this file) when the top-level project is
# "omnitrace", and to the built-in message() command otherwise.
function(rccl_message)
    if(NOT "${CMAKE_PROJECT_NAME}" STREQUAL "omnitrace")
        # Not configured as part of omnitrace: fall back to plain message().
        message(${ARGN})
        return()
    endif()

    omnitrace_message(${ARGN})
endfunction()

# Always start from an empty target list. CACHE INTERNAL entries persist across
# configure runs, so without this reset a previously successful configure would
# leave stale rccl-tests::* names in the cache when HIP/RCCL are later missing
# or the rccl-tests build fails (the imported targets would no longer exist).
set(RCCL_TEST_TARGETS
    ""
    CACHE INTERNAL "rccl-test targets")

if(hip_FOUND AND rccl_FOUND)
    include(FetchContent)
    fetchcontent_declare(
        rccl-tests
        GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rccl-tests.git)

    # Download (populate) the rccl-tests sources at configure time.
    fetchcontent_makeavailable(rccl-tests)

    # Ask FetchContent where the sources were placed instead of hard-coding
    # ${CMAKE_BINARY_DIR}/_deps/rccl-tests-src: the location changes when
    # FETCHCONTENT_BASE_DIR is overridden.
    fetchcontent_getproperties(rccl-tests SOURCE_DIR _RCCL_TESTS_SOURCE_DIR)

    # RCCL_HOME is taken to be the parent directory of the RCCL include directory.
    get_filename_component(rccl_ROOT_DIR "${rccl_INCLUDE_DIR}" DIRECTORY)

    # Build rccl-tests with its own Makefile during configure.
    # NOTE(review): ROCM_PATH is not set in this file; presumably it is provided by
    # the enclosing project or the user -- confirm.
    rccl_message(STATUS "Building rccl-tests...")
    execute_process(
        COMMAND make HIP_HOME=${ROCM_PATH} RCCL_HOME=${rccl_ROOT_DIR}
        WORKING_DIRECTORY "${_RCCL_TESTS_SOURCE_DIR}"
        RESULT_VARIABLE _RCCL_BUILD_RET
        ERROR_VARIABLE _RCCL_BUILD_ERR
        OUTPUT_VARIABLE _RCCL_BUILD_OUT
        OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE)

    if(NOT _RCCL_BUILD_RET EQUAL 0)
        # Quote the captured output and give it an explicit mode: an empty string
        # would otherwise call message() with no arguments (a configure error) and
        # the first word of the build log could be mistaken for a message mode.
        if(NOT "${_RCCL_BUILD_OUT}" STREQUAL "")
            rccl_message(STATUS "${_RCCL_BUILD_OUT}")
        endif()
        rccl_message(AUTHOR_WARNING "Failed to build rccl-tests: ${_RCCL_BUILD_ERR}")
    else()
        # Every *_perf binary produced by the Makefile becomes an imported target.
        file(GLOB RCCL_TEST_EXECUTABLES "${_RCCL_TESTS_SOURCE_DIR}/build/*_perf")
        set(_RCCL_TEST_TARGETS)

        foreach(_EXE ${RCCL_TEST_EXECUTABLES})
            get_filename_component(_EXE_NAME "${_EXE}" NAME)
            # Copy the binary next to this example's other build outputs and expose
            # it as rccl-tests::<name> so other directories can reference it.
            execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${_EXE}
                                    ${CMAKE_CURRENT_BINARY_DIR}/${_EXE_NAME})
            add_executable(rccl-tests::${_EXE_NAME} IMPORTED GLOBAL)
            set_property(
                TARGET rccl-tests::${_EXE_NAME}
                PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${_EXE_NAME})
            list(APPEND _RCCL_TEST_TARGETS "rccl-tests::${_EXE_NAME}")
        endforeach()

        # Publish the list of imported targets through the cache.
        set(RCCL_TEST_TARGETS
            "${_RCCL_TEST_TARGETS}"
            CACHE INTERNAL "rccl-test targets")
    endif()
else()
    rccl_message(AUTHOR_WARNING "${PROJECT_NAME} skipped. Missing RCCL and/or HIP...")
endif()
13 changes: 8 additions & 5 deletions examples/transpose/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@ endif()

add_executable(transpose transpose.cpp)

if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang"
AND NOT CMAKE_CXX_COMPILER_IS_HIPCC
AND NOT HIPCC_EXECUTABLE)
target_link_libraries(
transpose
PRIVATE
$<IF:$<TARGET_EXISTS:omnitrace::omnitrace-compile-options>,omnitrace::omnitrace-compile-options,>
$<IF:$<TARGET_EXISTS:hip::host>,hip::host,>
$<IF:$<TARGET_EXISTS:hip::device>,hip::device,>)
PRIVATE $<TARGET_NAME_IF_EXISTS:omnitrace::omnitrace-compile-options>
$<TARGET_NAME_IF_EXISTS:hip::host> $<TARGET_NAME_IF_EXISTS:hip::device>)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_link_libraries(
transpose PRIVATE $<TARGET_NAME_IF_EXISTS:omnitrace::omnitrace-compile-options>)
else()
target_compile_options(transpose PRIVATE -W -Wall)
endif()
Expand Down
8 changes: 7 additions & 1 deletion source/bin/omnitrace/details.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ get_whole_function_names()
"rocr::core::BusyWaitSignal::WaitAcquire",
"rocr::core::BusyWaitSignal::WaitRelaxed", "rocr::HSA::hsa_signal_wait_scacquire",
"rocr::os::ThreadTrampoline", "rocr::image::ImageRuntime::CreateImageManager",
"rocr::AMD::GpuAgent::GetInfo", "rocr::HSA::hsa_agent_get_info", "event_base_loop"
"rocr::AMD::GpuAgent::GetInfo", "rocr::HSA::hsa_agent_get_info",
"event_base_loop", "bootstrapRoot", "bootstrapNetAccept", "ncclCommInitRank",
"ncclCommInitAll", "ncclCommDestroy", "ncclCommCount", "ncclCommCuDevice",
"ncclCommUserRank", "ncclReduce", "ncclBcast", "ncclBroadcast", "ncclAllReduce",
"ncclReduceScatter", "ncclAllGather", "ncclGroupStart", "ncclGroupEnd",
"ncclSend", "ncclRecv", "ncclGather", "ncclScatter", "ncclAllToAll",
"ncclAllToAllv"
};
#else
// should hopefully be removed soon
Expand Down
4 changes: 2 additions & 2 deletions source/lib/omnitrace/library/components/rcclp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ configure_rcclp(const std::set<std::string>& permit, const std::set<std::string>
{
// generate the gotcha wrappers
rcclp_gotcha_t::get_initializer() = []() {
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 0, ncclGetVersion);
// TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 0, ncclGetVersion);
// TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 1, ncclGetUniqueId);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 2, ncclCommInitRank);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 3, ncclCommInitAll);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 4, ncclCommDestroy);
// TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 5, ncclCommCount);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 5, ncclCommCount);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 6, ncclCommCuDevice);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 7, ncclCommUserRank);
TIMEMORY_C_GOTCHA(rcclp_gotcha_t, 8, ncclReduce);
Expand Down
Loading

0 comments on commit e71c467

Please sign in to comment.