Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).

## v1.66.01
### Fixed
- Adding support for TheRock
- Fixing parsing issue when using NULL memory type
- Fixing CUDA compilation flags when enabling NIC/MPI
### Modified
- TransferBenchCuda must now be explicitly built via 'make TransferBenchCuda'

## v1.66.00
### Added
- Adding multi-node support
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if (NOT CMAKE_TOOLCHAIN_FILE)
message(STATUS "CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}")
endif()

set(VERSION_STRING "1.66.00")
set(VERSION_STRING "1.66.01")
project(TransferBench VERSION ${VERSION_STRING} LANGUAGES CXX)

## Load CMake modules
Expand Down
35 changes: 20 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,21 @@ SINGLE_KERNEL ?= 0
# Default is the native GPU target
GPU_TARGETS ?= native

EXE=TransferBench
DEBUG ?= 0

# Only perform this check if 'make clean' is not the target
ifeq ($(filter clean,$(MAKECMDGOALS)),)
# Compile TransferBenchCuda if nvidia-smi returns successfully and nvcc detected
ifeq ("$(shell nvidia-smi > /dev/null 2>&1 && test -e $(NVCC) && echo found)", "found")
EXE=TransferBenchCuda
CXX=$(NVCC)
ifeq ($(MAKECMDGOALS),TransferBenchCuda)
# Check for nvcc
ifneq ($(shell test -e $(NVCC) && echo found), found)
$(error "Could not find $(NVCC). Please set CUDA_PATH appropriately")
else
$(info Compiling TransferBenchCuda using $(NVCC))
endif
NVFLAGS = -x cu -lnuma -arch=native
else
EXE=TransferBench
# Check for HIP compiler
ifeq ("$(shell test -e $(HIPCC) && echo found)", "found")
CXX=$(HIPCC)
else ifeq ("$(shell test -e $(ROCM_PATH)/bin/hipcc && echo found)", "found")
Expand All @@ -44,18 +50,17 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
$(error "Could not find $(HIPCC) or $(ROCM_PATH)/bin/hipcc. Check if the path is correct if you want to build $(EXE)")
endif
GPU_TARGETS_FLAGS = $(foreach target,$(GPU_TARGETS),"--offload-arch=$(target)")
endif

CXXFLAGS = -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/hip -I$(ROCM_PATH)/include/hsa
HIPLDFLAGS= -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64
HIPFLAGS = -Wall -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS)
ifneq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
HIPFLAGS += --rocm-device-lib-path=$(ROCM_DEVICE_LIB_PATH)
CXXFLAGS = -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/hip -I$(ROCM_PATH)/include/hsa
HIPLDFLAGS= -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64 -lamdhip64
HIPFLAGS = -Wall -x hip -D__HIP_PLATFORM_AMD__ -D__HIPCC__ $(GPU_TARGETS_FLAGS)
ifneq ($(strip $(ROCM_DEVICE_LIB_PATH)),)
HIPFLAGS += --rocm-device-lib-path=$(ROCM_DEVICE_LIB_PATH)
endif
endif
NVFLAGS = -x cu -lnuma -arch=native

ifeq ($(SINGLE_KERNEL), 1)
CXXFLAGS += -DSINGLE_KERNEL
COMMON_FLAGS += -DSINGLE_KERNEL
endif

ifeq ($(DEBUG), 0)
Expand All @@ -79,7 +84,7 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
else ifeq ("$(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'infiniband/verbs.h')", "0")
$(info infiniband/verbs.h not found)
else
CXXFLAGS += -DNIC_EXEC_ENABLED
COMMON_FLAGS += -DNIC_EXEC_ENABLED
LDFLAGS += -libverbs
NIC_ENABLED = 1
endif
Expand All @@ -101,7 +106,7 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
$(info Unable to find mpi.h at $(MPI_PATH)/include. Please specify appropriate MPI_PATH)
else
MPI_ENABLED = 1
CXXFLAGS += -DMPI_COMM_ENABLED -I$(MPI_PATH)/include
COMMON_FLAGS += -DMPI_COMM_ENABLED -I$(MPI_PATH)/include
LDFLAGS += -L/$(MPI_PATH)/lib -lmpi
ifeq ($(DEBUG), 1)
LDFLAGS += -lmpi_cxx
Expand Down
2 changes: 1 addition & 1 deletion src/client/EnvVars.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ THE SOFTWARE.
#include <random>
#include <time.h>

#define CLIENT_VERSION "00"
#define CLIENT_VERSION "01"

#include "TransferBench.hpp"
using namespace TransferBench;
Expand Down
7 changes: 6 additions & 1 deletion src/header/TransferBench.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,14 +1176,19 @@ namespace {
}

// Parse memory type
ERR_CHECK(CharToMemType(*ptr, w.memType));
ErrResult err = CharToMemType(*ptr, w.memType);
if (err.errType != ERR_NONE) {
return {err.errType, "Error parsing token [%s]: %s\n", token.c_str(), err.errMsg.c_str()};
}
ptr++; // Skip memory type

// Parse memory index
if (w.memType != MEM_NULL) {
ptr = ParseRange(ptr, -1, w.memIndices);
if (!ptr) return {ERR_FATAL, "Unable to parse device index in memory token %s", token.c_str()};
memDevices.push_back(w);
} else {
break;
}
}
return ERR_NONE;
Expand Down