Skip to content

Commit

Permalink
Add extension for kernel concurrency on same stream
Browse files Browse the repository at this point in the history
  • Loading branch information
gargrahul committed Mar 6, 2019
1 parent 2d67bc5 commit 59081c6
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 10 deletions.
12 changes: 11 additions & 1 deletion include/hip/hip_hcc.h
Expand Up @@ -89,13 +89,23 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view** a
* HIP/ROCm actually updates the start event when the associated kernel completes.
*/
hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent = nullptr,
hipEvent_t stopEvent = nullptr,
uint32_t flags = 0);

hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent = nullptr,
hipEvent_t stopEvent = nullptr);
hipEvent_t stopEvent = nullptr)
__attribute__((deprecated("use hipExtModuleLaunchKernel instead")));;

// doxygen end HCC-specific features
/**
Expand Down
2 changes: 1 addition & 1 deletion samples/0_Intro/module_api/launchKernelHcc.cpp
Expand Up @@ -88,7 +88,7 @@ int main() {
HIP_LAUNCH_PARAM_END};

HIP_CHECK(
hipHccModuleLaunchKernel(Function, LEN, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config));
hipExtModuleLaunchKernel(Function, LEN, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config, 0));

hipMemcpyDtoH(B, Bd, SIZE);

Expand Down
25 changes: 20 additions & 5 deletions src/hip_module.cpp
Expand Up @@ -131,7 +131,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent) {
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
auto ctx = ihipGetTlsDefaultCtx();
hipError_t ret = hipSuccess;

Expand Down Expand Up @@ -203,8 +203,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
aql.kernel_object = f->_object;
aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
aql.header =
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER); // TODO - honor queue setting for execute_in_order
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE);
if((flags & 0x1)== 0 ) {
//in_order
aql.header |= (1 << HSA_PACKET_HEADER_BARRIER);
}

if (HCC_OPT_FLUSH) {
aql.header |= (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
Expand Down Expand Up @@ -251,9 +254,21 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gr
hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
f, blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ, blockDimX, blockDimY,
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr));
blockDimZ, sharedMemBytes, hStream, kernelParams, extra, nullptr, nullptr, 0));
}

hipError_t hipExtModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY,
uint32_t localWorkSizeZ, size_t sharedMemBytes,
hipStream_t hStream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags) {
HIP_INIT_API(hipHccModuleLaunchKernel, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX,
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags));
}

hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
Expand All @@ -265,7 +280,7 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
localWorkSizeY, localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra);
return ihipLogStatus(ihipModuleLaunchKernel(
f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, localWorkSizeX, localWorkSizeY,
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
localWorkSizeZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, 0));
}

namespace {
Expand Down
9 changes: 6 additions & 3 deletions src/hip_stream.cpp
Expand Up @@ -61,8 +61,11 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit

// TODO - se try-catch loop to detect memory exception?
//
// Note this is an execute_in_order queue, so all kernels submitted will atuomatically
// wait for prev to complete: This matches CUDA stream behavior:
// Note this is an execute_any_order queue,
// CUDA stream behavior is that all kernels submitted will automatically
// wait for prev to complete, this behaviour will be mainatined by
// hipModuleLaunchKernel. execute_any_order will help
// hipExtModuleLaunchKernel , which uses a special flag

{
// Obtain mutex access to the device critical data, release by destructor
Expand All @@ -71,7 +74,7 @@ hipError_t ihipStreamCreate(hipStream_t* stream, unsigned int flags, int priorit
#if defined(__HCC__) && (__hcc_minor__ < 3)
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
#else
auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_in_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags);
auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_any_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags);
#endif

ctxCrit->addStream(istream);
Expand Down

0 comments on commit 59081c6

Please sign in to comment.