diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 3c7631d92bbcb..36816eed2d295 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -506,6 +506,9 @@ typedef enum { // Timestamp enqueue PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D, + + // Return whether cluster launch is supported by device + PI_EXT_ONEAPI_DEVICE_INFO_CLUSTER_LAUNCH = 0x2021, } _pi_device_info; typedef enum { diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index df6c827de60f2..824e413c65825 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -381,6 +381,10 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cuda_cluster_group__ +// __SYCL_ASPECT(ext_oneapi_cuda_cluster_group, 74) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_cuda_cluster_group__ 0 + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -750,3 +754,10 @@ // __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_cuda_cluster_group__ +// __SYCL_ASPECT(ext_oneapi_cuda_cluster_group, 74) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_cuda_cluster_group__ 0 +#endif + +#endif diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 2d9cee1351d7a..f91ca1c3e1c36 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -68,3 +68,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72) __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) +__SYCL_ASPECT(ext_oneapi_cuda_cluster_group, 74) diff --git a/sycl/include/sycl/info/device_traits.def b/sycl/include/sycl/info/device_traits.def index dc50c5e920502..f4235778b074a 100644 --- a/sycl/include/sycl/info/device_traits.def +++ b/sycl/include/sycl/info/device_traits.def @@ -233,6 +233,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_work_groups_3d, id<3>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D) __SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_max_global_work_groups, size_t, PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) +__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_cuda_cluster_group, bool, + PI_EXT_ONEAPI_DEVICE_INFO_CLUSTER_LAUNCH) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index b1ef3f71ae170..682c384a3ef88 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -99,14 +99,15 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # purely for testing and will be updated once below is merged. + set(UNIFIED_RUNTIME_REPO "https://github.com/JackAKirk/unified-runtime.git") # commit 1e9b1b493fe30e6236bf611ae6d82366c9376f6c # Merge: a011f092 d8500a36 # Author: Kenneth Benzie (Benie) # Date: Fri Jun 21 10:22:52 2024 +0100 # Merge pull request #805 from aarongreig/aaron/kernelSetArgIndirectionFix # Correct level of indirection used in KernelSetArgPointer calls. - set(UNIFIED_RUNTIME_TAG 1e9b1b493fe30e6236bf611ae6d82366c9376f6c) + set(UNIFIED_RUNTIME_TAG 41d94022c2bb29d0cdf549573466c372a87e3a6e) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index ddcae71eab8f9..615f7c44c4e08 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1311,6 +1311,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_CLUSTER_LAUNCH, + UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP) #undef PI_TO_UR_MAP_DEVICE_INFO default: return PI_ERROR_UNKNOWN; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 6d2a8d08736f7..9081c707a3938 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -394,6 +394,8 @@ bool device_impl::has(aspect Aspect) const { return get_info(); case aspect::ext_intel_mem_channel: return get_info(); + case aspect::ext_oneapi_cuda_cluster_group: + return get_info(); case aspect::usm_atomic_host_allocations: return is_host() || (get_device_info_impl +struct get_device_info_impl { + static bool get(const DeviceImplPtr &Dev) { + bool result = false; + if (Dev->getBackend() == backend::ext_oneapi_cuda) + { + sycl::detail::pi::PiResult Err = + Dev->getPlugin()->call_nocheck( + Dev->getHandleRef(), + PiInfoCode::value, + sizeof(result), &result, nullptr); + if (Err != PI_SUCCESS) { + return false; + } + } + return result; + } +}; + // Specialization for exec_capabilities, OpenCL returns a bitfield template <> struct get_device_info_impl, @@ -1552,6 +1573,12 @@ get_device_info_host() { return false; } +template <> +inline bool +get_device_info_host() { + return false; +} + template <> inline uint32_t get_device_info_host() { // current value is the required minimum diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp index 2c27cfd18c424..63eaa20fc2597 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_enqueue_functions.cpp @@ -68,10 +68,8 @@ int test_cluster_launch_enqueue_functions(sycl::queue &queue, int main() { sycl::queue queue; - auto computeCapability = std::stof( - queue.get_device().get_info()); - if (computeCapability < 9.0) { + if (!queue.get_device().has(sycl::aspect::ext_oneapi_cuda_cluster_group)) { printf("Cluster group not supported on this arch, exiting...\n"); return 0; } diff --git a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp index 6438767036141..a4ae88141079a 100644 --- a/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp +++ b/sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp @@ -68,10 +68,8 @@ int test_cluster_launch_parallel_for(sycl::queue &queue, int main() { sycl::queue queue; - auto computeCapability = std::stof( - queue.get_device().get_info()); - if (computeCapability < 9.0) { + if (!queue.get_device().has(sycl::aspect::ext_oneapi_cuda_cluster_group)) { printf("Cluster group not supported on this arch, exiting...\n"); return 0; } diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp index 2a63c1dbd7887..19e5928bdde81 100644 --- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp +++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp @@ -29,9 +29,7 @@ int main() { std::vector a(4096, -20); sycl::queue queue; - auto computeCapability = std::stof( - queue.get_device().get_info()); - if (computeCapability < 9.0) { + if (!queue.get_device().has(sycl::aspect::ext_oneapi_cuda_cluster_group)) { printf("Cluster group not supported on this arch, exiting...\n"); return 0; }