
SYCL runtime wrapper: add memcpy support. #141647


Merged
2 commits merged into llvm:main on May 28, 2025

Conversation

silee2
Contributor

@silee2 silee2 commented May 27, 2025

No description provided.

@llvmbot
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir-execution-engine

Author: Sang Ik Lee (silee2)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/141647.diff

2 Files Affected:

  • (modified) mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp (+7-3)
  • (added) mlir/test/Integration/GPU/SYCL/gpu-memcpy-addf32-to-spirv.mlir (+53)
diff --git a/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp
index acb5d9de07510..d4f557cda9678 100644
--- a/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/SyclRuntimeWrappers.cpp
@@ -10,9 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <CL/sycl.hpp>
 #include <level_zero/ze_api.h>
 #include <sycl/ext/oneapi/backend/level_zero.hpp>
+#include <sycl/sycl.hpp>
 
 #ifdef _WIN32
 #define SYCL_RUNTIME_EXPORT __declspec(dllexport)
@@ -81,8 +81,7 @@ static void *allocDeviceMemory(sycl::queue *queue, size_t size, bool isShared) {
     memPtr = sycl::aligned_alloc_shared(64, size, getDefaultDevice(),
                                         getDefaultContext());
   } else {
-    memPtr = sycl::aligned_alloc_device(64, size, getDefaultDevice(),
-                                        getDefaultContext());
+    memPtr = sycl::aligned_alloc_device(64, size, *queue);
   }
   if (memPtr == nullptr) {
     throw std::runtime_error("mem allocation failed!");
@@ -208,3 +207,8 @@ mgpuModuleUnload(ze_module_handle_t module) {
 
   catchAll([&]() { L0_SAFE_CALL(zeModuleDestroy(module)); });
 }
+
+extern "C" SYCL_RUNTIME_EXPORT void
+mgpuMemcpy(void *dst, void *src, size_t sizeBytes, sycl::queue *queue) {
+  catchAll([&]() { queue->memcpy(dst, src, sizeBytes).wait(); });
+}
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-memcpy-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-memcpy-addf32-to-spirv.mlir
new file mode 100644
index 0000000000000..32888efe3457e
--- /dev/null
+++ b/mlir/test/Integration/GPU/SYCL/gpu-memcpy-addf32-to-spirv.mlir
@@ -0,0 +1,53 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(gpu-async-region),spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: | mlir-runner \
+// RUN:   --shared-libs=%mlir_sycl_runtime \
+// RUN:   --shared-libs=%mlir_runner_utils \
+// RUN:   --entry-point-result=void \
+// RUN: | FileCheck %s
+
+module @add attributes {gpu.container_module} {
+  memref.global "private" constant @__constant_2x2x2xf32_0 : memref<2x2x2xf32> = dense<[[[1.1, 2.2], [3.3, 4.4]], [[5.5, 6.6], [7.7, 8.8 ]]]>
+  memref.global "private" constant @__constant_2x2x2xf32 : memref<2x2x2xf32> = dense<[[[1.2, 2.3], [4.5, 5.8]], [[7.2, 8.3], [10.5, 11.8]]]>
+  func.func @main() {
+    %0 = memref.get_global @__constant_2x2x2xf32 : memref<2x2x2xf32>
+    %1 = memref.get_global @__constant_2x2x2xf32_0 : memref<2x2x2xf32>
+    %2 = call @test(%0, %1) : (memref<2x2x2xf32>, memref<2x2x2xf32>) -> memref<2x2x2xf32>
+    %cast = memref.cast %2 : memref<2x2x2xf32> to memref<*xf32>
+    call @printMemrefF32(%cast) : (memref<*xf32>) -> ()
+    memref.dealloc %2 : memref<2x2x2xf32>
+    return
+  }
+  func.func private @printMemrefF32(memref<*xf32>)
+  func.func @test(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>) -> memref<2x2x2xf32> {
+    %c2 = arith.constant 2 : index
+    %c1 = arith.constant 1 : index
+    %memref = gpu.alloc () : memref<2x2x2xf32>
+    gpu.memcpy %memref, %arg0 : memref<2x2x2xf32>, memref<2x2x2xf32>
+    %memref_0 = gpu.alloc () : memref<2x2x2xf32>
+    gpu.memcpy %memref_0, %arg1 : memref<2x2x2xf32>, memref<2x2x2xf32>
+    %memref_1 = gpu.alloc () : memref<2x2x2xf32>
+    gpu.launch_func @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref : memref<2x2x2xf32>, %memref_0 : memref<2x2x2xf32>, %memref_1 : memref<2x2x2xf32>)
+    %alloc = memref.alloc() : memref<2x2x2xf32>
+    gpu.memcpy %alloc, %memref_1 : memref<2x2x2xf32>, memref<2x2x2xf32>
+    gpu.dealloc %memref_1 : memref<2x2x2xf32>
+    gpu.dealloc %memref_0 : memref<2x2x2xf32>
+    gpu.dealloc %memref : memref<2x2x2xf32>
+    return %alloc : memref<2x2x2xf32>
+  }
+  gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
+    gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 2>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+      %0 = gpu.block_id  x
+      %1 = gpu.block_id  y
+      %2 = gpu.block_id  z
+      %3 = memref.load %arg0[%0, %1, %2] : memref<2x2x2xf32>
+      %4 = memref.load %arg1[%0, %1, %2] : memref<2x2x2xf32>
+      %5 = arith.addf %3, %4 : f32
+      memref.store %5, %arg2[%0, %1, %2] : memref<2x2x2xf32>
+      gpu.return
+    }
+  }
+  // CHECK: [2.3, 4.5]
+  // CHECK: [7.8, 10.2]
+  // CHECK: [12.7, 14.9]
+  // CHECK: [18.2, 20.6]
+}

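For context (not part of this PR): the gpu.memcpy ops in the new integration test are lowered by the gpu-to-llvm pass into calls to the mgpuMemcpy wrapper added above. Below is a minimal standalone sketch of the SYCL 2020 USM calls the wrapper builds on (sycl::aligned_alloc_device, sycl::queue::memcpy, wait); the queue setup and names such as numElems are illustrative assumptions, not code from the PR.

#include <sycl/sycl.hpp>
#include <cstdio>
#include <vector>

int main() {
  sycl::queue queue;  // default device selection; the wrapper manages its own queue
  constexpr size_t numElems = 8;
  constexpr size_t sizeBytes = numElems * sizeof(float);

  std::vector<float> host(numElems, 1.5f);
  std::vector<float> result(numElems, 0.0f);

  // Device USM allocation, analogous to the aligned_alloc_device call in
  // allocDeviceMemory() after this change.
  float *device =
      static_cast<float *>(sycl::aligned_alloc_device(64, sizeBytes, queue));

  // Host -> device and device -> host copies; each blocks via wait(),
  // mirroring queue->memcpy(dst, src, sizeBytes).wait() in the new mgpuMemcpy.
  queue.memcpy(device, host.data(), sizeBytes).wait();
  queue.memcpy(result.data(), device, sizeBytes).wait();

  sycl::free(device, queue);
  std::printf("result[0] = %f\n", result[0]);  // expected: 1.500000
  return 0;
}

The .wait() makes each copy synchronous from the caller's perspective, so the lowered runtime call completes before control returns and needs no separate stream synchronization.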
@llvmbot
Member

llvmbot commented May 27, 2025

@llvm/pr-subscribers-mlir

(Same author, changes, and full diff as in the comment above.)
Contributor

@charithaintc charithaintc left a comment


LGTM.

Contributor

@chencha3 chencha3 left a comment


LGTM

@charithaintc
Contributor

maybe change title to [mlir][executionengine] ... ?

@charithaintc charithaintc merged commit 3fa65de into llvm:main May 28, 2025
6 of 9 checks passed
sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Jun 3, 2025