Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add work_group_num_dim metadata #13600

Merged
merged 4 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 18 additions & 2 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,10 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
AttrMDArgs.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt(*XDimVal)));

for (auto i = AttrMDArgs.size(); i < 3; ++i)
AttrMDArgs.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt(llvm::APInt(32, 1))));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I always get confused with this: above we add, 'z', then 'y' and then 'x'. But if 'z' or 'y' were missing, when you fill '1' you are adding the 1s after 'x'. Does this lead to the right behavior?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's any consolation, I have to refresh my understanding of this every thing it pops up. The dimension-flipping happens based on the dimensionality, so:
3D: x -> z, y -> y and z -> x, e.g. {32, 16, 8} -> {8, 16, 32}. No padding needed.
2D: x -> y and y -> x, e.g. {32, 16} -> {16, 32}. Padding is added at the end, i.e. {16, 32, 1}.
1D: x -> x, e.g. {32} -> {32}. Padding is added at the end, i.e. {32, 1, 1}.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeap, it's exactly how @steffenlarsen explains it.

The way to think about it is that SYCL allows for non-3-dims wg sizes, however we are using openCL's mechanism to pass it to the backend. So, the flipping has to happen first in order to preserve the SYCL's semantics, only after that we pad, to align with openCL.


Fn->setMetadata("work_group_size_hint",
llvm::MDNode::get(Context, AttrMDArgs));
}
Expand All @@ -690,16 +694,28 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
std::optional<llvm::APSInt> ZDimVal = A->getZDimVal();
llvm::SmallVector<llvm::Metadata *, 3> AttrMDArgs;

llvm::APInt NumDims(32, 1); // X
// On SYCL target the dimensions are reversed if present.
if (ZDimVal)
if (ZDimVal) {
AttrMDArgs.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt(*ZDimVal)));
if (YDimVal)
++NumDims;
}
if (YDimVal) {
AttrMDArgs.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt(*YDimVal)));
++NumDims;
}
AttrMDArgs.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt(*XDimVal)));

for (auto i = NumDims.getZExtValue(); i < 3; ++i)
jchlanda marked this conversation as resolved.
Show resolved Hide resolved
AttrMDArgs.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt(llvm::APInt(32, 1))));

Fn->setMetadata("work_group_num_dim",
llvm::MDNode::get(Context, llvm::ConstantAsMetadata::get(
Builder.getInt(NumDims))));
Fn->setMetadata("reqd_work_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
}
Expand Down
17 changes: 12 additions & 5 deletions clang/test/CodeGenSYCL/check-work-group-attributes-match.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s

// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s

// Tests that work_group_size_hint and reqd_work_group_size generate the same
// metadata nodes for the same arguments.

Expand All @@ -11,21 +15,24 @@ int main() {
queue q;

q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_1d() #0 {{.*}} !work_group_size_hint ![[WG1D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG1D]]
// CHECK: define {{.*}} void @{{.*}}kernel_1d() #0 {{.*}} !work_group_size_hint ![[WGSH1D:[0-9]+]]{{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WGSH1D]]
h.single_task<class kernel_1d>([]() [[sycl::work_group_size_hint(8)]] [[sycl::reqd_work_group_size(8)]] {});
});

q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_2d() #0 {{.*}} !work_group_size_hint ![[WG2D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG2D]]
// CHECK: define {{.*}} void @{{.*}}kernel_2d() #0 {{.*}} !work_group_size_hint ![[WGSH2D:[0-9]+]]{{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WGSH2D:[0-9]+]]{{.*}}
h.single_task<class kernel_2d>([]() [[sycl::work_group_size_hint(8, 16)]] [[sycl::reqd_work_group_size(8, 16)]] {});
});

q.submit([&](handler &h) {
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_3d() #0 {{.*}} !work_group_size_hint ![[WG3D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG3D]]
// CHECK: define {{.*}} void @{{.*}}kernel_3d() #0 {{.*}} !work_group_size_hint ![[WG3D:[0-9]+]]{{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG3D]]
h.single_task<class kernel_3d>([]() [[sycl::work_group_size_hint(8, 16, 32)]] [[sycl::reqd_work_group_size(8, 16, 32)]] {});
});
}

// CHECK: ![[WG1D]] = !{i32 8}
// CHECK: ![[WG2D]] = !{i32 16, i32 8}
// CHECK: ![[WGSH1D]] = !{i32 8, i32 1, i32 1}
// CHECK: ![[NDRWGS1D]] = !{i32 1}
// CHECK: ![[WGSH2D]] = !{i32 16, i32 8, i32 1}
// CHECK: ![[NDRWGS2D]] = !{i32 2}
// CHECK: ![[WG3D]] = !{i32 32, i32 16, i32 8}
// CHECK: ![[NDRWGS3D]] = !{i32 3}
74 changes: 40 additions & 34 deletions clang/test/CodeGenSYCL/reqd-work-group-size.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

Expand Down Expand Up @@ -163,43 +166,46 @@ int main() {
return 0;
}

// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D32:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name2() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D8:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name3() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D88:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D22:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name5() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D44:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name6() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D2:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name7() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D32]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name8() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D8]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name9() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D88]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name10() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D22]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name11() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D44]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name12() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D2]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name13() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D32:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name14() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D8:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name15() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D88:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name16() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D22:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name17() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D44:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name18() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D2:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name19() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D32:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name20() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D8:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name21() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D8]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name22() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name23() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D8]]
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name24() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name1() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D32:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name2() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D8:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name3() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D88:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name4() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D22:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name5() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D44:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name6() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D2:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name7() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D32]]
// CHECK: define {{.*}} void @{{.*}}kernel_name8() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D8]]
// CHECK: define {{.*}} void @{{.*}}kernel_name9() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D88]]
// CHECK: define {{.*}} void @{{.*}}kernel_name10() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D22]]
// CHECK: define {{.*}} void @{{.*}}kernel_name11() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D44]]
// CHECK: define {{.*}} void @{{.*}}kernel_name12() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D2]]
// CHECK: define {{.*}} void @{{.*}}kernel_name13() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D32:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name14() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D8:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name15() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D88:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name16() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D22:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name17() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D44:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name18() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name19() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D32:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name20() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8]]
// CHECK: define {{.*}} void @{{.*}}kernel_name21() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8]]
// CHECK: define {{.*}} void @{{.*}}kernel_name22() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]]
// CHECK: define {{.*}} void @{{.*}}kernel_name23() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8]]
// CHECK: define {{.*}} void @{{.*}}kernel_name24() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]]

// CHECK: ![[NDRWGS3D]] = !{i32 3}
// CHECK: ![[WGSIZE3D32]] = !{i32 16, i32 16, i32 32}
// CHECK: ![[WGSIZE3D8]] = !{i32 1, i32 1, i32 8}
// CHECK: ![[WGSIZE3D88]] = !{i32 8, i32 8, i32 8}
// CHECK: ![[WGSIZE3D22]] = !{i32 2, i32 2, i32 2}
// CHECK: ![[WGSIZE3D44]] = !{i32 4, i32 4, i32 8}
// CHECK: ![[WGSIZE3D2]] = !{i32 2, i32 8, i32 1}
// CHECK: ![[WGSIZE2D32]] = !{i32 16, i32 32}
// CHECK: ![[WGSIZE2D8]] = !{i32 1, i32 8}
// CHECK: ![[WGSIZE2D88]] = !{i32 8, i32 8}
// CHECK: ![[WGSIZE2D22]] = !{i32 2, i32 2}
// CHECK: ![[WGSIZE2D44]] = !{i32 4, i32 8}
// CHECK: ![[WGSIZE2D2]] = !{i32 8, i32 1}
// CHECK: ![[WGSIZE1D32]] = !{i32 32}
// CHECK: ![[WGSIZE1D8]] = !{i32 8}
// CHECK: ![[WGSIZE1D22]] = !{i32 2}
// CHECK: ![[WGSIZE1D2]] = !{i32 1}
// CHECK: ![[NDRWGS2D]] = !{i32 2}
// CHECK: ![[WGSIZE2D32]] = !{i32 16, i32 32, i32 1}
// CHECK: ![[WGSIZE2D8]] = !{i32 1, i32 8, i32 1}
// CHECK: ![[WGSIZE2D88]] = !{i32 8, i32 8, i32 1}
// CHECK: ![[WGSIZE2D22]] = !{i32 2, i32 2, i32 1}
// CHECK: ![[WGSIZE2D44]] = !{i32 4, i32 8, i32 1}
// CHECK: ![[WGSIZE2D2_or_WGSIZE1D8]] = !{i32 8, i32 1, i32 1}
// CHECK: ![[NDRWGS1D]] = !{i32 1}
// CHECK: ![[WGSIZE1D32]] = !{i32 32, i32 1, i32 1}
// CHECK: ![[WGSIZE1D22]] = !{i32 2, i32 1, i32 1}
// CHECK: ![[WGSIZE1D2]] = !{i32 1, i32 1, i32 1}
1 change: 1 addition & 0 deletions llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct SYCLDeviceRequirements {
std::set<uint32_t> Aspects;
std::set<uint32_t> FixedTarget;
std::optional<llvm::SmallVector<uint64_t, 3>> ReqdWorkGroupSize;
std::optional<uint32_t> WorkGroupNumDim;
std::optional<llvm::SmallString<256>> JointMatrix;
std::optional<llvm::SmallString<256>> JointMatrixMad;
std::optional<uint32_t> SubGroupSize;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLLowerIR/ModuleSplitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly,
Categorizer.registerSimpleStringAttributeRule("sycl-grf-size");
Categorizer.registerListOfIntegersInMetadataSortedRule("sycl_used_aspects");
Categorizer.registerListOfIntegersInMetadataRule("reqd_work_group_size");
Categorizer.registerListOfIntegersInMetadataRule("work_group_num_dim");
Categorizer.registerListOfIntegersInMetadataRule(
"intel_reqd_sub_group_size");
Categorizer.registerSimpleStringAttributeRule(
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) {
}
}

if (auto *MDN = F.getMetadata("work_group_num_dim")) {
uint32_t WGND = ExtractUnsignedIntegerFromMDNodeOperand(MDN, 0);
if (!Reqs.ReqdWorkGroupSize.has_value())
Reqs.WorkGroupNumDim = WGND;
}

if (auto *MDN = F.getMetadata("reqd_work_group_size")) {
llvm::SmallVector<uint64_t, 3> NewReqdWorkGroupSize;
for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I)
Expand Down Expand Up @@ -133,5 +139,8 @@ std::map<StringRef, util::PropertyValue> SYCLDeviceRequirements::asMap() const {
if (SubGroupSize.has_value())
Requirements["reqd_sub_group_size"] = *SubGroupSize;

if (WorkGroupNumDim.has_value())
Requirements["work_group_num_dim"] = *WorkGroupNumDim;

return Requirements;
}
30 changes: 24 additions & 6 deletions llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,16 @@ bool isModuleUsingAsan(const Module &M) {
return MDVal->getString() == "asan";
}

// Gets work_group_num_dim information for function Func, conviniently 0 if
// metadata is not present.
uint32_t getKernelWorkGroupNumDim(const Function &Func) {
MDNode *MaxDimMD = Func.getMetadata("work_group_num_dim");
if (!MaxDimMD)
return 0;
assert(MaxDimMD->getNumOperands() == 1 && "Malformed node.");
return mdconst::extract<ConstantInt>(MaxDimMD->getOperand(0))->getZExtValue();
}

// Gets reqd_work_group_size information for function Func.
std::vector<uint32_t> getKernelReqdWorkGroupSizeMetadata(const Function &Func) {
MDNode *ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size");
Expand Down Expand Up @@ -473,15 +483,23 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD,
SmallVector<std::string, 4> MetadataNames;

if (GlobProps.EmitProgramMetadata) {
// Add reqd_work_group_size information to program metadata
// Add reqd_work_group_size and work_group_num_dim information to
// program metadata.
for (const Function &Func : M.functions()) {
std::vector<uint32_t> KernelReqdWorkGroupSize =
getKernelReqdWorkGroupSizeMetadata(Func);
if (KernelReqdWorkGroupSize.empty())
continue;
MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
KernelReqdWorkGroupSize);
if (!KernelReqdWorkGroupSize.empty()) {
MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
KernelReqdWorkGroupSize);
}

uint32_t WorkGroupNumDim = getKernelWorkGroupNumDim(Func);
if (WorkGroupNumDim) {
MetadataNames.push_back(Func.getName().str() + "@work_group_num_dim");
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
WorkGroupNumDim);
}
}

// Add global_id_mapping information with mapping between device-global
Expand Down
12 changes: 6 additions & 6 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 056d653264034e546d8b8f493e1d9f65c697829b
# Merge: b7c89302 bbb04b65
# commit 8cdd099ae3d1a34d3bcd7cbed7f5745c3dc8e112
# Merge: fc9bb61b c893a3c4
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Fri May 17 15:11:12 2024 +0100
# Merge pull request #1512 from DBDuncan/duncan/fix_pi_mem_leak
# [Bindless][Exp] Remove phMem argument from bindless image creation functions
set(UNIFIED_RUNTIME_TAG 056d653264034e546d8b8f493e1d9f65c697829b)
# Date: Mon May 20 15:50:02 2024 +0100
# Merge pull request #954 from jchlanda/jakub/rqwgs_hip
# [HIP] Handle required wg size attribute in HIP
set(UNIFIED_RUNTIME_TAG 8cdd099ae3d1a34d3bcd7cbed7f5745c3dc8e112)

fetch_adapter_source(level_zero
${UNIFIED_RUNTIME_REPO}
Expand Down
22 changes: 19 additions & 3 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2681,8 +2681,7 @@ checkDevSupportDeviceRequirements(const device &Dev,
const RTDeviceBinaryImage &Img,
const NDRDescT &NDRDesc) {
auto getPropIt = [&Img](const std::string &PropName) {
const RTDeviceBinaryImage::PropertyRange &PropRange =
Img.getDeviceRequirements();
auto &PropRange = Img.getDeviceRequirements();
RTDeviceBinaryImage::PropertyRange::ConstIterator PropIt = std::find_if(
PropRange.begin(), PropRange.end(),
[&PropName](RTDeviceBinaryImage::PropertyRange::ConstIterator &&Prop) {
Expand All @@ -2700,6 +2699,7 @@ checkDevSupportDeviceRequirements(const device &Dev,
auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size");
auto ReqdWGSizeUint64TPropIt = getPropIt("reqd_work_group_size_uint64_t");
auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size");
auto WorkGroupNumDim = getPropIt("work_group_num_dim");

// Checking if device supports defined aspects
if (AspectsPropIt) {
Expand Down Expand Up @@ -2796,7 +2796,23 @@ checkDevSupportDeviceRequirements(const device &Dev,
Dims++;
}

if (NDRDesc.Dims != 0 && NDRDesc.Dims != static_cast<size_t>(Dims))
size_t UserProvidedNumDims = 0;
if (WorkGroupNumDim) {
// We know the dimensions have been padded to 3, make sure that the pad
// value is always set to 1 and record the number of dimensions specified
// by the user.
UserProvidedNumDims =
DeviceBinaryProperty(*(WorkGroupNumDim.value())).asUint32();
#ifndef NDEBUG
for (unsigned i = UserProvidedNumDims; i < 3; ++i)
assert(ReqdWGSizeVec[i] == 1 &&
"Incorrect padding in required work-group size metadata.");
#endif // NDEBUG
} else {
UserProvidedNumDims = Dims;
}

if (NDRDesc.Dims != 0 && NDRDesc.Dims != UserProvidedNumDims)
return sycl::exception(
sycl::errc::nd_range,
"The local size dimension of submitted nd_range doesn't match the "
Expand Down
3 changes: 0 additions & 3 deletions sycl/test-e2e/Basic/reqd_work_group_size.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
//
// Failing negative test with HIP
// UNSUPPORTED: hip

#include <sycl/detail/core.hpp>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
// RUN: %{run} %t.out

// UNSUPPORTED: hip

#include <sycl/detail/core.hpp>

#define CHECK_INVALID_REQD_WORK_GROUP_SIZE(Dim, ...) \
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// https://github.com/intel/llvm/issues/9353
// UNSUPPORTED: hip

#include "sycl/sycl.hpp"

using namespace sycl;
Expand Down