Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make helper invocations take part in subgroupQuad operations #1798

Merged
merged 1 commit into from May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion include/vkgcDefs.h
Expand Up @@ -453,7 +453,6 @@ struct ShaderModuleUsage {
bool enableVarPtrStorageBuf; ///< Whether to enable "VariablePointerStorageBuffer" capability
bool enableVarPtr; ///< Whether to enable "VariablePointer" capability
bool useSubgroupSize; ///< Whether gl_SubgroupSize is used
bool useHelpInvocation; ///< Whether fragment shader has helper-invocation for subgroup
bool useSpecConstant; ///< Whether specialization constant is used
bool keepUnusedFunctions; ///< Whether to keep unused function
bool useIsNan; ///< Whether IsNan is used
Expand Down
7 changes: 4 additions & 3 deletions lgc/builder/BuilderImpl.h
Expand Up @@ -727,13 +727,13 @@ class SubgroupBuilder : virtual public BuilderImplBase {
llvm::Value *CreateSubgroupElect(const llvm::Twine &instName) override final;

// Create a subgroup all.
llvm::Value *CreateSubgroupAll(llvm::Value *const value, bool wqm, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName) override final;

// Create a subgroup any
llvm::Value *CreateSubgroupAny(llvm::Value *const value, bool wqm, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName) override final;

// Create a subgroup all equal.
llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, bool wqm, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, const llvm::Twine &instName) override final;

// Create a subgroup broadcast.
llvm::Value *CreateSubgroupBroadcast(llvm::Value *const value, llvm::Value *const index,
Expand Down Expand Up @@ -854,6 +854,7 @@ class SubgroupBuilder : virtual public BuilderImplBase {

llvm::Value *createDsSwizzle(llvm::Value *const value, uint16_t dsPattern);
llvm::Value *createWwm(llvm::Value *const value);
llvm::Value *createWqm(llvm::Value *const value);
llvm::Value *createThreadMask();
llvm::Value *createThreadMaskedSelect(llvm::Value *const threadMask, uint64_t andMask, llvm::Value *const value1,
llvm::Value *const value2);
Expand Down
15 changes: 6 additions & 9 deletions lgc/builder/BuilderRecorder.cpp
Expand Up @@ -1642,30 +1642,27 @@ Value *BuilderRecorder::CreateSubgroupElect(const Twine &instName) {
// Create a subgroup all.
//
// @param value : The value to compare
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give instruction(s)
Value *BuilderRecorder::CreateSubgroupAll(Value *const value, bool wqm, const Twine &instName) {
return record(Opcode::SubgroupAll, getInt1Ty(), {value, getInt1(wqm)}, instName);
Value *BuilderRecorder::CreateSubgroupAll(Value *const value, const Twine &instName) {
return record(Opcode::SubgroupAll, getInt1Ty(), {value}, instName);
}

// =====================================================================================================================
// Create a subgroup any
//
// @param value : The value to compare
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give instruction(s)
Value *BuilderRecorder::CreateSubgroupAny(Value *const value, bool wqm, const Twine &instName) {
return record(Opcode::SubgroupAny, getInt1Ty(), {value, getInt1(wqm)}, instName);
Value *BuilderRecorder::CreateSubgroupAny(Value *const value, const Twine &instName) {
return record(Opcode::SubgroupAny, getInt1Ty(), {value}, instName);
}

// =====================================================================================================================
// Create a subgroup all equal.
//
// @param value : The value to compare
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give instruction(s)
Value *BuilderRecorder::CreateSubgroupAllEqual(Value *const value, bool wqm, const Twine &instName) {
return record(Opcode::SubgroupAllEqual, getInt1Ty(), {value, getInt1(wqm)}, instName);
Value *BuilderRecorder::CreateSubgroupAllEqual(Value *const value, const Twine &instName) {
return record(Opcode::SubgroupAllEqual, getInt1Ty(), {value}, instName);
}

// =====================================================================================================================
Expand Down
6 changes: 3 additions & 3 deletions lgc/builder/BuilderReplayer.cpp
Expand Up @@ -790,13 +790,13 @@ Value *BuilderReplayer::processCall(unsigned opcode, CallInst *call) {
return m_builder->CreateSubgroupElect();
}
case BuilderRecorder::Opcode::SubgroupAll: {
return m_builder->CreateSubgroupAll(args[0], cast<ConstantInt>(args[1])->getZExtValue() != 0);
return m_builder->CreateSubgroupAll(args[0]);
}
case BuilderRecorder::Opcode::SubgroupAny: {
return m_builder->CreateSubgroupAny(args[0], cast<ConstantInt>(args[1])->getZExtValue() != 0);
return m_builder->CreateSubgroupAny(args[0]);
}
case BuilderRecorder::Opcode::SubgroupAllEqual: {
return m_builder->CreateSubgroupAllEqual(args[0], cast<ConstantInt>(args[1])->getZExtValue() != 0);
return m_builder->CreateSubgroupAllEqual(args[0]);
}
case BuilderRecorder::Opcode::SubgroupBroadcast: {
return m_builder->CreateSubgroupBroadcast(args[0], args[1]);
Expand Down
53 changes: 33 additions & 20 deletions lgc/builder/SubgroupBuilder.cpp
Expand Up @@ -83,14 +83,13 @@ Value *SubgroupBuilder::CreateSubgroupElect(const Twine &instName) {
// Create a subgroup all call.
//
// @param value : The value to compare across the subgroup. Must be an integer type.
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give final instruction.
Value *SubgroupBuilder::CreateSubgroupAll(Value *const value, bool wqm, const Twine &instName) {
Value *SubgroupBuilder::CreateSubgroupAll(Value *const value, const Twine &instName) {
Value *result = CreateICmpEQ(createGroupBallot(value), createGroupBallot(getTrue()));
result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result);

// Helper invocations of whole quad mode should be included in the subgroup vote execution
if (wqm) {
if (m_shaderStage == ShaderStageFragment) {
result = CreateZExt(result, getInt32Ty());
result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result});
result = CreateTrunc(result, getInt1Ty());
Expand All @@ -102,14 +101,13 @@ Value *SubgroupBuilder::CreateSubgroupAll(Value *const value, bool wqm, const Tw
// Create a subgroup any call.
//
// @param value : The value to compare across the subgroup. Must be an integer type.
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give final instruction.
Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, bool wqm, const Twine &instName) {
Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, const Twine &instName) {
Value *result = CreateICmpNE(createGroupBallot(value), getInt64(0));
result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result);

// Helper invocations of whole quad mode should be included in the subgroup vote execution
if (wqm) {
if (m_shaderStage == ShaderStageFragment) {
result = CreateZExt(result, getInt32Ty());
result = CreateIntrinsic(Intrinsic::amdgcn_softwqm, {getInt32Ty()}, {result});
result = CreateTrunc(result, getInt1Ty());
Expand All @@ -121,9 +119,8 @@ Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, bool wqm, const Tw
// Create a subgroup all equal call.
//
// @param value : The value to compare across the subgroup. Must be an integer type.
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give final instruction.
Value *SubgroupBuilder::CreateSubgroupAllEqual(Value *const value, bool wqm, const Twine &instName) {
Value *SubgroupBuilder::CreateSubgroupAllEqual(Value *const value, const Twine &instName) {
Type *const type = value->getType();

Value *compare = CreateSubgroupBroadcastFirst(value, instName);
Expand All @@ -141,9 +138,9 @@ Value *SubgroupBuilder::CreateSubgroupAllEqual(Value *const value, bool wqm, con
for (unsigned i = 1, compCount = cast<FixedVectorType>(type)->getNumElements(); i < compCount; i++)
result = CreateAnd(result, CreateExtractElement(compare, i));

return CreateSubgroupAll(result, wqm, instName);
return CreateSubgroupAll(result, instName);
} else
return CreateSubgroupAll(compare, wqm, instName);
return CreateSubgroupAll(compare, instName);
}

// =====================================================================================================================
Expand Down Expand Up @@ -1004,7 +1001,7 @@ Value *SubgroupBuilder::CreateSubgroupQuadBroadcast(Value *const value, Value *c
result = CreateSelect(compare, createDsSwizzle(value, getDsSwizzleQuadMode(3, 3, 3, 3)), result);
}

return result;
return createWqm(result);
}

// =====================================================================================================================
Expand All @@ -1014,9 +1011,9 @@ Value *SubgroupBuilder::CreateSubgroupQuadBroadcast(Value *const value, Value *c
// @param instName : Name to give final instruction.
Value *SubgroupBuilder::CreateSubgroupQuadSwapHorizontal(Value *const value, const Twine &instName) {
if (supportDpp())
return createDppMov(value, DppCtrl::DppQuadPerm1032, 0xF, 0xF, true);
else
return createDsSwizzle(value, getDsSwizzleQuadMode(1, 0, 3, 2));
return createWqm(createDppMov(value, DppCtrl::DppQuadPerm1032, 0xF, 0xF, true));

return createWqm(createDsSwizzle(value, getDsSwizzleQuadMode(1, 0, 3, 2)));
}

// =====================================================================================================================
Expand All @@ -1026,9 +1023,9 @@ Value *SubgroupBuilder::CreateSubgroupQuadSwapHorizontal(Value *const value, con
// @param instName : Name to give final instruction.
Value *SubgroupBuilder::CreateSubgroupQuadSwapVertical(Value *const value, const Twine &instName) {
if (supportDpp())
return createDppMov(value, DppCtrl::DppQuadPerm2301, 0xF, 0xF, true);
else
return createDsSwizzle(value, getDsSwizzleQuadMode(2, 3, 0, 1));
return createWqm(createDppMov(value, DppCtrl::DppQuadPerm2301, 0xF, 0xF, true));

return createWqm(createDsSwizzle(value, getDsSwizzleQuadMode(2, 3, 0, 1)));
}

// =====================================================================================================================
Expand All @@ -1038,9 +1035,9 @@ Value *SubgroupBuilder::CreateSubgroupQuadSwapVertical(Value *const value, const
// @param instName : Name to give final instruction.
Value *SubgroupBuilder::CreateSubgroupQuadSwapDiagonal(Value *const value, const Twine &instName) {
if (supportDpp())
return createDppMov(value, DppCtrl::DppQuadPerm3210, 0xF, 0xF, true);
else
return createDsSwizzle(value, getDsSwizzleQuadMode(3, 2, 1, 0));
return createWqm(createDppMov(value, DppCtrl::DppQuadPerm3210, 0xF, 0xF, true));

return createWqm(createDsSwizzle(value, getDsSwizzleQuadMode(3, 2, 1, 0)));
}

// =====================================================================================================================
Expand Down Expand Up @@ -1358,6 +1355,22 @@ Value *SubgroupBuilder::createWwm(Value *const value) {
return CreateMapToInt32(mapFunc, value, {});
}

// =====================================================================================================================
// Wrap a value in a call to the llvm.amdgcn.wqm intrinsic (WQM = whole quad mode).
// The intrinsic is only emitted when the current shader stage is the fragment stage; for every other stage the
// input value is handed back unchanged.
//
// @param value : The value to pass to the WQM intrinsic.
// @returns : The result of the WQM call (fragment stage), or "value" itself (all other stages).
Value *SubgroupBuilder::createWqm(Value *const value) {
  // Nothing to do outside of fragment shaders: return the input untouched.
  if (m_shaderStage != ShaderStageFragment)
    return value;

  // Callback that applies llvm.amdgcn.wqm to the single mapped argument it is given. It is invoked through
  // CreateMapToInt32 (NOTE(review): presumably once per 32-bit piece of "value" - confirm against that helper).
  auto applyWqm = [](BuilderBase &builder, ArrayRef<Value *> mappedArgs, ArrayRef<Value *>) -> Value * {
    Value *const mappedValue = mappedArgs[0];
    return builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_wqm, mappedValue);
  };

  return CreateMapToInt32(applyWqm, value, {});
}

// =====================================================================================================================
// Create a ds_swizzle bit mode pattern.
//
Expand Down
6 changes: 3 additions & 3 deletions lgc/include/lgc/builder/BuilderRecorder.h
Expand Up @@ -544,9 +544,9 @@ class BuilderRecorder final : public Builder, BuilderRecorderMetadataKinds {
llvm::Value *CreateGetWaveSize(const llvm::Twine &instName) override final;
llvm::Value *CreateGetSubgroupSize(const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupElect(const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAll(llvm::Value *const value, bool wqm, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAny(llvm::Value *const value, bool wqm, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, bool wqm, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupBroadcast(llvm::Value *const value, llvm::Value *const index,
const llvm::Twine &instName) override final;
llvm::Value *CreateSubgroupBroadcastWaterfall(llvm::Value *const value, llvm::Value *const index,
Expand Down
12 changes: 3 additions & 9 deletions lgc/interface/lgc/Builder.h
Expand Up @@ -1455,26 +1455,20 @@ class Builder : public BuilderCommon {
// Create a subgroup all.
//
// @param value : The value to compare
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give instruction(s)
virtual llvm::Value *CreateSubgroupAll(llvm::Value *const value, bool wqm = false,
const llvm::Twine &instName = "") = 0;
virtual llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName = "") = 0;

// Create a subgroup any
//
// @param value : The value to compare
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give instruction(s)
virtual llvm::Value *CreateSubgroupAny(llvm::Value *const value, bool wqm = false,
const llvm::Twine &instName = "") = 0;
virtual llvm::Value *CreateSubgroupAny(llvm::Value *const value, const llvm::Twine &instName = "") = 0;

// Create a subgroup all equal.
//
// @param value : The value to compare
// @param wqm : Executed in WQM (whole quad mode)
// @param instName : Name to give instruction(s)
virtual llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, bool wqm = false,
const llvm::Twine &instName = "") = 0;
virtual llvm::Value *CreateSubgroupAllEqual(llvm::Value *const value, const llvm::Twine &instName = "") = 0;

// Create a subgroup broadcast.
//
Expand Down
@@ -0,0 +1,45 @@
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results
; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.subgroup.quad.broadcast.f32
; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.subgroup.quad.broadcast.f32
; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.subgroup.quad.broadcast.f32
; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.subgroup.quad.broadcast.f32
; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

#version 450

#extension GL_KHR_shader_subgroup_quad : require

layout(binding = 0) readonly buffer Block0
{
float alpha[];
};

layout(location = 0) out vec4 color;

// Entry point of the quad-broadcast test shader.
// Reads one float per invocation from the "alpha" buffer and writes to "color" the values broadcast from
// quad invocations 0..3 (one per vector component).
void main()
{
// Integer coordinate derived from gl_FragCoord, used to index the input buffer.
ivec2 coord = ivec2(gl_FragCoord.xy);
float v = alpha[coord.y * 2 + coord.x];

// One subgroupQuadBroadcast per quad invocation id; the test header expects four matching
// lgc.create.subgroup.quad.broadcast calls, each later paired with an llvm.amdgcn.wqm call.
vec4 lanes;
lanes.x = subgroupQuadBroadcast(v, 0u);
lanes.y = subgroupQuadBroadcast(v, 1u);
lanes.z = subgroupQuadBroadcast(v, 2u);
lanes.w = subgroupQuadBroadcast(v, 3u);

color = lanes;
}
@@ -0,0 +1,33 @@
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results
; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.subgroup.quad.swap.diagonal.v4f32
; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

#version 450

#extension GL_KHR_shader_subgroup_quad : require

layout(location = 0) out vec4 result;

layout(set = 0, binding = 4, std430) readonly buffer Buffer0
{
vec4 data[];
};

// Entry point of the quad-swap-diagonal test shader.
// Each invocation loads the vec4 at index gl_SubgroupInvocationID from "data" and outputs the value returned by
// subgroupQuadSwapDiagonal for it.
void main (void)
{
result = subgroupQuadSwapDiagonal(data[gl_SubgroupInvocationID]);
}
@@ -0,0 +1,33 @@
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results
; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.subgroup.quad.swap.vertical.v4f32
; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.mov.dpp.i32
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: call i32 @llvm.amdgcn.wqm
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

#version 450

#extension GL_KHR_shader_subgroup_quad : require

layout(location = 0) out vec4 result;

layout(set = 0, binding = 4, std430) readonly buffer Buffer0
{
vec4 data[];
};

// Entry point of the quad-swap-vertical test shader.
// Each invocation loads the vec4 at index gl_SubgroupInvocationID from "data" and outputs the value returned by
// subgroupQuadSwapVertical for it.
void main (void)
{
result = subgroupQuadSwapVertical(data[gl_SubgroupInvocationID]);
}