Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Descriptor Offsets Relocatable #457

Merged
merged 1 commit into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lgc/include/lgc/util/Internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#pragma once

#include "lgc/CommonDefs.h"
#include "../interface/lgc/BuilderBase.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
Expand Down Expand Up @@ -75,6 +77,9 @@ llvm::CallInst *emitCall(llvm::StringRef funcName, llvm::Type *retTy, llvm::Arra
llvm::CallInst *emitCall(llvm::StringRef funcName, llvm::Type *retTy, llvm::ArrayRef<llvm::Value *> args,
llvm::ArrayRef<llvm::Attribute::AttrKind> attribs, llvm::BasicBlock *insertAtEnd);

// Emits an amdgcn.reloc.constant intrinsic that represents a relocatable value with the given symbol name.
llvm::CallInst* emitRelocationConstant(llvm::IRBuilderBase *builder, const llvm::Twine &symbolName);

// Adds LLVM-style type mangling suffix for the specified return type and args to the name.
void addTypeMangling(llvm::Type *returnTy, llvm::ArrayRef<llvm::Value *> args, std::string &name);

Expand Down
54 changes: 41 additions & 13 deletions lgc/patch/PatchDescriptorLoad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
***********************************************************************************************************************
*/
#include "PatchDescriptorLoad.h"
#include "../interface/lgc/LgcContext.h"
#include "lgc/state/TargetInfo.h"
#include "lgc/util/Internal.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -166,8 +168,8 @@ void PatchDescriptorLoad::processDescriptorGetPtr(CallInst *descPtrCall, StringR
// @param resType : Resource type
// @param descSet : Descriptor set
// @param binding : Binding
// @param topNode : Node in top-level descriptor table (TODO: nullptr for shader compilation)
// @param node : The descriptor node itself (TODO: nullptr for shader compilation)
// @param topNode : Node in top-level descriptor table (nullptr for shader compilation)
// @param node : The descriptor node itself (nullptr for shader compilation)
// @param shadow : Whether to load from shadow descriptor table
// @param [in/out] builder : IRBuilder
Value *PatchDescriptorLoad::getDescPtrAndStride(ResourceNodeType resType, unsigned descSet, unsigned binding,
Expand Down Expand Up @@ -198,11 +200,12 @@ Value *PatchDescriptorLoad::getDescPtrAndStride(ResourceNodeType resType, unsign

if (!stride) {
// Stride is not determinable just from the descriptor type requested by the Builder call.
if (!node) {
// TODO: Shader compilation: Get byte stride using a reloc.
llvm_unreachable("");
if (m_pipelineState->getLgcContext()->buildingRelocatableElf()) {
// Shader compilation: Get byte stride using a reloc.
stride = emitRelocationConstant(&builder, "dstride_" + Twine(descSet) + "_" + Twine(binding));
} else {
// Pipeline compilation: Get the stride from the resource type in the node.
assert(node && "expected valid user data node to determine descriptor stride.");
switch (node->type) {
case ResourceNodeType::DescriptorSampler:
stride = builder.getInt32(DescriptorSizeSampler / 4);
Expand Down Expand Up @@ -261,8 +264,8 @@ Value *PatchDescriptorLoad::getDescPtrAndStride(ResourceNodeType resType, unsign
// @param resType : Resource type
// @param descSet : Descriptor set
// @param binding : Binding
// @param topNode : Node in top-level descriptor table (TODO: nullptr for shader compilation)
// @param node : The descriptor node itself (TODO: nullptr for shader compilation)
// @param topNode : Node in top-level descriptor table (nullptr for shader compilation)
// @param node : The descriptor node itself (nullptr for shader compilation)
// @param shadow : Whether to load from shadow descriptor table
// @param [in/out] builder : IRBuilder
Value *PatchDescriptorLoad::getDescPtr(ResourceNodeType resType, unsigned descSet, unsigned binding,
Expand All @@ -287,22 +290,47 @@ Value *PatchDescriptorLoad::getDescPtr(ResourceNodeType resType, unsigned descSe

// Add on the byte offset of the descriptor.
Value *offset = nullptr;
if (!node) {
// TODO: Shader compilation: Get the offset for the descriptor using a reloc. The reloc symbol name
bool useRelocationForOffsets = !node || m_pipelineState->getLgcContext()->buildingRelocatableElf();
if (useRelocationForOffsets) {
// Get the offset for the descriptor using a reloc. The reloc symbol name
// needs to contain the descriptor set and binding, and, for image, fmask or sampler, whether it is
// a sampler.
llvm_unreachable("");
StringRef relocNameSuffix = "";
switch (resType) {
case ResourceNodeType::DescriptorSampler:
case ResourceNodeType::DescriptorYCbCrSampler:
relocNameSuffix = "_s";
break;
case ResourceNodeType::DescriptorResource:
relocNameSuffix = "_r";
break;
case ResourceNodeType::DescriptorBuffer:
case ResourceNodeType::DescriptorBufferCompact:
case ResourceNodeType::DescriptorTexelBuffer:
relocNameSuffix = "_b";
break;
default:
relocNameSuffix = "_x";
break;
}
offset = emitRelocationConstant(&builder, "doff_" + Twine(descSet) + "_" + Twine(binding) + relocNameSuffix);
// LLVM's internal handling of the GEP instruction results in a lot of junk code and prevents selection
// of the offset-from-register variant of the s_load_dwordx4 instruction. To work around this issue,
// we use integer arithmetic here so the amdgpu backend can pick up the optimal instruction.
// When relocation is used, offset is in bytes, not in dwords.
descPtr = builder.CreatePtrToInt(descPtr, builder.getInt64Ty());
descPtr = builder.CreateAdd(descPtr, builder.CreateZExt(offset, builder.getInt64Ty()));
descPtr = builder.CreateIntToPtr(descPtr, builder.getInt32Ty()->getPointerTo(ADDR_SPACE_CONST));
} else {
// Get the offset for the descriptor. Where we are getting the second part of a combined resource,
// add on the size of the first part.
unsigned offsetInDwords = node->offsetInDwords;
if (resType == ResourceNodeType::DescriptorSampler && node->type == ResourceNodeType::DescriptorCombinedTexture)
offsetInDwords += DescriptorSizeResource / 4;
offset = builder.getInt32(offsetInDwords);
descPtr = builder.CreateBitCast(descPtr, builder.getInt32Ty()->getPointerTo(ADDR_SPACE_CONST));
descPtr = builder.CreateGEP(builder.getInt32Ty(), descPtr, offset);
}
descPtr = builder.CreateBitCast(descPtr, builder.getInt32Ty()->getPointerTo(ADDR_SPACE_CONST));
descPtr = builder.CreateGEP(builder.getInt32Ty(), descPtr, offset);

return descPtr;
}

Expand Down
12 changes: 12 additions & 0 deletions lgc/util/Internal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

#include "lgc/BuilderBase.h"
#include "lgc/util/Internal.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "llpc-internal"

Expand Down Expand Up @@ -82,6 +83,17 @@ CallInst *emitCall(StringRef funcName, Type *retTy, ArrayRef<Value *> args, Arra
return builder.createNamedCall(funcName, retTy, args, attribs);
}

// =====================================================================================================================
// Builds a call to the amdgcn.reloc.constant intrinsic. The call's single operand is a metadata node wrapping the
// relocation symbol name, passed to the intrinsic as a metadata-as-value operand.
//
// @param builder : [in,out] IRBuilder used to insert the intrinsic call
// @param symbolName : Name of the relocation symbol associated with this relocation
llvm::CallInst *emitRelocationConstant(llvm::IRBuilderBase *builder, const llvm::Twine &symbolName) {
  llvm::LLVMContext &context = builder->getContext();

  // Wrap the symbol name as !{!"<symbolName>"} so it can be handed to the intrinsic as a metadata operand.
  llvm::MDString *symbolString = llvm::MDString::get(context, symbolName.str());
  llvm::MDNode *symbolNode = llvm::MDNode::get(context, symbolString);
  llvm::Value *symbolOperand = llvm::MetadataAsValue::get(context, symbolNode);

  return builder->CreateIntrinsic(llvm::Intrinsic::amdgcn_reloc_constant, {}, {symbolOperand});
}

// =====================================================================================================================
// Gets LLVM-style name for type.
//
Expand Down
40 changes: 20 additions & 20 deletions llpc/context/llpcCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,15 +734,15 @@ Result Compiler::buildPipelineWithRelocatableElf(Context *context, ArrayRef<cons
IShaderCache *userShaderCache = nullptr;
if (context->isGraphics()) {
auto pipelineInfo = reinterpret_cast<const GraphicsPipelineBuildInfo *>(context->getPipelineBuildInfo());
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true, stage);
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true, true, stage);
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pUserShaderCache = pPipelineInfo->pShaderCache;
userShaderCache = pipelineInfo->pShaderCache;
#endif
} else {
auto pipelineInfo = reinterpret_cast<const ComputePipelineBuildInfo *>(context->getPipelineBuildInfo());
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true);
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true, true);
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pUserShaderCache = pPipelineInfo->pShaderCache;
userShaderCache = pipelineInfo->pShaderCache;
#endif
}

Expand Down Expand Up @@ -1083,8 +1083,8 @@ unsigned GraphicsShaderCacheChecker::check(const Module *module, unsigned stageM

IShaderCache *appCache = nullptr;
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
auto pPipelineInfo = reinterpret_cast<const GraphicsPipelineBuildInfo *>(m_pContext->GetPipelineBuildInfo());
pAppCache = pPipelineInfo->pShaderCache;
auto pPipelineInfo = reinterpret_cast<const GraphicsPipelineBuildInfo *>(m_context->getPipelineBuildInfo());
appCache = pPipelineInfo->pShaderCache;
#endif
if (stageMask & shaderStageToMask(ShaderStageFragment)) {
m_fragmentCacheEntryState = m_compiler->lookUpShaderCaches(appCache, &fragmentHash, &m_fragmentElf,
Expand Down Expand Up @@ -1115,7 +1115,7 @@ unsigned GraphicsShaderCacheChecker::check(const Module *module, unsigned stageM
void GraphicsShaderCacheChecker::updateRootUserDateOffset(ElfPackage *pipelineElf) {
ElfWriter<Elf64> writer(m_context->getGfxIpVersion());
// Load ELF binary
auto result = writer.ReadFromBuffer(pipelineElf->data(), pipelineElf->size());
auto result = writer.readFromBuffer(pipelineElf->data(), pipelineElf->size());
assert(result == Result::Success);
(void(result)); // unused
writer.updateElfBinary(m_context, pipelineElf);
Expand Down Expand Up @@ -1171,7 +1171,7 @@ void GraphicsShaderCacheChecker::updateAndMerge(Result result, ElfPackage *outpu

// Merge and store the result in pPipelineElf
ElfWriter<Elf64> writer(m_context->getGfxIpVersion());
auto result = writer.ReadFromBuffer(nonFragmentElf.pCode, nonFragmentElf.codeSize);
auto result = writer.readFromBuffer(nonFragmentElf.pCode, nonFragmentElf.codeSize);
assert(result == Result::Success);
(void(result)); // unused
writer.mergeElfBinary(m_context, &fragmentElf, outputPipelineElf);
Expand Down Expand Up @@ -1243,14 +1243,15 @@ Result Compiler::BuildGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline
const PipelineShaderInfo *shaderInfo[ShaderStageGfxCount] = {
&pipelineInfo->vs, &pipelineInfo->tcs, &pipelineInfo->tes, &pipelineInfo->gs, &pipelineInfo->fs,
};
bool buildingRelocatableElf = canUseRelocatableGraphicsShaderElf(shaderInfo);

for (unsigned i = 0; i < ShaderStageGfxCount && result == Result::Success; ++i)
result = validatePipelineShaderInfo(shaderInfo[i]);

MetroHash::Hash cacheHash = {};
MetroHash::Hash pipelineHash = {};
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true);
pipelineHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, false);
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true, buildingRelocatableElf);
pipelineHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, false, false);

if (result == Result::Success && EnableOuts()) {
LLPC_OUTS("===============================================================================\n");
Expand All @@ -1277,10 +1278,9 @@ Result Compiler::BuildGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline
}

ShaderEntryState cacheEntryState = ShaderEntryState::New;
bool buildingRelocatableElf = canUseRelocatableGraphicsShaderElf(shaderInfo);
IShaderCache *appCache = nullptr;
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pAppCache = pPipelineInfo->pShaderCache;
appCache = pipelineInfo->pShaderCache;
#endif
ShaderCache *shaderCache = nullptr;
CacheEntryHandle hEntry = nullptr;
Expand Down Expand Up @@ -1371,8 +1371,8 @@ Result Compiler::BuildComputePipeline(const ComputePipelineBuildInfo *pipelineIn

MetroHash::Hash cacheHash = {};
MetroHash::Hash pipelineHash = {};
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true);
pipelineHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, false);
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true, buildingRelocatableElf);
pipelineHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, false, buildingRelocatableElf);

if (result == Result::Success && EnableOuts()) {
const ShaderModuleData *moduleData = reinterpret_cast<const ShaderModuleData *>(pipelineInfo->cs.pModuleData);
Expand All @@ -1397,7 +1397,7 @@ Result Compiler::BuildComputePipeline(const ComputePipelineBuildInfo *pipelineIn
ShaderEntryState cacheEntryState = ShaderEntryState::New;
IShaderCache *appCache = nullptr;
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pAppCache = pPipelineInfo->pShaderCache;
appCache = pipelineInfo->pShaderCache;
#endif
ShaderCache *shaderCache = nullptr;
CacheEntryHandle hEntry = nullptr;
Expand Down Expand Up @@ -1537,7 +1537,7 @@ Result Compiler::CreateShaderCache(const ShaderCacheCreateInfo *pCreateInfo, //
ShaderCache *pShaderCache = new ShaderCache();

if (pShaderCache != nullptr) {
result = pShaderCache->Init(pCreateInfo, &auxCreateInfo);
result = pShaderCache->init(pCreateInfo, &auxCreateInfo);
if (result != Result::Success) {
pShaderCache->Destroy();
pShaderCache = nullptr;
Expand Down Expand Up @@ -1716,7 +1716,7 @@ void Compiler::buildShaderCacheHash(Context *context, unsigned stageMask, ArrayR
MetroHash64 hasher;

// Update common shader info
PipelineDumper::updateHashForPipelineShaderInfo(stage, shaderInfo, true, &hasher);
PipelineDumper::updateHashForPipelineShaderInfo(stage, shaderInfo, true, &hasher, false);
hasher.Update(pipelineInfo->iaState.deviceIndex);

// Update input/output usage (provided by middle-end caller of this callback).
Expand Down Expand Up @@ -1774,14 +1774,14 @@ void Compiler::linkRelocatableShaderElf(ElfPackage *shaderElfs, ElfPackage *pipe
ElfReader<Elf64> fsReader(m_gfxIp);
if (!shaderElfs[ShaderStageVertex].empty()) {
size_t codeSize = shaderElfs[ShaderStageVertex].size_in_bytes();
result = vsReader.ReadFromBuffer(shaderElfs[ShaderStageVertex].data(), &codeSize);
result = vsReader.readFromBuffer(shaderElfs[ShaderStageVertex].data(), &codeSize);
if (result != Result::Success)
return;
}

if (!shaderElfs[ShaderStageFragment].empty()) {
size_t codeSize = shaderElfs[ShaderStageFragment].size_in_bytes();
result = fsReader.ReadFromBuffer(shaderElfs[ShaderStageFragment].data(), &codeSize);
result = fsReader.readFromBuffer(shaderElfs[ShaderStageFragment].data(), &codeSize);
if (result != Result::Success)
return;
}
Expand All @@ -1790,7 +1790,7 @@ void Compiler::linkRelocatableShaderElf(ElfPackage *shaderElfs, ElfPackage *pipe
} else {
ElfReader<Elf64> csReader(m_gfxIp);
size_t codeSize = shaderElfs[ShaderStageCompute].size_in_bytes();
result = csReader.ReadFromBuffer(shaderElfs[ShaderStageCompute].data(), &codeSize);
result = csReader.readFromBuffer(shaderElfs[ShaderStageCompute].data(), &codeSize);
if (result != Result::Success)
return;
result = writer.linkComputeRelocatableElf(csReader, context);
Expand Down
2 changes: 1 addition & 1 deletion llpc/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ project(shadertest

if(DEFINED XGL_LLVM_SRC_PATH)
# This is a build where LLPC lit testing is integrated into AMDVLK cmake files.
set(AMDLLPC_TEST_DEPS amdllpc spvgen FileCheck count not)
set(AMDLLPC_TEST_DEPS amdllpc spvgen FileCheck llvm-objdump count not)
set(LLVM_DIR ${XGL_LLVM_SRC_PATH})
endif()

Expand Down
2 changes: 1 addition & 1 deletion llpc/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@

tool_dirs = [config.llvm_tools_dir, config.amdllpc_dir]

tools = ['amdllpc']
tools = ['amdllpc', 'llvm-objdump']

llvm_config.add_tool_substitutions(tools, tool_dirs)
66 changes: 66 additions & 0 deletions llpc/test/shaderdb/PipelineCs_RelocCombinedTextureSampler.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

// This test case checks that two relocation entries are generated for a combined texture sampler descriptor,
// one for the resource handle offset, and another for the sampler handle offset.
; BEGIN_SHADERTEST
; RUN: amdllpc -spvgen-dir=%spvgendir% -use-relocatable-shader-elf -o %t.elf %gfxip %s && llvm-objdump --triple=amdgcn --mcpu=gfx900 -d %t.elf | FileCheck -check-prefix=SHADERTEST %s
// Test that correct relocated offsets are in the linked texture fetching code.
; SHADERTEST-LABEL: 0000000000000000 <_amdgpu_cs_main>:
// Matching the relocation entry for the resource descriptor
; SHADERTEST-DAG: s_mov_b32 s[[RELOREG_RESOURCE:[0-9]+]], 16 //{{.*}}
// Matching the relocation entry for the sampler descriptor
; SHADERTEST-DAG: s_mov_b32 s[[RELOREG_SAMPLER:[0-9]+]], 48 //{{.*}}
// Loading the resource descriptor
; SHADERTEST-DAG: s_load_dwordx8 s[[RESOURCE_REG:\[[0-9]+:[0-9]+\]]], s[{{.*}}:{{.*}}], s[[RELOREG_RESOURCE]] //{{.*}}
// Loading the sampler descriptor
; SHADERTEST-DAG: s_load_dwordx4 s[[SAMPLER_REG:\[[0-9]+:[0-9]+\]]], s[{{.*}}:{{.*}}], s[[RELOREG_SAMPLER]] //{{.*}}
// Sampling the texture
; SHADERTEST: image_sample_lz v[0:3], v0, s[[RESOURCE_REG]], s[[SAMPLER_REG]] {{.*}}
; END_SHADERTEST

; BEGIN_SHADERTEST
; RUN: amdllpc -spvgen-dir=%spvgendir% -use-relocatable-shader-elf -v %gfxip %s | FileCheck -check-prefix=SHADERTEST1 %s
; SHADERTEST1-LABEL: {{^// LLPC}} pipeline patching results
; SHADERTEST1-DAG: ![[METADATANODE1:[0-9]+]] = !{!"doff_0_0_r"}
; SHADERTEST1-DAG: ![[METADATANODE2:[0-9]+]] = !{!"doff_0_0_s"}
; SHADERTEST1-DAG: %[[RELOCONST1:[0-9]+]] = call i32 @llvm.amdgcn.reloc.constant(metadata ![[METADATANODE1]])
; SHADERTEST1-DAG: %[[RELOCONST2:[0-9]+]] = call i32 @llvm.amdgcn.reloc.constant(metadata ![[METADATANODE2]])
; SHADERTEST1: AMDLLPC SUCCESS
; END_SHADERTEST

[CsGlsl]
#version 450
#extension GL_ARB_separate_shader_objects : enable

layout(binding = 0) uniform sampler2D texSampler;

layout(set = 1, binding = 0, std430) buffer OUT
{
vec4 o;
};

layout(local_size_x = 2, local_size_y = 3) in;
void main() {
o = texture(texSampler, vec2(0.0, 0.0));
}


[CsInfo]
entryPoint = main
userDataNode[0].type = DescriptorTableVaPtr
userDataNode[0].offsetInDwords = 0
userDataNode[0].sizeInDwords = 1
userDataNode[0].set = 0
userDataNode[0].next[0].type = DescriptorCombinedTexture
userDataNode[0].next[0].offsetInDwords = 4
userDataNode[0].next[0].sizeInDwords = 12
userDataNode[0].next[0].set = 0
userDataNode[0].next[0].binding = 0
userDataNode[1].type = DescriptorTableVaPtr
userDataNode[1].offsetInDwords = 1
userDataNode[1].sizeInDwords = 1
userDataNode[1].set = 1
userDataNode[1].next[0].type = DescriptorBuffer
userDataNode[1].next[0].offsetInDwords = 16
userDataNode[1].next[0].sizeInDwords = 8
userDataNode[1].next[0].set = 1
userDataNode[1].next[0].binding = 0
Loading