Skip to content

Commit

Permalink
Make descriptor offset relocatable.
Browse files Browse the repository at this point in the history
This commit consists of the original changes made by s-perron,
which emit a relocatable entry for each descriptor load instead
of baking in a constant offset during initial compilation. The
compiler patches up the compiled binary with the actual descriptor
offsets at a later time, when that information is available.
  • Loading branch information
Yong He committed Apr 13, 2020
1 parent 913dbed commit 7bc698d
Show file tree
Hide file tree
Showing 17 changed files with 781 additions and 136 deletions.
103 changes: 66 additions & 37 deletions lgc/patch/llpcPatchDescriptorLoad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
*/
#include "llpcPatchDescriptorLoad.h"
#include "llpcTargetInfo.h"
#include "../include/lgc/llpcBuilderContext.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -148,12 +149,16 @@ void PatchDescriptorLoad::processDescriptorGetPtr(CallInst *descPtrCall, StringR
}

Value *descPtrAndStride = nullptr;
if (!node) {
// We did not find the resource node. Use an undef value.
descPtrAndStride = UndefValue::get(descPtrCall->getType());
if (m_pipelineState->getBuilderContext()->buildingRelocatableElf()) {
descPtrAndStride = getDescPtrAndStride(resType, descSet, binding, nullptr, nullptr, shadow, builder);
} else {
// Get the descriptor pointer and stride as a struct.
descPtrAndStride = getDescPtrAndStride(resType, descSet, binding, topNode, node, shadow, builder);
if (!node) {
// We did not find the resource node. Use an undef value.
descPtrAndStride = UndefValue::get(descPtrCall->getType());
} else {
// Get the descriptor pointer and stride as a struct.
descPtrAndStride = getDescPtrAndStride(resType, descSet, binding, topNode, node, shadow, builder);
}
}
descPtrCall->replaceAllUsesWith(descPtrAndStride);
m_descLoadCalls.push_back(descPtrCall);
Expand Down Expand Up @@ -200,7 +205,7 @@ Value *PatchDescriptorLoad::getDescPtrAndStride(ResourceNodeType resType, unsign
// Stride is not determinable just from the descriptor type requested by the Builder call.
if (!node) {
// TODO: Shader compilation: Get byte stride using a reloc.
llvm_unreachable("");
stride = emitRelocationConstant(&builder, "dstride_" + Twine(descSet) + "_" + Twine(binding));
} else {
// Pipeline compilation: Get the stride from the resource type in the node.
switch (node->type) {
Expand Down Expand Up @@ -285,10 +290,28 @@ Value *PatchDescriptorLoad::getDescPtr(ResourceNodeType resType, unsigned descSe
// Add on the byte offset of the descriptor.
Value *offset = nullptr;
if (!node) {
// TODO: Shader compilation: Get the offset for the descriptor using a reloc. The reloc symbol name
// Get the offset for the descriptor using a reloc. The reloc symbol name
// needs to contain the descriptor set and binding, and, for image, fmask or sampler, whether it is
// a sampler.
llvm_unreachable("");
StringRef relocNameSuffix = "";
switch (resType) {
case ResourceNodeType::DescriptorSampler:
case ResourceNodeType::DescriptorYCbCrSampler:
relocNameSuffix = "_s";
break;
case ResourceNodeType::DescriptorResource:
relocNameSuffix = "_r";
break;
case ResourceNodeType::DescriptorBuffer:
case ResourceNodeType::DescriptorBufferCompact:
case ResourceNodeType::DescriptorTexelBuffer:
relocNameSuffix = "_b";
break;
default:
relocNameSuffix = "_x";
break;
}
offset = emitRelocationConstant(&builder, "doff_" + Twine(descSet) + "_" + Twine(binding) + relocNameSuffix);
} else {
// Get the offset for the descriptor. Where we are getting the second part of a combined resource,
// add on the size of the first part.
Expand All @@ -297,9 +320,12 @@ Value *PatchDescriptorLoad::getDescPtr(ResourceNodeType resType, unsigned descSe
offsetInDwords += DescriptorSizeResource / 4;
offset = builder.getInt32(offsetInDwords);
}
descPtr = builder.CreateBitCast(descPtr, builder.getInt32Ty()->getPointerTo(ADDR_SPACE_CONST));
descPtr = builder.CreateGEP(builder.getInt32Ty(), descPtr, offset);

// LLVM's internal handling of the GEP instruction results in a lot of junk code and prevents selection
// of the offset-from-register variant of the s_load_dwordx4 instruction. To work around this issue,
// we use integer arithmetic here so that the amdgpu backend can pick up the optimal instruction.
descPtr = builder.CreatePtrToInt(descPtr, builder.getInt64Ty());
descPtr = builder.CreateAdd(descPtr, builder.CreateZExt(offset, builder.getInt64Ty()));
descPtr = builder.CreateIntToPtr(descPtr, builder.getInt32Ty()->getPointerTo(ADDR_SPACE_CONST));
return descPtr;
}

Expand Down Expand Up @@ -445,34 +471,37 @@ Value *PatchDescriptorLoad::loadBufferDescriptor(unsigned descSet, unsigned bind
// Find the descriptor node, either a DescriptorBuffer or PushConst (inline buffer).
const ResourceNode *topNode = nullptr;
const ResourceNode *node = nullptr;
std::tie(topNode, node) = m_pipelineState->findResourceNode(ResourceNodeType::DescriptorBuffer, descSet, binding);

if (!node) {
// We did not find the resource node. Use an undef value.
return UndefValue::get(VectorType::get(builder.getInt32Ty(), 4));
}
if (!m_pipelineState->getBuilderContext()->buildingRelocatableElf()) {
std::tie(topNode, node) = m_pipelineState->findResourceNode(ResourceNodeType::DescriptorBuffer, descSet, binding);

if (node == topNode && node->type == ResourceNodeType::DescriptorBufferCompact) {
// This is a compact buffer descriptor (only two dwords) in the top-level table. We special-case
// that to use user data SGPRs directly, if PatchEntryPointMutate managed to fit the value into
// user data SGPRs.
unsigned resNodeIdx = topNode - m_pipelineState->getUserDataNodes().data();
auto intfData = m_pipelineState->getShaderInterfaceData(m_shaderStage);
unsigned argIdx = intfData->entryArgIdxs.resNodeValues[resNodeIdx];
if (argIdx > 0) {
// Resource node isn't spilled. Load its value from function argument.
Argument *descArg = m_entryPoint->getArg(argIdx);
descArg->setName(Twine("resNode") + Twine(resNodeIdx));
// The function argument is a vector of i32. Treat it as an array of <2 x i32> and
// extract the required array element.
arrayOffset = builder.CreateMul(arrayOffset, builder.getInt32(2));
Value *descDword0 = builder.CreateExtractElement(descArg, arrayOffset);
arrayOffset = builder.CreateAdd(arrayOffset, builder.getInt32(1));
Value *descDword1 = builder.CreateExtractElement(descArg, arrayOffset);
Value *desc = UndefValue::get(VectorType::get(builder.getInt32Ty(), 2));
desc = builder.CreateInsertElement(desc, descDword0, uint64_t(0));
desc = builder.CreateInsertElement(desc, descDword1, 1);
return buildBufferCompactDesc(desc, &*builder.GetInsertPoint());
if (!node) {
// We did not find the resource node. Use an undef value.
return UndefValue::get(VectorType::get(builder.getInt32Ty(), 4));
}

if (node == topNode && node->type == ResourceNodeType::DescriptorBufferCompact) {
// This is a compact buffer descriptor (only two dwords) in the top-level table. We special-case
// that to use user data SGPRs directly, if PatchEntryPointMutate managed to fit the value into
// user data SGPRs.
unsigned resNodeIdx = topNode - m_pipelineState->getUserDataNodes().data();
auto intfData = m_pipelineState->getShaderInterfaceData(m_shaderStage);
unsigned argIdx = intfData->entryArgIdxs.resNodeValues[resNodeIdx];
if (argIdx > 0) {
// Resource node isn't spilled. Load its value from function argument.
Argument *descArg = m_entryPoint->getArg(argIdx);
descArg->setName(Twine("resNode") + Twine(resNodeIdx));
// The function argument is a vector of i32. Treat it as an array of <2 x i32> and
// extract the required array element.
arrayOffset = builder.CreateMul(arrayOffset, builder.getInt32(2));
Value *descDword0 = builder.CreateExtractElement(descArg, arrayOffset);
arrayOffset = builder.CreateAdd(arrayOffset, builder.getInt32(1));
Value *descDword1 = builder.CreateExtractElement(descArg, arrayOffset);
Value *desc = UndefValue::get(VectorType::get(builder.getInt32Ty(), 2));
desc = builder.CreateInsertElement(desc, descDword0, uint64_t(0));
desc = builder.CreateInsertElement(desc, descDword1, 1);
return buildBufferCompactDesc(desc, &*builder.GetInsertPoint());
}
}
}

Expand Down
13 changes: 13 additions & 0 deletions lgc/util/llpcInternal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

#include "llpcInternal.h"
#include "lgc/llpcBuilderBase.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "llpc-internal"

Expand Down Expand Up @@ -82,6 +83,18 @@ CallInst *emitCall(StringRef funcName, Type *retTy, ArrayRef<Value *> args, Arra
return builder.createNamedCall(funcName, retTy, args, attribs);
}

// =====================================================================================================================
// Emits a call to the amdgcn.reloc.constant intrinsic, which represents a relocatable value identified by the given
// symbol name. The symbol name is wrapped in an MDString inside an MDNode and passed to the intrinsic as a
// metadata-as-value operand.
//
// @param builder : [in,out] An IRBuilder for instruction insertion
// @param symbolName : Name of the relocation symbol associated with this relocation
// @returns : The call instruction created for the intrinsic
llvm::CallInst* emitRelocationConstant(llvm::IRBuilderBase* builder, const llvm::Twine& symbolName) {
  auto &context = builder->getContext();

  // Metadata node whose single operand is the relocation symbol name.
  auto *symbolNameMd = llvm::MDString::get(context, symbolName.str());
  auto *relocNode = llvm::MDNode::get(context, symbolNameMd);

  // Intrinsic operands must be Values, so wrap the metadata node before passing it on.
  auto *relocArg = llvm::MetadataAsValue::get(context, relocNode);
  return builder->CreateIntrinsic(llvm::Intrinsic::amdgcn_reloc_constant, {}, {relocArg});
}

// =====================================================================================================================
// Gets LLVM-style name for type.
//
Expand Down
3 changes: 3 additions & 0 deletions lgc/util/llpcInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ llvm::CallInst *emitCall(llvm::StringRef funcName, llvm::Type *retTy, llvm::Arra
llvm::CallInst *emitCall(llvm::StringRef funcName, llvm::Type *retTy, llvm::ArrayRef<llvm::Value *> args,
llvm::ArrayRef<llvm::Attribute::AttrKind> attribs, llvm::BasicBlock *insertAtEnd);

// Emits a call to the amdgcn.reloc.constant intrinsic, which represents a relocatable value identified by the given
// symbol name.
llvm::CallInst* emitRelocationConstant(llvm::IRBuilderBase* builder, const llvm::Twine& symbolName);

// Adds LLVM-style type mangling suffix for the specified return type and args to the name.
void addTypeMangling(llvm::Type *returnTy, llvm::ArrayRef<llvm::Value *> args, std::string &name);

Expand Down
40 changes: 20 additions & 20 deletions llpc/context/llpcCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,15 +734,15 @@ Result Compiler::buildPipelineWithRelocatableElf(Context *context, ArrayRef<cons
IShaderCache *userShaderCache = nullptr;
if (context->isGraphics()) {
auto pipelineInfo = reinterpret_cast<const GraphicsPipelineBuildInfo *>(context->getPipelineBuildInfo());
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true, stage);
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true, true, stage);
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pUserShaderCache = pPipelineInfo->pShaderCache;
userShaderCache = pipelineInfo->pShaderCache;
#endif
} else {
auto pipelineInfo = reinterpret_cast<const ComputePipelineBuildInfo *>(context->getPipelineBuildInfo());
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true);
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true, true);
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pUserShaderCache = pPipelineInfo->pShaderCache;
userShaderCache = pipelineInfo->pShaderCache;
#endif
}

Expand Down Expand Up @@ -1083,8 +1083,8 @@ unsigned GraphicsShaderCacheChecker::check(const Module *module, unsigned stageM

IShaderCache *appCache = nullptr;
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
auto pPipelineInfo = reinterpret_cast<const GraphicsPipelineBuildInfo *>(m_pContext->GetPipelineBuildInfo());
pAppCache = pPipelineInfo->pShaderCache;
auto pPipelineInfo = reinterpret_cast<const GraphicsPipelineBuildInfo *>(m_context->getPipelineBuildInfo());
appCache = pPipelineInfo->pShaderCache;
#endif
if (stageMask & shaderStageToMask(ShaderStageFragment)) {
m_fragmentCacheEntryState = m_compiler->lookUpShaderCaches(appCache, &fragmentHash, &m_fragmentElf,
Expand Down Expand Up @@ -1115,7 +1115,7 @@ unsigned GraphicsShaderCacheChecker::check(const Module *module, unsigned stageM
void GraphicsShaderCacheChecker::updateRootUserDateOffset(ElfPackage *pipelineElf) {
ElfWriter<Elf64> writer(m_context->getGfxIpVersion());
// Load ELF binary
auto result = writer.ReadFromBuffer(pipelineElf->data(), pipelineElf->size());
auto result = writer.readFromBuffer(pipelineElf->data(), pipelineElf->size());
assert(result == Result::Success);
(void(result)); // unused
writer.updateElfBinary(m_context, pipelineElf);
Expand Down Expand Up @@ -1171,7 +1171,7 @@ void GraphicsShaderCacheChecker::updateAndMerge(Result result, ElfPackage *outpu

// Merge and store the result in pPipelineElf
ElfWriter<Elf64> writer(m_context->getGfxIpVersion());
auto result = writer.ReadFromBuffer(nonFragmentElf.pCode, nonFragmentElf.codeSize);
auto result = writer.readFromBuffer(nonFragmentElf.pCode, nonFragmentElf.codeSize);
assert(result == Result::Success);
(void(result)); // unused
writer.mergeElfBinary(m_context, &fragmentElf, outputPipelineElf);
Expand Down Expand Up @@ -1243,14 +1243,15 @@ Result Compiler::BuildGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline
const PipelineShaderInfo *shaderInfo[ShaderStageGfxCount] = {
&pipelineInfo->vs, &pipelineInfo->tcs, &pipelineInfo->tes, &pipelineInfo->gs, &pipelineInfo->fs,
};
bool buildingRelocatableElf = canUseRelocatableGraphicsShaderElf(shaderInfo);

for (unsigned i = 0; i < ShaderStageGfxCount && result == Result::Success; ++i)
result = validatePipelineShaderInfo(shaderInfo[i]);

MetroHash::Hash cacheHash = {};
MetroHash::Hash pipelineHash = {};
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true);
pipelineHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, false);
cacheHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, true, buildingRelocatableElf);
pipelineHash = PipelineDumper::generateHashForGraphicsPipeline(pipelineInfo, false, buildingRelocatableElf);

if (result == Result::Success && EnableOuts()) {
LLPC_OUTS("===============================================================================\n");
Expand All @@ -1277,10 +1278,9 @@ Result Compiler::BuildGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline
}

ShaderEntryState cacheEntryState = ShaderEntryState::New;
bool buildingRelocatableElf = canUseRelocatableGraphicsShaderElf(shaderInfo);
IShaderCache *appCache = nullptr;
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pAppCache = pPipelineInfo->pShaderCache;
appCache = pipelineInfo->pShaderCache;
#endif
ShaderCache *shaderCache = nullptr;
CacheEntryHandle hEntry = nullptr;
Expand Down Expand Up @@ -1371,8 +1371,8 @@ Result Compiler::BuildComputePipeline(const ComputePipelineBuildInfo *pipelineIn

MetroHash::Hash cacheHash = {};
MetroHash::Hash pipelineHash = {};
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true);
pipelineHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, false);
cacheHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, true, buildingRelocatableElf);
pipelineHash = PipelineDumper::generateHashForComputePipeline(pipelineInfo, false, buildingRelocatableElf);

if (result == Result::Success && EnableOuts()) {
const ShaderModuleData *moduleData = reinterpret_cast<const ShaderModuleData *>(pipelineInfo->cs.pModuleData);
Expand All @@ -1397,7 +1397,7 @@ Result Compiler::BuildComputePipeline(const ComputePipelineBuildInfo *pipelineIn
ShaderEntryState cacheEntryState = ShaderEntryState::New;
IShaderCache *appCache = nullptr;
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 38
pAppCache = pPipelineInfo->pShaderCache;
appCache = pipelineInfo->pShaderCache;
#endif
ShaderCache *shaderCache = nullptr;
CacheEntryHandle hEntry = nullptr;
Expand Down Expand Up @@ -1537,7 +1537,7 @@ Result Compiler::CreateShaderCache(const ShaderCacheCreateInfo *pCreateInfo, //
ShaderCache *pShaderCache = new ShaderCache();

if (pShaderCache != nullptr) {
result = pShaderCache->Init(pCreateInfo, &auxCreateInfo);
result = pShaderCache->init(pCreateInfo, &auxCreateInfo);
if (result != Result::Success) {
pShaderCache->Destroy();
pShaderCache = nullptr;
Expand Down Expand Up @@ -1716,7 +1716,7 @@ void Compiler::buildShaderCacheHash(Context *context, unsigned stageMask, ArrayR
MetroHash64 hasher;

// Update common shader info
PipelineDumper::updateHashForPipelineShaderInfo(stage, shaderInfo, true, &hasher);
PipelineDumper::updateHashForPipelineShaderInfo(stage, shaderInfo, true, &hasher, false);
hasher.Update(pipelineInfo->iaState.deviceIndex);

// Update input/output usage (provided by middle-end caller of this callback).
Expand Down Expand Up @@ -1774,14 +1774,14 @@ void Compiler::linkRelocatableShaderElf(ElfPackage *shaderElfs, ElfPackage *pipe
ElfReader<Elf64> fsReader(m_gfxIp);
if (!shaderElfs[ShaderStageVertex].empty()) {
size_t codeSize = shaderElfs[ShaderStageVertex].size_in_bytes();
result = vsReader.ReadFromBuffer(shaderElfs[ShaderStageVertex].data(), &codeSize);
result = vsReader.readFromBuffer(shaderElfs[ShaderStageVertex].data(), &codeSize);
if (result != Result::Success)
return;
}

if (!shaderElfs[ShaderStageFragment].empty()) {
size_t codeSize = shaderElfs[ShaderStageFragment].size_in_bytes();
result = fsReader.ReadFromBuffer(shaderElfs[ShaderStageFragment].data(), &codeSize);
result = fsReader.readFromBuffer(shaderElfs[ShaderStageFragment].data(), &codeSize);
if (result != Result::Success)
return;
}
Expand All @@ -1790,7 +1790,7 @@ void Compiler::linkRelocatableShaderElf(ElfPackage *shaderElfs, ElfPackage *pipe
} else {
ElfReader<Elf64> csReader(m_gfxIp);
size_t codeSize = shaderElfs[ShaderStageCompute].size_in_bytes();
result = csReader.ReadFromBuffer(shaderElfs[ShaderStageCompute].data(), &codeSize);
result = csReader.readFromBuffer(shaderElfs[ShaderStageCompute].data(), &codeSize);
if (result != Result::Success)
return;
result = writer.linkComputeRelocatableElf(csReader, context);
Expand Down
2 changes: 1 addition & 1 deletion llpc/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ project(shadertest

if(DEFINED XGL_LLVM_SRC_PATH)
# This is a build where LLPC lit testing is integrated into AMDVLK cmake files.
set(AMDLLPC_TEST_DEPS amdllpc spvgen FileCheck count not)
set(AMDLLPC_TEST_DEPS amdllpc spvgen FileCheck llvm-objdump count not)
set(LLVM_DIR ${XGL_LLVM_SRC_PATH})
endif()

Expand Down
2 changes: 1 addition & 1 deletion llpc/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

tool_dirs = [config.llvm_tools_dir, config.amdllpc_dir]

tools = ['amdllpc']
tools = ['amdllpc', 'llvm-objdump']

llvm_config.add_tool_substitutions(tools, tool_dirs)

Loading

0 comments on commit 7bc698d

Please sign in to comment.