Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement part-pipeline scheme #1704

Merged
merged 15 commits into from
Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 10 additions & 2 deletions include/vkgcDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
#define LLPC_INTERFACE_MAJOR_VERSION 53

/// LLPC minor interface version.
#define LLPC_INTERFACE_MINOR_VERSION 0
#define LLPC_INTERFACE_MINOR_VERSION 1

#ifndef LLPC_CLIENT_INTERFACE_MAJOR_VERSION
#if VFX_INSIDE_SPVGEN
Expand Down Expand Up @@ -86,7 +86,8 @@
// %Version History
// | %Version | Change Description |
// | -------- | ----------------------------------------------------------------------------------------------------- |
// | 53.0 | Add optimizationLevel to PipelineOptions
// | 53.1 | Add PartPipelineStage enum for part-pipeline mode |
// | 53.0 | Add optimizationLevel to PipelineOptions |
// | 52.3 | Add fastMathFlags to PipelineShaderOptions |
// | 52.2 | Add provokingVertexMode to rsState |
// | 52.1 | Add pageMigrationEnabled to PipelineOptions |
Expand Down Expand Up @@ -276,6 +277,13 @@ enum class ResourceMappingNodeType : unsigned {
Count, ///< Count of resource mapping node types.
};

/// Enumerates part-pipeline stages of compilation.
piotrAMD marked this conversation as resolved.
Show resolved Hide resolved
enum PartPipelineStage : unsigned {
  PartPipelineStageFragment = 0,         ///< Fragment stage
  PartPipelineStagePreRasterization = 1, ///< Pre-rasterization stage
  PartPipelineStageCount = 2             ///< Count of part-pipeline stages (one past the last real stage)
};

/// Represents one node in a graph defining how the user data bound in a command buffer at draw/dispatch time maps to
/// resources referenced by a shader (t#, u#, etc.).
struct ResourceMappingNode {
Expand Down
9 changes: 9 additions & 0 deletions lgc/builder/Builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ void Builder::setComputeShaderMode(const ComputeShaderMode &computeShaderMode) {
getShaderModes()->setComputeShaderMode(computeShaderMode);
}

// =====================================================================================================================
// Set the subgroup size usage for a shader stage. The value is forwarded unchanged to the object returned by
// getShaderModes().
//
// @param stage : Shader stage
// @param usage : Subgroup size usage
void Builder::setSubgroupSizeUsage(ShaderStage stage, bool usage) {
  auto *shaderModes = getShaderModes();
  shaderModes->setSubgroupSizeUsage(stage, usage);
}

// =====================================================================================================================
// Get the compute shader mode (workgroup size)
const ComputeShaderMode &Builder::getComputeShaderMode() {
Expand Down
6 changes: 4 additions & 2 deletions lgc/builder/InOutBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,10 @@ void InOutBuilder::markGenericInputOutputUsage(bool isOutput, unsigned location,
if (!isOutput || m_shaderStage != ShaderStageGeometry) {
bool keepAllLocations = false;
if (getPipelineState()->isUnlinked()) {
if (isOutput && m_pipelineState->getNextShaderStage(m_shaderStage, true) == ShaderStageFragment)
keepAllLocations = true;
if (isOutput && m_shaderStage != ShaderStageFragment) {
ShaderStage nextStage = m_pipelineState->getNextShaderStage(m_shaderStage);
keepAllLocations = nextStage == ShaderStageFragment || nextStage == ShaderStageInvalid;
}
if (m_shaderStage == ShaderStageFragment && !isOutput)
keepAllLocations = true;
}
Expand Down
7 changes: 6 additions & 1 deletion lgc/elfLinker/ElfLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,9 @@ StringRef ElfLinkerImpl::compileGlue(unsigned glueIndex) {
// @param [out] outStream : Stream to write linked ELF to
// @returns : True for success, false if something about the pipeline state stops linking
bool ElfLinkerImpl::link(raw_pwrite_stream &outStream) {
doneInputs();
// The call to doneInputs creates any needed glue shaders, but we only need to do it here for unlinked shaders.
if (m_pipelineState->isUnlinked())
doneInputs();
piotrAMD marked this conversation as resolved.
Show resolved Hide resolved

// Insert glue shaders (if any).
if (!insertGlueShaders())
Expand Down Expand Up @@ -449,6 +451,9 @@ bool ElfLinkerImpl::link(raw_pwrite_stream &outStream) {
for (auto &elfInput : m_elfInputs) {
for (object::SymbolRef symRef : elfInput.objectFile->symbols()) {
object::ELFSymbolRef elfSymRef(symRef);
StringRef name = cantFail(elfSymRef.getName());
if (name == "llvmir" && findSymbol(getStringIndex(name)) != 0)
continue;
piotrAMD marked this conversation as resolved.
Show resolved Hide resolved
if (elfSymRef.getBinding() == ELF::STB_GLOBAL) {
object::section_iterator containingSect = cantFail(elfSymRef.getSection());
if (containingSect != elfInput.objectFile->section_end()) {
Expand Down
21 changes: 21 additions & 0 deletions lgc/include/lgc/state/AbiMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ constexpr unsigned mmSPI_SHADER_Z_FORMAT = 0xA1C4;
constexpr unsigned mmCB_SHADER_MASK = 0xA08F;

// PS register numbers in PAL metadata
constexpr unsigned mmSPI_PS_INPUT_CNTL_0 = 0xA191; // NOLINT
constexpr unsigned mmSPI_PS_INPUT_ENA = 0xA1B3;
constexpr unsigned mmSPI_PS_INPUT_ADDR = 0xA1B4;
constexpr unsigned mmSPI_PS_IN_CONTROL = 0xA1B6;
Expand Down Expand Up @@ -366,4 +367,24 @@ union PA_SC_AA_CONFIG {
unsigned u32All;
};

// Value of an SPI_PS_INPUT_CNTL_0 register, viewable either as individual bitfields (`bits` / `bitfields`, two
// names for the same anonymous struct type) or as one raw 32-bit word (`u32All`).
// The declared field widths, including the unnamed padding fields, sum to 32 bits.
// NOTE(review): the exact bit position of each field depends on the compiler's bitfield allocation order —
// confirm against the hardware register specification before relying on positions.
union SPI_PS_INPUT_CNTL_0 {
struct {
unsigned OFFSET : 6; // NOLINT
unsigned : 2; // NOLINT
unsigned DEFAULT_VAL : 2; // NOLINT
unsigned FLAT_SHADE : 1; // NOLINT
unsigned : 6; // NOLINT
unsigned PT_SPRITE_TEX : 1; // NOLINT
unsigned DUP : 1; // NOLINT
unsigned FP16_INTERP_MODE : 1; // NOLINT
unsigned USE_DEFAULT_ATTR1 : 1; // NOLINT
unsigned DEFAULT_VAL_ATTR1 : 2; // NOLINT
unsigned PT_SPRITE_TEX_ATTR1 : 1; // NOLINT
unsigned ATTR0_VALID : 1; // NOLINT
unsigned ATTR1_VALID : 1; // NOLINT
unsigned : 6; // NOLINT
} bits, bitfields;
// Raw 32-bit view of the same storage as the bitfield struct above.
unsigned u32All;
};

} // namespace lgc
1 change: 1 addition & 0 deletions lgc/include/lgc/state/AbiUnlinked.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ static const char VertexInputs[] = ".vertexInputs";
static const char ColorExports[] = ".colorExports";
static const char FragInputMapping1[] = ".fragInputs";
static const char FragInputMapping2[] = ".fragBuiltInInputs";
static const char FragInputMapping3[] = ".fragBuiltInInputInfo";

} // namespace PipelineMetadataKey

Expand Down
14 changes: 13 additions & 1 deletion lgc/include/lgc/state/PalMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ struct FsInputMappings {
// For each built-in input that is implemented as a generic input passed from the previous shader stage,
// such as CullDistance and ClipDistance, the built-in id and the mapped location number.
llvm::SmallVector<std::pair<unsigned, unsigned>> builtInLocationInfo;
// Array sizes for ClipDistance and CullDistance.
unsigned clipDistanceCount;
unsigned cullDistanceCount;
};

// =====================================================================================================================
Expand Down Expand Up @@ -134,7 +137,7 @@ class PalMetadata {
// Get a register value in PAL metadata.
unsigned getRegister(unsigned regNum);

// Set a register value in PAL metadata. If the register is already set, this ORs in the value.
// Set a register value in PAL metadata. If the register has a value set already, it gets overwritten.
void setRegister(unsigned regNum, unsigned value);

// Store the vertex fetch in PAL metadata for a fetchless vertex shader with shader compilation.
Expand Down Expand Up @@ -196,6 +199,9 @@ class PalMetadata {
// Returns true if the fragment input info has an entry for a builtin.
bool fragmentShaderUsesMappedBuiltInInputs();

// Returns the location of the fragment builtin or InvalidValue if the builtin is not found.
unsigned getFragmentShaderBuiltInLoc(unsigned builtIn);

private:
// Initialize the PalMetadata object after reading in already-existing PAL metadata if any
void initialize();
Expand All @@ -218,6 +224,12 @@ class PalMetadata {
// Finalize PAL metadata user data limit for any compilation (shader, part-pipeline, whole pipeline)
void finalizeUserDataLimit();

// Finalize PAL register settings for pipeline, part-pipeline or shader compilation.
void finalizeRegisterSettings(bool isWholePipeline);

// Finalize SPI_PS_INPUT_CNTL_0_* register setting for pipeline or part-pipeline compilation.
void finalizeInputControlRegisterSetting();

// The maximum possible value for the spill threshold entry in the PAL metadata.
static constexpr uint64_t MAX_SPILL_THRESHOLD = UINT_MAX;

Expand Down
8 changes: 7 additions & 1 deletion lgc/include/lgc/state/PipelineState.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ class PipelineState final : public Pipeline {
// Set shader stage mask
void setShaderStageMask(unsigned mask) override final { m_stageMask = mask; }

// Set whether pre-rasterization part has a geometry shader
// NOTE: Only applicable in the part pipeline compilation mode.
void setPreRasterHasGs(bool preRasterHasGs) override final { m_preRasterHasGs = preRasterHasGs; }

// Set client name
void setClient(llvm::StringRef client) override final { m_client = client.str(); }

Expand Down Expand Up @@ -218,12 +222,13 @@ class PipelineState final : public Pipeline {

// Accessors for shader stage mask
unsigned getShaderStageMask() const { return m_stageMask; }
bool getPreRasterHasGs() const { return m_preRasterHasGs; }
bool hasShaderStage(ShaderStage stage) const { return (getShaderStageMask() >> stage) & 1; }
bool isGraphics() const;
bool isComputeLibrary() const { return m_computeLibrary; }
ShaderStage getLastVertexProcessingStage() const;
ShaderStage getPrevShaderStage(ShaderStage shaderStage) const;
ShaderStage getNextShaderStage(ShaderStage shaderStage, bool fakeFs = false) const;
ShaderStage getNextShaderStage(ShaderStage shaderStage) const;

// Get client name
const char *getClient() const { return m_client.c_str(); }
Expand Down Expand Up @@ -475,6 +480,7 @@ class PipelineState final : public Pipeline {
// Whether generating pipeline or unlinked part-pipeline
PipelineLink m_pipelineLink = PipelineLink::WholePipeline;
unsigned m_stageMask = 0; // Mask of active shader stages
bool m_preRasterHasGs = false; // Whether pre-rasterization part has a geometry shader
bool m_computeLibrary = false; // Whether pipeline is in fact a compute library
std::string m_client; // Client name for PAL metadata
Options m_options = {}; // Per-pipeline options
Expand Down
3 changes: 3 additions & 0 deletions lgc/include/lgc/state/ShaderModes.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ class ShaderModes {
// Get the compute shader mode (workgroup size)
const ComputeShaderMode &getComputeShaderMode();

// Set subgroup size usage
void setSubgroupSizeUsage(ShaderStage stage, bool usage);

// Clear all modes
void clear();

Expand Down
10 changes: 10 additions & 0 deletions lgc/include/lgc/state/ShaderStage.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ constexpr unsigned shaderStageToMask(Stage theStage, Stages... otherStages) {
return (1U << static_cast<unsigned>(theStage)) | shaderStageToMask(otherStages...);
}

// Test whether the bit for `stage` is set in `stageMask`.
//
// @param stage : Shader stage to look for; must not be ShaderStageInvalid (asserted)
// @param stageMask : Stage mask to check
// @returns : True iff `stageMask` contains `stage`
inline bool isShaderStageInMask(ShaderStage stage, unsigned stageMask) {
  assert(stage != ShaderStageInvalid);
  const unsigned stageBit = shaderStageToMask(stage);
  return (stageMask & stageBit) != 0;
}

// Set shader stage metadata on every defined function in a module
void setShaderStage(llvm::Module *module, ShaderStage stage);

Expand Down
3 changes: 3 additions & 0 deletions lgc/interface/lgc/Builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ class Builder : public BuilderCommon {
// add more fields. A local struct variable can be zero-initialized with " = {}".
void setComputeShaderMode(const ComputeShaderMode &computeShaderMode);

// Set subgroup size usage
void setSubgroupSizeUsage(ShaderStage stage, bool usage);

// Get the compute shader mode (workgroup size)
const ComputeShaderMode &getComputeShaderMode();

Expand Down
4 changes: 4 additions & 0 deletions lgc/interface/lgc/Pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,10 @@ class Pipeline {
// Set the shader stage mask
virtual void setShaderStageMask(unsigned mask) = 0;

// Set whether pre-rasterization part has a geometry shader
// NOTE: Only applicable in the part pipeline compilation mode.
piotrAMD marked this conversation as resolved.
Show resolved Hide resolved
virtual void setPreRasterHasGs(bool preRasterHasGs) = 0;

// Set client name
virtual void setClient(llvm::StringRef client) = 0;

Expand Down
14 changes: 3 additions & 11 deletions lgc/patch/Gfx6ConfigBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -914,20 +914,13 @@ template <typename T> void ConfigBuilder::buildPsRegConfig(ShaderStage shaderSta
SET_REG_FIELD(&config->psRegs, SPI_PS_IN_CONTROL, NUM_INTERP, resUsage->inOutUsage.fs.interpInfo.size());

unsigned pointCoordLoc = InvalidValue;
unsigned viewportIndexLoc = InvalidValue;

auto builtInInputLocMapIt = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInPointCoord);
if (builtInInputLocMapIt != resUsage->inOutUsage.builtInInputLocMap.end()) {
// Get generic input corresponding to gl_PointCoord (to set the field PT_SPRITE_TEX)
pointCoordLoc = builtInInputLocMapIt->second;
}

builtInInputLocMapIt = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInViewportIndex);
if (builtInInputLocMapIt != resUsage->inOutUsage.builtInInputLocMap.end()) {
// Get generic input corresponding to gl_ViewportIndex (to set the field OFFSET and FLAT_SHADE)
viewportIndexLoc = builtInInputLocMapIt->second;
}

// NOTE: PAL expects at least one mmSPI_PS_INPUT_CNTL_0 register set, so we always patch at least one if none
// were identified in the shader.
const std::vector<FsInterpInfo> dummyInterpInfo{{0, false, false, false, false, false}};
Expand Down Expand Up @@ -967,12 +960,11 @@ template <typename T> void ConfigBuilder::buildPsRegConfig(ShaderStage shaderSta

// NOTE: Set the offset value to force hardware to select input defaults (no VS match).
spiPsInputCntl.bits.OFFSET = UseDefaultVal;
} else if (viewportIndexLoc == i && !usesViewportArrayIndex()) {
// NOTE: Use default value 0 for viewport array index if it is only used in FS (not set in other stages)
spiPsInputCntl.bits.OFFSET = UseDefaultVal;
spiPsInputCntl.bits.FLAT_SHADE = false;
}

// NOTE: Set SPI_PS_INPUT_CNTL_* here, but the register can still be changed later,
// when it becomes known that gl_ViewportIndex is not used and fields OFFSET and FLAT_SHADE
// can be amended.
appendConfig(mmSPI_PS_INPUT_CNTL_0 + i, spiPsInputCntl.u32All);
}

Expand Down
16 changes: 4 additions & 12 deletions lgc/patch/Gfx9ConfigBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1799,7 +1799,7 @@ template <typename T> void ConfigBuilder::buildPsRegConfig(ShaderStage shaderSta
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, Z_EXPORT_ENABLE, builtInUsage.fragDepth);
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, STENCIL_TEST_VAL_EXPORT_ENABLE, builtInUsage.fragStencilRef);
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, MASK_EXPORT_ENABLE, builtInUsage.sampleMask);
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, ALPHA_TO_MASK_DISABLE, 0); // Set during pipeline finalization.
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, ALPHA_TO_MASK_DISABLE, 1); // Set during pipeline finalization.
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, DEPTH_BEFORE_SHADER, fragmentMode.earlyFragmentTests);
SET_REG_FIELD(&config->psRegs, DB_SHADER_CONTROL, EXEC_ON_NOOP,
(fragmentMode.earlyFragmentTests && resUsage->resourceWrite));
Expand Down Expand Up @@ -1834,20 +1834,13 @@ template <typename T> void ConfigBuilder::buildPsRegConfig(ShaderStage shaderSta
setWaveFrontSize(Util::Abi::HardwareStage::Ps, waveFrontSize);

unsigned pointCoordLoc = InvalidValue;
unsigned viewportIndexLoc = InvalidValue;

auto builtInInputLocMapIt = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInPointCoord);
if (builtInInputLocMapIt != resUsage->inOutUsage.builtInInputLocMap.end()) {
// Get generic input corresponding to gl_PointCoord (to set the field PT_SPRITE_TEX)
pointCoordLoc = builtInInputLocMapIt->second;
}

builtInInputLocMapIt = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInViewportIndex);
if (builtInInputLocMapIt != resUsage->inOutUsage.builtInInputLocMap.end()) {
// Get generic input corresponding to gl_ViewportIndex (to set the field OFFSET and FLAT_SHADE)
viewportIndexLoc = builtInInputLocMapIt->second;
}

// NOTE: PAL expects at least one mmSPI_PS_INPUT_CNTL_0 register set, so we always patch at least one if none
// were identified in the shader.
const std::vector<FsInterpInfo> dummyInterpInfo{{0, false, false, false, false, false}};
Expand Down Expand Up @@ -1886,12 +1879,11 @@ template <typename T> void ConfigBuilder::buildPsRegConfig(ShaderStage shaderSta

// NOTE: Set the offset value to force hardware to select input defaults (no VS match).
spiPsInputCntl.bits.OFFSET = UseDefaultVal;
} else if (viewportIndexLoc == i && !usesViewportArrayIndex()) {
// NOTE: Use default value 0 for viewport array index if it is only used in FS (not set in other stages)
spiPsInputCntl.bits.OFFSET = UseDefaultVal;
spiPsInputCntl.bits.FLAT_SHADE = false;
}

// NOTE: Set SPI_PS_INPUT_CNTL_* here, but the register can still be changed later,
// when it becomes known that gl_ViewportIndex is not used and fields OFFSET and FLAT_SHADE
// can be amended.
appendConfig(mmSPI_PS_INPUT_CNTL_0 + i, spiPsInputCntl.u32All);
}

Expand Down
8 changes: 6 additions & 2 deletions lgc/patch/PatchInOutImportExport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5151,10 +5151,10 @@ void PatchInOutImportExport::addExportInstForGenericOutput(Value *output, unsign
// @param insertPos : Where to insert the "exp" instruction
void PatchInOutImportExport::addExportInstForBuiltInOutput(Value *output, unsigned builtInId, Instruction *insertPos) {
// Check if the shader stage is valid to use "exp" instruction to export output
const auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage, true);
const auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage);
const bool useExpInst = ((m_shaderStage == ShaderStageVertex || m_shaderStage == ShaderStageTessEval ||
m_shaderStage == ShaderStageCopyShader) &&
(nextStage == ShaderStageFragment));
(nextStage == ShaderStageFragment || nextStage == ShaderStageInvalid));
assert(useExpInst);
(void(useExpInst)); // unused

Expand Down Expand Up @@ -5224,6 +5224,8 @@ void PatchInOutImportExport::addExportInstForBuiltInOutput(Value *output, unsign
const auto &nextBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStageFragment)->builtInUsage.fs;

hasLayerExport = nextBuiltInUsage.layer || nextBuiltInUsage.viewIndex;
} else if (nextStage == ShaderStageInvalid) {
hasLayerExport = false;
}

if (hasLayerExport) {
Expand Down Expand Up @@ -5268,6 +5270,8 @@ void PatchInOutImportExport::addExportInstForBuiltInOutput(Value *output, unsign
const auto &nextBuiltInUsage = m_pipelineState->getShaderResourceUsage(ShaderStageFragment)->builtInUsage.fs;

hasViewportIndexExport = nextBuiltInUsage.viewportIndex;
} else if (nextStage == ShaderStageInvalid) {
hasViewportIndexExport = false;
}

if (hasViewportIndexExport) {
Expand Down