Skip to content

Commit

Permalink
Pack input/output on GS pipeline
Browse files Browse the repository at this point in the history
Support packing on pipelines that include a geometry shader. The
main changes that implement the packing are:
- {VS/TES} output -> GS input: use the GS input to build the location info
map, fill the GS outputLocInfoMap, and copy it to the previous stage's
inputLocInfoMap. This process is similar to VS-TCS.
- GS output -> copy shader output: this is a new process needed because of
transform feedback export. First, scalarize the XFB outputs in InOutBuilder
to build `locInfoXfbOutInfoMap`, so that its keys correspond to the keys of
the GS outputLocInfoMap, which is built from the scalarized generic output
exports. Second, collect the byte sizes at each mapped location in order to
load the output value from LDS at that location. Lastly, the output value
obtained at the mapped location is the packed result, so the generic export
call can be created directly; for XFB export, an extract-insert process is
needed to rebuild the original XFB output value according to the
xfbOutInfo.
  • Loading branch information
xuechen417 authored and JaxLinAMD committed Jan 11, 2021
1 parent 97ab8bc commit b1bc7a3
Show file tree
Hide file tree
Showing 8 changed files with 393 additions and 236 deletions.
47 changes: 36 additions & 11 deletions lgc/builder/InOutBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,23 +568,48 @@ Instruction *InOutBuilder::CreateWriteXfbOutput(Value *valueToWrite, bool isBuil

if (m_shaderStage == ShaderStageGeometry) {
// Mark the XFB output for copy shader generation.
InOutLocationInfo outLocationInfo;
outLocationInfo.setLocation(location);
outLocationInfo.setBuiltIn(isBuiltIn);
outLocationInfo.setStreamId(streamId);

XfbOutInfo xfbOutInfo = {};
xfbOutInfo.xfbBuffer = xfbBuffer;
xfbOutInfo.xfbOffset = cast<ConstantInt>(xfbOffset)->getZExtValue();
xfbOutInfo.is16bit = valueToWrite->getType()->getScalarSizeInBits() == 16;
xfbOutInfo.xfbExtraOffset = 0;

auto resUsage = getPipelineState()->getShaderResourceUsage(ShaderStageGeometry);
resUsage->inOutUsage.gs.xfbOutsInfo[outLocationInfo] = xfbOutInfo.u32All;
if (valueToWrite->getType()->getPrimitiveSizeInBits() > 128) {
outLocationInfo.setLocation(location + 1);
xfbOutInfo.xfbOffset += 32;
resUsage->inOutUsage.gs.xfbOutsInfo[outLocationInfo] = xfbOutInfo.u32All;
// For packed generic GS output, the XFB output should be scalarized to align with the scalarized GS output
if (getPipelineState()->canPackInOut() && !isBuiltIn) {
Type *elementTy = valueToWrite->getType();
unsigned scalarizeBy = 1;
if (auto vectorTy = dyn_cast<FixedVectorType>(elementTy)) {
scalarizeBy = vectorTy->getNumElements();
elementTy = vectorTy->getElementType();
}
if (elementTy->getPrimitiveSizeInBits() == 64)
scalarizeBy *= 2;
unsigned xfbOffset = xfbOutInfo.xfbOffset;
for (unsigned i = 0; i < scalarizeBy; ++i) {
InOutLocationInfo outLocInfo;
outLocInfo.setLocation(location);
outLocInfo.setStreamId(streamId);
outLocInfo.setComponent(i);
outLocInfo.setBuiltIn(isBuiltIn);
if (i >= 4) {
outLocInfo.setLocation(location + 1);
outLocInfo.setComponent(i - 4);
xfbOutInfo.xfbOffset = xfbOffset + 16;
}
resUsage->inOutUsage.gs.locInfoXfbOutInfoMap[outLocInfo] = xfbOutInfo;
}
} else {
InOutLocationInfo outLocInfo;
outLocInfo.setLocation(location);
outLocInfo.setBuiltIn(isBuiltIn);
outLocInfo.setStreamId(streamId);
resUsage->inOutUsage.gs.locInfoXfbOutInfoMap[outLocInfo] = xfbOutInfo;

if (valueToWrite->getType()->getPrimitiveSizeInBits() > 128) {
outLocInfo.setLocation(location + 1);
xfbOutInfo.xfbOffset += 32;
resUsage->inOutUsage.gs.locInfoXfbOutInfoMap[outLocInfo] = xfbOutInfo;
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions lgc/include/lgc/state/ResourceUsage.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,8 +371,8 @@ struct ResourceUsage {
// <location, <component, byteSize>>
std::unordered_map<unsigned, std::vector<unsigned>> genericOutByteSizes[MaxGsStreams];

// Map from output location to the transform feedback info
std::map<InOutLocationInfo, unsigned> xfbOutsInfo;
// Map from output location info to the transform feedback info
std::map<InOutLocationInfo, XfbOutInfo> locInfoXfbOutInfoMap;

// ID of the vertex stream sent to rasterizer
unsigned rasterStream = 0;
Expand Down
257 changes: 153 additions & 104 deletions lgc/patch/PatchCopyShader.cpp

Large diffs are not rendered by default.

38 changes: 15 additions & 23 deletions lgc/patch/PatchInOutImportExport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,9 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) {
}
} else {
if (m_pipelineState->canPackInOut() &&
(m_shaderStage == ShaderStageFragment || m_shaderStage == ShaderStageTessControl)) {
// The new InOutLocationInfo is used to map scalarized FS and TCS input import as compact as possible
(m_shaderStage == ShaderStageFragment || m_shaderStage == ShaderStageTessControl ||
m_shaderStage == ShaderStageGeometry)) {
// The inputLocInfoMap of {TCS, GS, FS} maps original InOutLocationInfo to tightly compact InOutLocationInfo
const bool isTcs = m_shaderStage == ShaderStageTessControl;
const uint32_t elemIdxArgIdx = (isInterpolantInputImport || isTcs) ? 2 : 1;
bool hasDynIndex = false;
Expand Down Expand Up @@ -622,8 +623,11 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) {
}
case ShaderStageGeometry: {
assert(callInst.getNumArgOperands() == 3);
if (!elemIdx)
elemIdx = cast<ConstantInt>(callInst.getOperand(1));

const unsigned compIdx = cast<ConstantInt>(callInst.getOperand(1))->getZExtValue();
const unsigned compIdx = cast<ConstantInt>(elemIdx)->getZExtValue();
assert(isDontCareValue(elemIdx) == false);

Value *vertexIdx = callInst.getOperand(2);
assert(isDontCareValue(vertexIdx) == false);
Expand Down Expand Up @@ -853,20 +857,14 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) {
} else if (m_shaderStage == ShaderStageCopyShader) {
exist = true;
loc = value;
} else if (m_shaderStage == ShaderStageGeometry) {
assert(callInst.getNumArgOperands() == 4);

origLocInfo.setStreamId(cast<ConstantInt>(callInst.getOperand(2))->getZExtValue());
locInfoMapIt = resUsage->inOutUsage.outputLocInfoMap.find(origLocInfo);
if (locInfoMapIt != resUsage->inOutUsage.outputLocInfoMap.end()) {
exist = true;
loc = locInfoMapIt->second.getLocation();
}
} else {
if (m_pipelineState->canPackInOut()) {
const bool isVs = m_shaderStage == ShaderStageVertex;
assert(isVs || m_shaderStage == ShaderStageTessEval);
const bool isVs = (m_shaderStage == ShaderStageVertex);
const bool isGs = (m_shaderStage == ShaderStageGeometry);
assert(isVs || isGs || m_shaderStage == ShaderStageTessEval);
origLocInfo.setComponent(cast<ConstantInt>(callInst.getOperand(1))->getZExtValue());
if (isGs)
origLocInfo.setStreamId(cast<ConstantInt>(callInst.getOperand(2))->getZExtValue());
locInfoMapIt = resUsage->inOutUsage.outputLocInfoMap.find(origLocInfo);
bool relateDynIndex = false;
const bool checkDynIndex = (isVs && m_pipelineState->hasShaderStage(ShaderStageTessControl));
Expand Down Expand Up @@ -924,9 +922,10 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) {
}
case ShaderStageGeometry: {
assert(callInst.getNumArgOperands() == 4);
const unsigned compIdx = cast<ConstantInt>(callInst.getOperand(1))->getZExtValue();
if (elemIdx == InvalidValue)
elemIdx = cast<ConstantInt>(callInst.getOperand(1))->getZExtValue();
const unsigned streamId = cast<ConstantInt>(callInst.getOperand(2))->getZExtValue();
patchGsGenericOutputExport(output, loc, compIdx, streamId, &callInst);
patchGsGenericOutputExport(output, loc, elemIdx, streamId, &callInst);
break;
}
case ShaderStageFragment: {
Expand Down Expand Up @@ -1891,13 +1890,6 @@ void PatchInOutImportExport::patchGsGenericOutputExport(Value *output, unsigned

assert(compIdx <= 4);

// Field "genericOutByteSizes" now gets set when generating the copy shader. Just assert that we agree on the
// byteSize.
auto &genericOutByteSizes =
m_pipelineState->getShaderResourceUsage(ShaderStageGeometry)->inOutUsage.gs.genericOutByteSizes;
assert(genericOutByteSizes[streamId][location][compIdx] == byteSize);
(void(genericOutByteSizes)); // unused

storeValueToGsVsRing(output, location, compIdx, streamId, insertPos);
}

Expand Down
Loading

0 comments on commit b1bc7a3

Please sign in to comment.