AMDGPU: Fix tracking subreg defs when folding through reg_sequence #140608

Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

We weren't fully respecting the type of a def of an immediate vs. the type at the use point. Refactor the folding logic to track the value to fold, as well as a subregister to apply to the underlying value. This is similar to how PeepholeOpt tracks subregisters (though only for pure copy-like instructions, no constants).

Fixes #139317

Patch is 34.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140608.diff

4 Files Affected:
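The crux: a 64-bit immediate def observed through a 32-bit subregister use must fold to the corresponding half of the value, not the whole value. A minimal standalone sketch of that extraction follows; the hypothetical extractSubDword mirrors the job the in-tree SIInstrInfo::extractSubregFromImm helper performs in the diff below, and the enum values are illustrative, not the generated AMDGPU indices:

```cpp
#include <cstdint>
#include <optional>

// Illustrative stand-ins for the generated AMDGPU subregister indices.
enum SubRegIdx : unsigned { NoSubRegister = 0, Sub0, Sub1 };

// Return the value a use reading \p SubReg of a 64-bit immediate
// observes, or std::nullopt for indices this sketch does not handle.
std::optional<int64_t> extractSubDword(int64_t Imm, unsigned SubReg) {
  switch (SubReg) {
  case NoSubRegister:
    return Imm; // the use reads the full 64-bit value
  case Sub0:
    return static_cast<int32_t>(Imm); // low dword
  case Sub1:
    return static_cast<int32_t>(Imm >> 32); // high dword
  default:
    return std::nullopt; // e.g. 16-bit pieces, not modeled here
  }
}
```

So for `Imm = (int64_t{1} << 32) | 42`, a `sub0` use must fold `42` while a `sub1` use must fold `1`; the failure mode the patch guards against is, roughly, losing track of which index applies once the value has passed through a reg_sequence.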
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9bbc8e75fd31b..eb7fb94e25f5c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -25,52 +25,151 @@ using namespace llvm;
namespace {
-struct FoldCandidate {
- MachineInstr *UseMI;
+/// Track a value we may want to fold into downstream users, applying
+/// subregister extracts along the way.
+struct FoldableDef {
union {
- MachineOperand *OpToFold;
+ MachineOperand *OpToFold = nullptr;
uint64_t ImmToFold;
int FrameIndexToFold;
};
- int ShrinkOpcode;
- unsigned UseOpNo;
+
+ /// Register class of the originally defined value.
+ const TargetRegisterClass *DefRC = nullptr;
+
+ /// Track the original defining instruction for the value.
+ const MachineInstr *DefMI = nullptr;
+
+ /// Subregister to apply to the value at the use point.
+ unsigned DefSubReg = AMDGPU::NoSubRegister;
+
+ /// Kind of value stored in the union.
MachineOperand::MachineOperandType Kind;
- bool Commuted;
- FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false,
- int ShrinkOp = -1) :
- UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
- Kind(FoldOp->getType()),
- Commuted(Commuted_) {
- if (FoldOp->isImm()) {
- ImmToFold = FoldOp->getImm();
- } else if (FoldOp->isFI()) {
- FrameIndexToFold = FoldOp->getIndex();
+ FoldableDef() = delete;
+ FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC,
+ unsigned DefSubReg = AMDGPU::NoSubRegister)
+ : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
+
+ if (FoldOp.isImm()) {
+ ImmToFold = FoldOp.getImm();
+ } else if (FoldOp.isFI()) {
+ FrameIndexToFold = FoldOp.getIndex();
} else {
- assert(FoldOp->isReg() || FoldOp->isGlobal());
- OpToFold = FoldOp;
+ assert(FoldOp.isReg() || FoldOp.isGlobal());
+ OpToFold = &FoldOp;
}
+
+ DefMI = FoldOp.getParent();
}
- FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
- bool Commuted_ = false, int ShrinkOp = -1)
- : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
- Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}
+ FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC,
+ unsigned DefSubReg = AMDGPU::NoSubRegister)
+ : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
+ Kind(MachineOperand::MO_Immediate) {}
+
+ /// Copy the current def and apply \p SubReg to the value.
+ FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const {
+ FoldableDef Copy(*this);
+ Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);
+ return Copy;
+ }
+
+ bool isReg() const { return Kind == MachineOperand::MO_Register; }
+
+ Register getReg() const {
+ assert(isReg());
+ return OpToFold->getReg();
+ }
+
+ unsigned getSubReg() const {
+ assert(isReg());
+ return OpToFold->getSubReg();
+ }
+
+ bool isImm() const { return Kind == MachineOperand::MO_Immediate; }
bool isFI() const {
return Kind == MachineOperand::MO_FrameIndex;
}
- bool isImm() const {
- return Kind == MachineOperand::MO_Immediate;
+ int getFI() const {
+ assert(isFI());
+ return FrameIndexToFold;
}
- bool isReg() const {
- return Kind == MachineOperand::MO_Register;
+ bool isGlobal() const { return OpToFold->isGlobal(); }
+
+ /// Return the effective immediate value defined by this instruction, after
+ /// application of any subregister extracts which may exist between the use
+ /// and def instruction.
+ std::optional<int64_t> getEffectiveImmVal() const {
+ assert(isImm());
+ return SIInstrInfo::extractSubregFromImm(ImmToFold, DefSubReg);
}
- bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+ /// Check if it is legal to fold this effective value into \p MI's \p OpNo
+ /// operand.
+ bool isOperandLegal(const SIInstrInfo &TII, const MachineInstr &MI,
+ unsigned OpIdx) const {
+ switch (Kind) {
+ case MachineOperand::MO_Immediate: {
+ std::optional<int64_t> ImmToFold = getEffectiveImmVal();
+ if (!ImmToFold)
+ return false;
+
+ // TODO: Should verify the subregister index is supported by the class
+ // TODO: Avoid the temporary MachineOperand
+ MachineOperand TmpOp = MachineOperand::CreateImm(*ImmToFold);
+ return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+ }
+ case MachineOperand::MO_FrameIndex: {
+ if (DefSubReg != AMDGPU::NoSubRegister)
+ return false;
+ MachineOperand TmpOp = MachineOperand::CreateFI(FrameIndexToFold);
+ return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+ }
+ default:
+ // TODO: Try to apply DefSubReg, for global address we can extract
+ // low/high.
+ if (DefSubReg != AMDGPU::NoSubRegister)
+ return false;
+ return TII.isOperandLegal(MI, OpIdx, OpToFold);
+ }
+
+ llvm_unreachable("covered MachineOperand kind switch");
+ }
+};
+
+struct FoldCandidate {
+ MachineInstr *UseMI;
+ FoldableDef Def;
+ int ShrinkOpcode;
+ unsigned UseOpNo;
+ bool Commuted;
+
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, FoldableDef Def,
+ bool Commuted_ = false, int ShrinkOp = -1)
+ : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Commuted(Commuted_) {}
+
+ bool isFI() const { return Def.Kind == MachineOperand::MO_FrameIndex; }
+
+ int getFI() const {
+ assert(isFI());
+ return Def.FrameIndexToFold;
+ }
+
+ bool isImm() const { return Def.isImm(); }
+
+ bool isReg() const { return Def.isReg(); }
+
+ Register getReg() const {
+ assert(isReg());
+ return Def.OpToFold->getReg();
+ }
+
+ bool isGlobal() const { return Def.Kind == MachineOperand::MO_GlobalAddress; }
bool needsShrink() const { return ShrinkOpcode != -1; }
};
@@ -84,7 +183,7 @@ class SIFoldOperandsImpl {
const SIMachineFunctionInfo *MFI;
bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
- const MachineOperand &OpToFold) const;
+ const FoldableDef &OpToFold) const;
// TODO: Just use TII::getVALUOp
unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
@@ -112,11 +211,11 @@ class SIFoldOperandsImpl {
bool canUseImmWithOpSel(FoldCandidate &Fold) const;
- bool tryFoldImmWithOpSel(FoldCandidate &Fold) const;
+ bool tryFoldImmWithOpSel(FoldCandidate &Fold, int64_t ImmVal) const;
bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
- MachineOperand *OpToFold) const;
+ const FoldableDef &OpToFold) const;
bool isUseSafeToFold(const MachineInstr &MI,
const MachineOperand &UseMO) const;
@@ -135,12 +234,10 @@ class SIFoldOperandsImpl {
MachineOperand *SplatVal,
const TargetRegisterClass *SplatRC) const;
- bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
+ bool tryToFoldACImm(const FoldableDef &OpToFold, MachineInstr *UseMI,
unsigned UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList) const;
- void foldOperand(MachineOperand &OpToFold,
- MachineInstr *UseMI,
- int UseOpIdx,
+ void foldOperand(FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
@@ -148,7 +245,7 @@ class SIFoldOperandsImpl {
bool tryConstantFoldOp(MachineInstr *MI) const;
bool tryFoldCndMask(MachineInstr &MI) const;
bool tryFoldZeroHighBits(MachineInstr &MI) const;
- bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+ bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;
bool tryFoldFoldableCopy(MachineInstr &MI,
@@ -240,8 +337,8 @@ static unsigned macToMad(unsigned Opc) {
// TODO: Add heuristic that the frame index might not fit in the addressing mode
// immediate offset to avoid materializing in loops.
-bool SIFoldOperandsImpl::frameIndexMayFold(
- const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {
+bool SIFoldOperandsImpl::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
+ const FoldableDef &OpToFold) const {
if (!OpToFold.isFI())
return false;
@@ -381,7 +478,8 @@ bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
return true;
}
-bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
+bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold,
+ int64_t ImmVal) const {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
unsigned Opcode = MI->getOpcode();
@@ -391,8 +489,8 @@ bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
// If the literal can be inlined as-is, apply it and short-circuit the
// tests below. The main motivation for this is to avoid unintuitive
// uses of opsel.
- if (AMDGPU::isInlinableLiteralV216(Fold.ImmToFold, OpType)) {
- Old.ChangeToImmediate(Fold.ImmToFold);
+ if (AMDGPU::isInlinableLiteralV216(ImmVal, OpType)) {
+ Old.ChangeToImmediate(ImmVal);
return true;
}
@@ -415,10 +513,10 @@ bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
MachineOperand &Mod = MI->getOperand(ModIdx);
unsigned ModVal = Mod.getImm();
- uint16_t ImmLo = static_cast<uint16_t>(
- Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
- uint16_t ImmHi = static_cast<uint16_t>(
- Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
+ uint16_t ImmLo =
+ static_cast<uint16_t>(ImmVal >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
+ uint16_t ImmHi =
+ static_cast<uint16_t>(ImmVal >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;
unsigned NewModVal = ModVal & ~(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
@@ -510,16 +608,20 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
assert(Old.isReg());
if (Fold.isImm() && canUseImmWithOpSel(Fold)) {
- if (tryFoldImmWithOpSel(Fold))
+ std::optional<int64_t> ImmVal = Fold.Def.getEffectiveImmVal();
+ if (!ImmVal)
+ return false;
+
+ if (tryFoldImmWithOpSel(Fold, *ImmVal))
return true;
// We can't represent the candidate as an inline constant. Try as a literal
// with the original opsel, checking constant bus limitations.
- MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold);
+ MachineOperand New = MachineOperand::CreateImm(*ImmVal);
int OpNo = MI->getOperandNo(&Old);
if (!TII->isOperandLegal(*MI, OpNo, &New))
return false;
- Old.ChangeToImmediate(Fold.ImmToFold);
+ Old.ChangeToImmediate(*ImmVal);
return true;
}
@@ -575,22 +677,34 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
MI->setDesc(TII->get(NewMFMAOpc));
MI->untieRegOperand(0);
}
- Old.ChangeToImmediate(Fold.ImmToFold);
+
+ std::optional<int64_t> ImmVal = Fold.Def.getEffectiveImmVal();
+ if (!ImmVal)
+ return false;
+
+ // TODO: Should we try to avoid adding this to the candidate list?
+ MachineOperand New = MachineOperand::CreateImm(*ImmVal);
+ int OpNo = MI->getOperandNo(&Old);
+ if (!TII->isOperandLegal(*MI, OpNo, &New))
+ return false;
+
+ Old.ChangeToImmediate(*ImmVal);
return true;
}
if (Fold.isGlobal()) {
- Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
- Fold.OpToFold->getTargetFlags());
+ Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
+ Fold.Def.OpToFold->getOffset(),
+ Fold.Def.OpToFold->getTargetFlags());
return true;
}
if (Fold.isFI()) {
- Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ Old.ChangeToFrameIndex(Fold.getFI());
return true;
}
- MachineOperand *New = Fold.OpToFold;
+ MachineOperand *New = Fold.Def.OpToFold;
// Rework once the VS_16 register class is updated to include proper
// 16-bit SGPRs instead of 32-bit ones.
if (Old.getSubReg() == AMDGPU::lo16 && TRI->isSGPRReg(*MRI, New->getReg()))
@@ -613,26 +727,19 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
- MachineOperand *FoldOp, bool Commuted = false,
- int ShrinkOp = -1) {
- appendFoldCandidate(FoldList,
- FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
-}
-
-static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
- MachineInstr *MI, unsigned OpNo, int64_t ImmVal,
+ const FoldableDef &FoldOp,
bool Commuted = false, int ShrinkOp = -1) {
appendFoldCandidate(FoldList,
- FoldCandidate(MI, OpNo, ImmVal, Commuted, ShrinkOp));
+ FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
}
bool SIFoldOperandsImpl::tryAddToFoldList(
SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
- MachineOperand *OpToFold) const {
+ const FoldableDef &OpToFold) const {
const unsigned Opc = MI->getOpcode();
auto tryToFoldAsFMAAKorMK = [&]() {
- if (!OpToFold->isImm())
+ if (!OpToFold.isImm())
return false;
const bool TryAK = OpNo == 3;
@@ -665,8 +772,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
return false;
};
- bool IsLegal = TII->isOperandLegal(*MI, OpNo, OpToFold);
- if (!IsLegal && OpToFold->isImm()) {
+ bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
+ if (!IsLegal && OpToFold.isImm()) {
FoldCandidate Fold(MI, OpNo, OpToFold);
IsLegal = canUseImmWithOpSel(Fold);
}
@@ -700,7 +807,7 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
}
// Special case for s_setreg_b32
- if (OpToFold->isImm()) {
+ if (OpToFold.isImm()) {
unsigned ImmOpc = 0;
if (Opc == AMDGPU::S_SETREG_B32)
ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
@@ -741,10 +848,10 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
return false;
int Op32 = -1;
- if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+ if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
- (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+ (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
return false;
}
@@ -763,7 +870,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
}
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, /*Commuted=*/true,
+ Op32);
return true;
}
@@ -938,7 +1046,7 @@ MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat(
}
bool SIFoldOperandsImpl::tryToFoldACImm(
- MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+ const FoldableDef &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList) const {
const MCInstrDesc &Desc = UseMI->getDesc();
if (UseOpIdx >= Desc.getNumOperands())
@@ -949,27 +1057,9 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
return false;
MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
- if (OpToFold.isImm()) {
- if (unsigned UseSubReg = UseOp.getSubReg()) {
- std::optional<int64_t> SubImm =
- SIInstrInfo::extractSubregFromImm(OpToFold.getImm(), UseSubReg);
- if (!SubImm)
- return false;
-
- // TODO: Avoid the temporary MachineOperand
- MachineOperand TmpOp = MachineOperand::CreateImm(*SubImm);
- if (TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, *SubImm);
- return true;
- }
-
- return false;
- }
-
- if (TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
- return true;
- }
+ if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
+ return true;
}
// TODO: Verify the following code handles subregisters correctly.
@@ -985,11 +1075,17 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
return false;
// Maybe it is just a COPY of an immediate itself.
+
+ // FIXME: Remove this handling. There is already special case folding of
+ // immediate into copy in foldOperand. This is looking for the def of the
+ // value the folding started from in the first place.
MachineInstr *Def = MRI->getVRegDef(UseReg);
if (Def && TII->isFoldableCopy(*Def)) {
MachineOperand &DefOp = Def->getOperand(1);
if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
+ FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
+ OpToFold.DefSubReg);
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
return true;
}
}
@@ -998,7 +1094,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
}
void SIFoldOperandsImpl::foldOperand(
- MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
+ FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);
@@ -1038,18 +1134,20 @@ void SIFoldOperandsImpl::foldOperand(
if (SplatVal) {
if (MachineOperand *Foldable =
tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
- appendFoldCandidate(FoldList, RSUseMI, OpNo, Foldable);
+ FoldableDef SplatDef(*Foldable, SplatRC);
+ appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef);
continue;
}
}
+ // TODO: Handle general compose
if (RSUse->getSubReg() != RegSeqDstSubReg)
continue;
- if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList))
- continue;
-
- foldOperand(OpToFold, RSUseMI, OpNo, FoldList, CopiesToReplace);
+ // FIXME: We should avoid recursing here. There should be a cleaner split
+ // between the in-place mutations and adding to the fold list.
+ foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
+ CopiesToReplace);
}
return;
@@ -1077,7 +1175,7 @@ void SIFoldOperandsImpl::foldOperand(
// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
- UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
+ UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI());
const unsigned Opc = UseMI->getOpcode();
if (TII->isFLATScratch(*UseMI) &&
@@ -1107,11 +1205,12 @@ void SIFoldOperandsImpl::foldOperand(
return;
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
- if (!DestReg.isPhysical()) {
- if (DestRC == &AMDGPU::AGPR_32RegClass &&
- TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+ if (!DestReg.isPhysical() && DestRC == &AMDGPU::AGPR_32RegClass) {
+ std::optional<int64_t> UseImmVal = OpToFold.getEffectiveImmVal();
+ if (UseImmVal && TII->isInlineConstant(
+ ...
[truncated]
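To make the diff's central type concrete, here is a minimal self-contained model of FoldableDef for the immediate case: the pending DefSubReg is applied lazily by getEffectiveImmVal, so the fold sees the dword the use actually reads. This is a sketch under simplified assumptions (only 32-bit halves of a 64-bit immediate, hypothetical enum values), not the in-tree class:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Stand-ins for the generated AMDGPU subregister indices.
enum SubRegIdx : unsigned { NoSubRegister = 0, Sub0, Sub1 };

// Immediate-only model of the patch's FoldableDef.
struct ImmFoldableDef {
  int64_t ImmToFold;
  unsigned DefSubReg = NoSubRegister;

  // Analogue of getWithSubReg: record a further extract to apply at the
  // use point (the real code composes indices via composeSubRegIndices).
  ImmFoldableDef getWithSubReg(unsigned SubReg) const {
    ImmFoldableDef Copy(*this);
    Copy.DefSubReg = SubReg; // simplified: no prior extract to compose
    return Copy;
  }

  // Analogue of getEffectiveImmVal: the value after the pending extract.
  std::optional<int64_t> getEffectiveImmVal() const {
    switch (DefSubReg) {
    case NoSubRegister:
      return ImmToFold;
    case Sub0:
      return static_cast<int32_t>(ImmToFold); // low dword
    case Sub1:
      return static_cast<int32_t>(ImmToFold >> 32); // high dword
    }
    return std::nullopt; // unsupported index: refuse to fold
  }
};

int main() {
  // A 64-bit def of (1 << 32) | 42 feeding a reg_sequence: a user that
  // reads sub0 must fold 42, and one that reads sub1 must fold 1.
  ImmFoldableDef Def{(int64_t{1} << 32) | 42};
  assert(*Def.getWithSubReg(Sub0).getEffectiveImmVal() == 42);
  assert(*Def.getWithSubReg(Sub1).getEffectiveImmVal() == 1);
  return 0;
}
```

Returning std::nullopt for unknown indices matches the conservative shape of the patch: when the pending subregister cannot be applied to the value, isOperandLegal reports the fold as illegal rather than folding the wrong bits.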
ping
The idea seems good. I haven't reviewed it all in detail.
ping
ping
We weren't fully respecting the type of a def of an immediate vs.
the type at the use point. Refactor the folding logic to track the
value to fold, as well as a subregister to apply to the underlying
value. This is similar to how PeepholeOpt tracks subregisters (though
only for pure copy-like instructions, no constants).
Fixes #139317
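As a closing illustration of the "compose subregister indices" step (FoldableDef::getWithSubReg calls TRI.composeSubRegIndices in the diff): treating a dword index as a bit offset and width makes the composition rule concrete. Everything below is a schematic model of what the TargetRegisterInfo tables encode, not the implementation:

```cpp
#include <cassert>

// Model a subregister index as (bit offset, bit width) within the
// enclosing register; the generated AMDGPU indices carry the same data.
struct SubIdx {
  unsigned Offset; // bit offset into the enclosing value
  unsigned Width;  // width of the extracted piece
};

// Composing "extract A, then extract B from the result" adds offsets,
// which is what composeSubRegIndices computes via generated tables.
SubIdx compose(SubIdx A, SubIdx B) {
  assert(B.Offset + B.Width <= A.Width && "inner extract out of range");
  return {A.Offset + B.Offset, B.Width};
}

int main() {
  SubIdx Sub2Sub3{64, 64}; // high half of a 128-bit value
  SubIdx Sub0{0, 32};      // low dword of that 64-bit half
  SubIdx Composed = compose(Sub2Sub3, Sub0);
  assert(Composed.Offset == 64 && Composed.Width == 32); // i.e. sub2
  return 0;
}
```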