
Commit a642872

[GISel] Support llvm.memcpy.inline
Differential revision: https://reviews.llvm.org/D105072
1 parent 2eb7bbb commit a642872

File tree: 20 files changed (+613 −23 lines)

llvm/docs/GlobalISel/GenericOpcode.rst

Lines changed: 30 additions & 0 deletions
@@ -715,6 +715,36 @@ G_FENCE
 
 I couldn't find any documentation on this at the time of writing.
 
+G_MEMCPY
+^^^^^^^^
+
+Generic memcpy. Expects two MachineMemOperands covering the store and load
+respectively, in addition to explicit operands.
+
+G_MEMCPY_INLINE
+^^^^^^^^^^^^^^^
+
+Generic inlined memcpy. Like G_MEMCPY, but it is guaranteed that this version
+will not be lowered as a call to an external function. Currently the size
+operand is required to evaluate as a constant (not an immediate), though that is
+expected to change when llvm.memcpy.inline is taught to support dynamic sizes.
+
+G_MEMMOVE
+^^^^^^^^^
+
+Generic memmove. Similar to G_MEMCPY, but the source and destination memory
+ranges are allowed to overlap.
+
+G_MEMSET
+^^^^^^^^
+
+Generic memset. Expects a MachineMemOperand in addition to explicit operands.
+
+G_BZERO
+^^^^^^^
+
+Generic bzero. Expects a MachineMemOperand in addition to explicit operands.
+
 Control Flow
 ------------
 

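For concreteness, here is a minimal sketch of how a G_MEMCPY_INLINE could be built with MachineIRBuilder, attaching the store MMO first and the load MMO second as documented above. The helper buildInlineMemcpy is hypothetical, not part of this commit:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical helper (not part of this commit): build a G_MEMCPY_INLINE,
// attaching the store MMO first and the load MMO second, mirroring G_MEMCPY.
static MachineInstrBuilder
buildInlineMemcpy(MachineIRBuilder &MIRBuilder, Register Dst, Register Src,
                  Register Size, uint64_t KnownSize,
                  MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo,
                  Align DstAlign, Align SrcAlign) {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      DstPtrInfo, MachineMemOperand::MOStore, KnownSize, DstAlign);
  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      SrcPtrInfo, MachineMemOperand::MOLoad, KnownSize, SrcAlign);
  // No trailing tail-call immediate, unlike G_MEMCPY/G_MEMMOVE/G_MEMSET.
  return MIRBuilder.buildInstr(TargetOpcode::G_MEMCPY_INLINE)
      .addUse(Dst)
      .addUse(Src)
      .addUse(Size)
      .addMemOperand(StoreMMO)
      .addMemOperand(LoadMMO);
}
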
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 13 additions & 4 deletions
@@ -532,16 +532,25 @@ class CombinerHelper {
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
 
+  /// Emit loads and stores that perform the given memcpy.
+  /// Assumes \p MI is a G_MEMCPY_INLINE
+  /// TODO: implement dynamically sized inline memcpy,
+  ///       and rename: s/bool tryEmit/void emit/
+  bool tryEmitMemcpyInline(MachineInstr &MI);
+
 private:
   // Memcpy family optimization helpers.
+  bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+                           uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+                           bool IsVolatile);
   bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
-                      unsigned KnownLen, Align DstAlign, Align SrcAlign,
-                      bool IsVolatile);
+                      uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+                      Align SrcAlign, bool IsVolatile);
   bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
-                       unsigned KnownLen, Align DstAlign, Align SrcAlign,
+                       uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                        bool IsVolatile);
   bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
-                      unsigned KnownLen, Align DstAlign, bool IsVolatile);
+                      uint64_t KnownLen, Align DstAlign, bool IsVolatile);
 
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
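The signature changes above are the crux of the patch: optimizeMemcpy no longer computes its own store budget internally but receives it through the new Limit parameter, so one expansion routine can serve both opcodes. A self-contained sketch of the intended budgets (memcpyStoreBudget is illustrative; MaxStoresPerMemcpy stands in for the TargetLowering query):

#include <cstdint>
#include <limits>

// Sketch of the two budgets now fed to optimizeMemcpy's Limit parameter
// (MaxStoresPerMemcpy stands in for TLI.getMaxStoresPerMemcpy(OptSize)).
static uint64_t memcpyStoreBudget(bool IsInline, uint64_t MaxStoresPerMemcpy) {
  // An inline memcpy may never be lowered to a libcall, so its expansion
  // budget is effectively unbounded; a plain memcpy honors the target cap.
  return IsInline ? std::numeric_limits<uint64_t>::max() : MaxStoresPerMemcpy;
}
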

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 3 additions & 0 deletions
@@ -739,6 +739,9 @@ HANDLE_TARGET_OPCODE(G_WRITE_REGISTER)
 /// llvm.memcpy intrinsic
 HANDLE_TARGET_OPCODE(G_MEMCPY)
 
+/// llvm.memcpy.inline intrinsic
+HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE)
+
 /// llvm.memmove intrinsic
 HANDLE_TARGET_OPCODE(G_MEMMOVE)

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 8 additions & 0 deletions
@@ -1353,6 +1353,14 @@ def G_MEMCPY : GenericInstruction {
   let mayStore = true;
 }
 
+def G_MEMCPY_INLINE : GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size);
+  let hasSideEffects = false;
+  let mayLoad = true;
+  let mayStore = true;
+}
+
 def G_MEMMOVE : GenericInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
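Note what this definition omits relative to G_MEMMOVE just below: there is no untyped_imm_0:$tailcall operand, since an inline copy can never be emitted as a tail call. A hypothetical predicate (not from this commit) showing the resulting three-register operand layout:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Operand layout implied by the TableGen definition above:
//   0: $dst_addr (pointer), 1: $src_addr (pointer), 2: $size (scalar),
// with no trailing 'tail' immediate, unlike G_MEMCPY/G_MEMMOVE/G_MEMSET.
static bool hasInlineMemcpyShape(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE &&
         MI.getNumOperands() == 3 && MI.getOperand(0).isReg() &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isReg();
}
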

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 64 additions & 14 deletions
@@ -1218,7 +1218,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
 }
 
 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
-                                    Register Val, unsigned KnownLen,
+                                    Register Val, uint64_t KnownLen,
                                     Align Alignment, bool IsVolatile) {
   auto &MF = *MI.getParent()->getParent();
   const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1330,10 +1330,51 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
   return true;
 }
 
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  Register Len = MI.getOperand(2).getReg();
+
+  const auto *MMOIt = MI.memoperands_begin();
+  const MachineMemOperand *MemOp = *MMOIt;
+  bool IsVolatile = MemOp->isVolatile();
+
+  // See if this is a constant length copy
+  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
+  // FIXME: support dynamically sized G_MEMCPY_INLINE
+  assert(LenVRegAndVal.hasValue() &&
+         "inline memcpy with dynamic size is not yet supported");
+  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+  if (KnownLen == 0) {
+    MI.eraseFromParent();
+    return true;
+  }
+
+  const auto &DstMMO = **MI.memoperands_begin();
+  const auto &SrcMMO = **std::next(MI.memoperands_begin());
+  Align DstAlign = DstMMO.getBaseAlign();
+  Align SrcAlign = SrcMMO.getBaseAlign();
+
+  return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+                             IsVolatile);
+}
+
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
+                                         Register Src, uint64_t KnownLen,
+                                         Align DstAlign, Align SrcAlign,
+                                         bool IsVolatile) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+  return optimizeMemcpy(MI, Dst, Src, KnownLen,
+                        std::numeric_limits<uint64_t>::max(), DstAlign,
+                        SrcAlign, IsVolatile);
+}
+
 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
-                                    Register Src, unsigned KnownLen,
-                                    Align DstAlign, Align SrcAlign,
-                                    bool IsVolatile) {
+                                    Register Src, uint64_t KnownLen,
+                                    uint64_t Limit, Align DstAlign,
+                                    Align SrcAlign, bool IsVolatile) {
   auto &MF = *MI.getParent()->getParent();
   const auto &TLI = *MF.getSubtarget().getTargetLowering();
   auto &DL = MF.getDataLayout();
@@ -1343,7 +1384,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
 
   bool DstAlignCanChange = false;
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  bool OptSize = shouldLowerMemFuncForSize(MF);
   Align Alignment = commonAlignment(DstAlign, SrcAlign);
 
   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
@@ -1354,7 +1394,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
   // if the memcpy is in a tail call position.
 
-  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
   std::vector<LLT> MemOps;
 
   const auto &DstMMO = **MI.memoperands_begin();
@@ -1437,7 +1476,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
 }
 
 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
-                                     Register Src, unsigned KnownLen,
+                                     Register Src, uint64_t KnownLen,
                                      Align DstAlign, Align SrcAlign,
                                      bool IsVolatile) {
   auto &MF = *MI.getParent()->getParent();
@@ -1550,10 +1589,6 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
 
   auto MMOIt = MI.memoperands_begin();
   const MachineMemOperand *MemOp = *MMOIt;
-  bool IsVolatile = MemOp->isVolatile();
-  // Don't try to optimize volatile.
-  if (IsVolatile)
-    return false;
 
   Align DstAlign = MemOp->getBaseAlign();
   Align SrcAlign;
@@ -1571,18 +1606,33 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
   if (!LenVRegAndVal)
     return false; // Leave it to the legalizer to lower it to a libcall.
-  unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
+  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
 
   if (KnownLen == 0) {
     MI.eraseFromParent();
     return true;
   }
 
+  bool IsVolatile = MemOp->isVolatile();
+  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+    return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+                               IsVolatile);
+
+  // Don't try to optimize volatile.
+  if (IsVolatile)
+    return false;
+
   if (MaxLen && KnownLen > MaxLen)
     return false;
 
-  if (Opc == TargetOpcode::G_MEMCPY)
-    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+  if (Opc == TargetOpcode::G_MEMCPY) {
+    auto &MF = *MI.getParent()->getParent();
+    const auto &TLI = *MF.getSubtarget().getTargetLowering();
+    bool OptSize = shouldLowerMemFuncForSize(MF);
+    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+    return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
+                          IsVolatile);
+  }
   if (Opc == TargetOpcode::G_MEMMOVE)
     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
   if (Opc == TargetOpcode::G_MEMSET)
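For intuition about what optimizeMemcpy ultimately emits when the expansion fits the budget: a sequence of wide load/store pairs through MachineIRBuilder. The sketch below is deliberately simplified and makes assumptions the real code does not (64-bit pointers, 8-byte alignment, length a multiple of 8, anonymous MachinePointerInfo); expandFixedMemcpy is illustrative only:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

// Illustrative only: assumes 64-bit pointers, 8-byte alignment, and a length
// that is a multiple of 8. The real optimizeMemcpy asks the target for an
// optimal type sequence (findGISelOptimalMemOpLowering) and checks legality.
static void expandFixedMemcpy(MachineIRBuilder &MIB, Register Dst,
                              Register Src, uint64_t KnownLen) {
  MachineFunction &MF = MIB.getMF();
  const LLT S64 = LLT::scalar(64);
  const LLT PtrTy = MIB.getMRI()->getType(Dst);
  for (uint64_t Off = 0; Off < KnownLen; Off += 8) {
    auto OffC = MIB.buildConstant(S64, Off);
    auto SrcPtr = MIB.buildPtrAdd(PtrTy, Src, OffC);
    auto DstPtr = MIB.buildPtrAdd(PtrTy, Dst, OffC);
    MachineMemOperand *LdMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOLoad, 8, Align(8));
    MachineMemOperand *StMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOStore, 8, Align(8));
    auto Val = MIB.buildLoad(S64, SrcPtr, *LdMMO);
    MIB.buildStore(Val, DstPtr, *StMMO);
  }
}
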

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 11 additions & 4 deletions
@@ -1589,6 +1589,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
   if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
     DstAlign = MCI->getDestAlign().valueOrOne();
     SrcAlign = MCI->getSourceAlign().valueOrOne();
+  } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
+    DstAlign = MCI->getDestAlign().valueOrOne();
+    SrcAlign = MCI->getSourceAlign().valueOrOne();
   } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
     DstAlign = MMI->getDestAlign().valueOrOne();
     SrcAlign = MMI->getSourceAlign().valueOrOne();
@@ -1597,10 +1600,12 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
     DstAlign = MSI->getDestAlign().valueOrOne();
   }
 
-  // We need to propagate the tail call flag from the IR inst as an argument.
-  // Otherwise, we have to pessimize and assume later that we cannot tail call
-  // any memory intrinsics.
-  ICall.addImm(CI.isTailCall() ? 1 : 0);
+  if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
+    // We need to propagate the tail call flag from the IR inst as an argument.
+    // Otherwise, we have to pessimize and assume later that we cannot tail call
+    // any memory intrinsics.
+    ICall.addImm(CI.isTailCall() ? 1 : 0);
+  }
 
   // Create mem operands to store the alignment and volatile info.
   auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
@@ -2033,6 +2038,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                          getOrCreateVReg(*CI.getArgOperand(0)),
                          MachineInstr::copyFlagsFromInstruction(CI));
     return true;
+  case Intrinsic::memcpy_inline:
+    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
   case Intrinsic::memcpy:
     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
   case Intrinsic::memmove:
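For reference, the IR that reaches this new path is an @llvm.memcpy.inline call; assuming IRBuilder's CreateMemCpyInline helper (which predates this commit), a caller might produce it like this (emitInlineCopy is a hypothetical wrapper):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical wrapper: emit @llvm.memcpy.inline with a constant size, which
// IRTranslator will now lower to G_MEMCPY_INLINE. The size must currently be
// a compile-time constant, matching the combiner's assertion.
static CallInst *emitInlineCopy(IRBuilder<> &B, Value *Dst, Value *Src,
                                uint64_t Size) {
  return B.CreateMemCpyInline(Dst, MaybeAlign(1), Src, MaybeAlign(1),
                              B.getInt64(Size));
}
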

llvm/lib/CodeGen/MachineVerifier.cpp

Lines changed: 9 additions & 0 deletions
@@ -1477,6 +1477,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
     }
     break;
   }
+  case TargetOpcode::G_MEMCPY_INLINE:
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE: {
     ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
@@ -1507,6 +1508,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
     if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
       report("inconsistent load address space", MI);
 
+    if (Opc != TargetOpcode::G_MEMCPY_INLINE)
+      if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL))
+        report("'tail' flag (operand 3) must be an immediate 0 or 1", MI);
+
     break;
   }
   case TargetOpcode::G_BZERO:
@@ -1532,6 +1537,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
     if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
       report("inconsistent " + Twine(Name, " address space"), MI);
 
+    if (!MI->getOperand(MI->getNumOperands() - 1).isImm() ||
+        (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL))
+      report("'tail' flag (last operand) must be an immediate 0 or 1", MI);
+
     break;
   }
   case TargetOpcode::G_VECREDUCE_SEQ_FADD:

llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp

Lines changed: 2 additions & 0 deletions
@@ -85,6 +85,8 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
     return Helper.tryCombineShuffleVector(MI);
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {

llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

Lines changed: 2 additions & 0 deletions
@@ -272,6 +272,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
     return Helper.tryCombineShuffleVector(MI);
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {

llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp

Lines changed: 2 additions & 0 deletions
@@ -205,6 +205,8 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
     return true;
 
   switch (MI.getOpcode()) {
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_CONCAT_VECTORS:
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
