Skip to content

Commit 23f657c

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Emit bzero on Darwin
Darwin platforms for both AArch64 and X86 can provide optimized `bzero()` routines. In this case, it may be preferable to use `bzero` in place of a memset of 0.

This adds a G_BZERO generic opcode, similar to G_MEMSET et al. This opcode can be generated by platforms which may want to use bzero.

To emit the G_BZERO, this adds a pre-legalize combine for AArch64. The conditions for this are largely a port of the bzero case in `AArch64SelectionDAGInfo::EmitTargetCodeForMemset`.

The only difference in comparison to the SelectionDAG code is that, when compiling for minsize, this will fire for all memsets of 0. The original code notes that it's not beneficial to do this for small memsets; however, using bzero here will save a mov from wzr. For minsize, I think that it's preferable to prioritise omitting the mov.

This also fixes a bug in the libcall legalization code that would delete instructions that could not be legalized. It also adds a check to make sure that we actually get a libcall name.

Code size improvements (Darwin):
- CTMark -Os: -0.0% geomean (-0.1% on pairlocalalign)
- CTMark -Oz: -0.2% geomean (-0.5% on bullet)

Differential Revision: https://reviews.llvm.org/D99358
1 parent 11bf268 commit 23f657c

File tree

11 files changed

+323
-9
lines changed

11 files changed

+323
-9
lines changed

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,7 @@ HANDLE_TARGET_OPCODE(G_MEMMOVE)
731731

732732
/// llvm.memset intrinsic
733733
HANDLE_TARGET_OPCODE(G_MEMSET)
734+
HANDLE_TARGET_OPCODE(G_BZERO)
734735

735736
/// Vector reductions
736737
HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FADD)

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,13 @@ def G_MEMSET : GenericInstruction {
13541354
let mayStore = true;
13551355
}
13561356

1357+
// Generic bzero instruction, usable by targets that prefer a bzero call over a
// memset of 0. It defines no results and is marked as storing to memory.
// Operands: the destination pointer, the size, and an immediate tail-call flag.
def G_BZERO : GenericInstruction {
1358+
let OutOperandList = (outs);
1359+
let InOperandList = (ins ptype0:$dst_addr, type1:$size, untyped_imm_0:$tailcall);
1360+
let hasSideEffects = false;
1361+
let mayStore = true;
1362+
}
1363+
13571364
//------------------------------------------------------------------------------
13581365
// Bitfield extraction.
13591366
//------------------------------------------------------------------------------

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,11 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
582582
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
583583
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
584584
RTLIB::Libcall RTLibcall;
585-
switch (MI.getOpcode()) {
585+
unsigned Opc = MI.getOpcode();
586+
switch (Opc) {
587+
case TargetOpcode::G_BZERO:
588+
RTLibcall = RTLIB::BZERO;
589+
break;
586590
case TargetOpcode::G_MEMCPY:
587591
RTLibcall = RTLIB::MEMCPY;
588592
break;
@@ -597,6 +601,13 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
597601
}
598602
const char *Name = TLI.getLibcallName(RTLibcall);
599603

604+
// Unsupported libcall on the target.
605+
if (!Name) {
606+
LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
607+
<< MIRBuilder.getTII().getName(Opc) << "\n");
608+
return LegalizerHelper::UnableToLegalize;
609+
}
610+
600611
CallLowering::CallLoweringInfo Info;
601612
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
602613
Info.Callee = MachineOperand::CreateES(Name);
@@ -748,10 +759,14 @@ LegalizerHelper::libcall(MachineInstr &MI) {
748759
return Status;
749760
break;
750761
}
762+
case TargetOpcode::G_BZERO:
751763
case TargetOpcode::G_MEMCPY:
752764
case TargetOpcode::G_MEMMOVE:
753765
case TargetOpcode::G_MEMSET: {
754-
LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
766+
LegalizeResult Result =
767+
createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
768+
if (Result != Legalized)
769+
return Result;
755770
MI.eraseFromParent();
756771
return Result;
757772
}

llvm/lib/CodeGen/MachineVerifier.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,26 +1509,28 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
15091509

15101510
break;
15111511
}
1512+
case TargetOpcode::G_BZERO:
15121513
case TargetOpcode::G_MEMSET: {
15131514
ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
1515+
std::string Name = Opc == TargetOpcode::G_MEMSET ? "memset" : "bzero";
15141516
if (MMOs.size() != 1) {
1515-
report("memset must have 1 memory operand", MI);
1517+
report(Twine(Name, " must have 1 memory operand"), MI);
15161518
break;
15171519
}
15181520

15191521
if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) {
1520-
report("memset memory operand must be a store", MI);
1522+
report(Twine(Name, " memory operand must be a store"), MI);
15211523
break;
15221524
}
15231525

15241526
LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
15251527
if (!DstPtrTy.isPointer()) {
1526-
report("memset operand must be a pointer", MI);
1528+
report(Twine(Name, " operand must be a pointer"), MI);
15271529
break;
15281530
}
15291531

15301532
if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
1531-
report("inconsistent memset address space", MI);
1533+
report("inconsistent " + Twine(Name, " address space"), MI);
15321534

15331535
break;
15341536
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
682682

683683
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
684684

685-
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
685+
getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
686+
.libcall();
686687

687688
getActionDefinitionsBuilder(G_ABS).lowerIf(
688689
[=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); });

llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,46 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
217217
return true;
218218
}
219219

220+
/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is
221+
/// supported and beneficial to do so.
222+
///
223+
/// \note This only applies on Darwin.
224+
///
225+
/// \returns true if \p MI was replaced with a G_BZERO.
226+
static bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
227+
bool MinSize) {
228+
assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
229+
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
230+
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
231+
if (!TLI.getLibcallName(RTLIB::BZERO))
232+
return false;
233+
auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
234+
if (!Zero || Zero->Value.getSExtValue() != 0)
235+
return false;
236+
237+
// It's not faster to use bzero rather than memset for sizes <= 256.
238+
// However, it *does* save us a mov from wzr, so if we're going for
239+
// minsize, use bzero even if it's slower.
240+
if (!MinSize) {
241+
// If the size is known, check it. If it is not known, assume using bzero is
242+
// better.
243+
if (auto Size =
244+
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
245+
if (Size->Value.getSExtValue() <= 256)
246+
return false;
247+
}
248+
}
249+
250+
MIRBuilder.setInstrAndDebugLoc(MI);
251+
MIRBuilder
252+
.buildInstr(TargetOpcode::G_BZERO, {},
253+
{MI.getOperand(0), MI.getOperand(2)})
254+
.addImm(MI.getOperand(3).getImm())
255+
.addMemOperand(*MI.memoperands_begin());
256+
MI.eraseFromParent();
257+
return true;
258+
}
259+
220260
class AArch64PreLegalizerCombinerHelperState {
221261
protected:
222262
CombinerHelper &Helper;
@@ -263,7 +303,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
263303
if (Generated.tryCombineAll(Observer, MI, B))
264304
return true;
265305

266-
switch (MI.getOpcode()) {
306+
unsigned Opc = MI.getOpcode();
307+
switch (Opc) {
267308
case TargetOpcode::G_CONCAT_VECTORS:
268309
return Helper.tryCombineConcatVectors(MI);
269310
case TargetOpcode::G_SHUFFLE_VECTOR:
@@ -275,7 +316,11 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
275316
// heuristics decide.
276317
unsigned MaxLen = EnableOpt ? 0 : 32;
277318
// Try to inline memcpy type calls if optimizations are enabled.
278-
return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false;
319+
if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen))
320+
return true;
321+
if (Opc == TargetOpcode::G_MEMSET)
322+
return tryEmitBZero(MI, B, EnableMinSize);
323+
return false;
279324
}
280325
}
281326

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# RUN: not llc -mtriple=aarch64 -global-isel-abort=1 -run-pass=legalizer -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
2+
# RUN: not llc -mtriple=aarch64-linux-gnu -global-isel-abort=1 -run-pass=legalizer -verify-machineinstrs %s -o /dev/null 2>&1 | FileCheck %s
3+
...
4+
---
5+
name: bzero
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
# CHECK: LLVM ERROR: unable to legalize instruction: G_BZERO
10+
liveins: $x0, $x1
11+
%ptr:_(p0) = COPY $x0
12+
%width:_(s64) = COPY $x1
13+
G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
14+
RET_ReallyLR
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-apple-ios -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
# Check that we can legalize G_BZERO on Darwin.
5+
6+
...
7+
---
8+
name: bzero
9+
tracksRegLiveness: true
10+
body: |
11+
bb.0:
12+
liveins: $x0, $x1
13+
; CHECK-LABEL: name: bzero
14+
; CHECK: liveins: $x0, $x1
15+
; CHECK: %ptr:_(p0) = COPY $x0
16+
; CHECK: %width:_(s64) = COPY $x1
17+
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
18+
; CHECK: $x0 = COPY %ptr(p0)
19+
; CHECK: $x1 = COPY %width(s64)
20+
; CHECK: BL &bzero, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1
21+
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
22+
; CHECK: RET_ReallyLR
23+
%ptr:_(p0) = COPY $x0
24+
%width:_(s64) = COPY $x1
25+
G_BZERO %ptr(p0), %width(s64), 0 :: (store 4)
26+
RET_ReallyLR
27+
28+
...
29+
---
30+
name: bzero_tail_call
31+
tracksRegLiveness: true
32+
body: |
33+
bb.0:
34+
liveins: $x0, $x1
35+
; CHECK-LABEL: name: bzero_tail_call
36+
; CHECK: liveins: $x0, $x1
37+
; CHECK: %ptr:_(p0) = COPY $x0
38+
; CHECK: %width:_(s64) = COPY $x1
39+
; CHECK: $x0 = COPY %ptr(p0)
40+
; CHECK: $x1 = COPY %width(s64)
41+
; CHECK: TCRETURNdi &bzero, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1
42+
%ptr:_(p0) = COPY $x0
43+
%width:_(s64) = COPY $x1
44+
G_BZERO %ptr(p0), %width(s64), 1 :: (store 4)
45+
RET_ReallyLR

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@
619619
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
620620
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
621621
# DEBUG-NEXT: G_MEMCPY (opcode {{[0-9]+}}): 3 type indices, 1 imm index
622+
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
622623
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
623624
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
624625
# DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index
@@ -629,6 +630,9 @@
629630
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
630631
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
631632
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
633+
# DEBUG-NEXT: G_BZERO (opcode {{[0-9]+}}): 2 type indices, 1 imm index
634+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
635+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
632636
# DEBUG-NEXT: G_VECREDUCE_SEQ_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
633637
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
634638
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined

0 commit comments

Comments
 (0)