Commit 1ee6ce9

GlobalISel: Allow forming atomic/volatile G_ZEXTLOAD
SelectionDAG has a target hook, getExtendForAtomicOps, which it uses in the computeKnownBits implementation for ATOMIC_LOAD. This is pretty ugly (as is having a separate load opcode for atomics), so instead allow making use of an atomic zextload. Enable this for AArch64, since the DAG path defaults to the zext behavior.

The tablegen changes are pretty ugly, but they partially help migrate SelectionDAG from using ISD::ATOMIC_LOAD to regular ISD::LOAD with atomic memory operands. For now the DAG emitter will emit matchers for patterns which the DAG will not produce.

I'm still a bit confused by the intent of the isLoad/isStore/isAtomic bits. The DAG implementation rejects trying to use any of these in combination. For now I've opted to make the isLoad checks also check isAtomic, although I think having both isLoad and isAtomic set on these makes the most sense.
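As a concrete illustration of the new combiner behavior (adapted from the MIR test added in this commit, the seq_cst s16 case), a mask that exactly covers the atomic access width can now fold into a zero-extending load while the memory operand is left untouched:

  ; Before the combine:
  %0:_(p0) = COPY $x0
  %1:_(s32) = G_CONSTANT i32 65535
  %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
  %3:_(s32) = G_AND %2, %1
  $w0 = COPY %3

  ; After the combine, the access size and ordering are preserved:
  %0:_(p0) = COPY $x0
  %4:_(s32) = G_ZEXTLOAD %0(p0) :: (load seq_cst (s16))
  $w0 = COPY %4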
1 parent 0d7161a commit 1ee6ce9

12 files changed: +351 −52 lines


llvm/docs/GlobalISel/GenericOpcode.rst

Lines changed: 4 additions & 0 deletions
@@ -684,6 +684,10 @@ Only G_LOAD is valid if the result is a vector type. If the result is larger
 than the memory size, the high elements are undefined (i.e. this is not a
 per-element, vector anyextload)
 
+Unlike in SelectionDAG, atomic loads are expressed with the same
+opcodes as regular loads. G_LOAD, G_SEXTLOAD and G_ZEXTLOAD may all
+have atomic memory operands.
+
 G_INDEXED_LOAD
 ^^^^^^^^^^^^^^
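Illustratively (a sketch in generic MIR; the register names and the acquire ordering here are arbitrary, not taken from this commit), the documented form is:

  %ptr:_(p0) = COPY $x0
  ; Zero-extending atomic load: bits above the loaded s8 are zero in the s32 result.
  %zextval:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load acquire (s8))
  ; A sign-extending atomic load uses the same memory-operand syntax.
  %sextval:_(s32) = G_SEXTLOAD %ptr(p0) :: (load acquire (s8))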

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 2 additions & 0 deletions
@@ -184,6 +184,8 @@ def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = true; }
 def : GINodeEquiv<G_LOAD, atomic_load> {
   let CheckMMOIsNonAtomic = false;
   let CheckMMOIsAtomic = true;
+  let IfSignExtend = G_SEXTLOAD;
+  let IfZeroExtend = G_ZEXTLOAD;
 }
 
 // Operands are swapped for atomic_store vs. regular store

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 50 additions & 0 deletions
@@ -1627,18 +1627,34 @@ defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
 defm atomic_store : binary_atomic_op<atomic_store>;
 defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>;
 
+/// Atomic load which zeroes the excess high bits.
+def atomic_load_zext :
+  PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let IsZeroExtLoad = true;
+}
+
+/// Atomic load which sign extends the excess high bits.
+def atomic_load_sext :
+  PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let IsSignExtLoad = true;
+}
+
 def atomic_load_8 :
   PatFrag<(ops node:$ptr),
           (atomic_load node:$ptr)> {
   let IsAtomic = true;
   let MemoryVT = i8;
 }
+
 def atomic_load_16 :
   PatFrag<(ops node:$ptr),
           (atomic_load node:$ptr)> {
   let IsAtomic = true;
   let MemoryVT = i16;
 }
+
 def atomic_load_32 :
   PatFrag<(ops node:$ptr),
           (atomic_load node:$ptr)> {
@@ -1652,6 +1668,40 @@ def atomic_load_64 :
   let MemoryVT = i64;
 }
 
+def atomic_load_zext_8 :
+  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i8;
+}
+
+def atomic_load_zext_16 :
+  PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i16;
+}
+
+def atomic_load_sext_8 :
+  PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i8;
+}
+
+def atomic_load_sext_16 :
+  PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
+  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
+  let MemoryVT = i16;
+}
+
+// Atomic load which zeroes or anyextends the high bits.
+def atomic_load_az_8 : PatFrags<(ops node:$op),
+                                [(atomic_load_8 node:$op),
+                                 (atomic_load_zext_8 node:$op)]>;
+
+// Atomic load which zeroes or anyextends the high bits.
+def atomic_load_az_16 : PatFrags<(ops node:$op),
+                                 [(atomic_load_16 node:$op),
+                                  (atomic_load_zext_16 node:$op)]>;
+
 def nonext_masked_gather :
   PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
           (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 15 additions & 7 deletions
@@ -698,13 +698,13 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   Register SrcReg = MI.getOperand(1).getReg();
   GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
-  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
-      !LoadMI->isSimple())
+  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
     return false;
 
   Register LoadReg = LoadMI->getDstReg();
-  LLT LoadTy = MRI.getType(LoadReg);
+  LLT RegTy = MRI.getType(LoadReg);
   Register PtrReg = LoadMI->getPointerReg();
+  unsigned RegSize = RegTy.getSizeInBits();
   uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
   unsigned MaskSizeBits = MaskVal.countTrailingOnes();
 
@@ -715,7 +715,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   // If the mask covers the whole destination register, there's nothing to
   // extend
-  if (MaskSizeBits >= LoadTy.getSizeInBits())
+  if (MaskSizeBits >= RegSize)
     return false;
 
   // Most targets cannot deal with loads of size < 8 and need to re-legalize to
@@ -725,17 +725,25 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
 
   const MachineMemOperand &MMO = LoadMI->getMMO();
   LegalityQuery::MemDesc MemDesc(MMO);
-  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+
+  // Don't modify the memory access size if this is atomic/volatile, but we can
+  // still adjust the opcode to indicate the high bit behavior.
+  if (LoadMI->isSimple())
+    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
+    return false;
+
   if (!isLegalOrBeforeLegalizer(
-          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
     return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
     B.setInstrAndDebugLoc(*LoadMI);
     auto &MF = B.getMF();
     auto PtrInfo = MMO.getPointerInfo();
-    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
     B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+    LoadMI->eraseFromParent();
   };
   return true;
 }

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 19 additions & 19 deletions
@@ -29,21 +29,21 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
 
 // An atomic load operation that does not need either acquire or release
 // semantics.
-class relaxed_load<PatFrag base>
+class relaxed_load<PatFrags base>
   : PatFrag<(ops node:$ptr), (base node:$ptr)> {
   let IsAtomic = 1;
   let IsAtomicOrderingAcquireOrStronger = 0;
 }
 
 // A atomic load operation that actually needs acquire semantics.
-class acquiring_load<PatFrag base>
+class acquiring_load<PatFrags base>
   : PatFrag<(ops node:$ptr), (base node:$ptr)> {
   let IsAtomic = 1;
   let IsAtomicOrderingAcquire = 1;
 }
 
 // An atomic load operation that needs sequential consistency.
-class seq_cst_load<PatFrag base>
+class seq_cst_load<PatFrags base>
   : PatFrag<(ops node:$ptr), (base node:$ptr)> {
   let IsAtomic = 1;
   let IsAtomicOrderingSequentiallyConsistent = 1;
@@ -63,34 +63,34 @@ let Predicates = [HasLDAPR] in {
 }
 
 // 8-bit loads
-def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+def : Pat<(seq_cst_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(acquiring_load<atomic_load_az_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_az_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
                                            ro_Wextend8:$offset)),
           (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
-                                        ro_Xextend8:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+                                           ro_Xextend8:$offset)),
           (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
-                                        uimm12s1:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_8> (am_indexed8 GPR64sp:$Rn,
+                                           uimm12s1:$offset)),
           (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
-def : Pat<(relaxed_load<atomic_load_8>
+def : Pat<(relaxed_load<atomic_load_az_8>
            (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
           (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
 
 // 16-bit loads
-def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+def : Pat<(seq_cst_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(acquiring_load<atomic_load_az_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_az_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                             ro_Wextend16:$extend)),
           (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
-def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)),
+def : Pat<(relaxed_load<atomic_load_az_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+                                            ro_Xextend16:$extend)),
          (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
-def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
-                                         uimm12s2:$offset)),
+def : Pat<(relaxed_load<atomic_load_az_16> (am_indexed16 GPR64sp:$Rn,
+                                            uimm12s2:$offset)),
           (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat<(relaxed_load<atomic_load_16>
+def : Pat<(relaxed_load<atomic_load_az_16>
            (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
           (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
 

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 10 additions & 2 deletions
@@ -260,8 +260,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
       .maxScalarIf(typeInSet(1, {s128}), 0, s64);
 
-  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
-      .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
+
+  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
+    auto &Actions = getActionDefinitionsBuilder(Op);
+
+    if (Op == G_SEXTLOAD)
+      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
+
+    // Atomics have zero extending behavior.
+    Actions
       .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                  {s32, p0, s16, 8},
                                  {s32, p0, s32, 8},
@@ -278,6 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
+  }
 
   auto IsPtrVecPred = [=](const LegalityQuery &Query) {
     const LLT &ValTy = Query.Types[0];
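The practical effect on AArch64 (a hedged sketch of the intent, not copied from a test in this commit): a zero-extending atomic load is now reported legal and can be selected directly, while the sign-extending form is still lowered into a plain atomic G_LOAD followed by a sign extension, for example:

  ; Legal after this change; zero extension matches the zeroing behavior of the
  ; AArch64 byte/halfword load instructions.
  %z:_(s32) = G_ZEXTLOAD %p(p0) :: (load acquire (s8))

  ; Still lowered, roughly into:
  %t:_(s32) = G_LOAD %p(p0) :: (load acquire (s8))
  %s:_(s32) = G_SEXT_INREG %t, 8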

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 1 addition & 2 deletions
@@ -161,8 +161,7 @@
 # DEBUG-NEXT: G_SEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
-# DEBUG-NEXT: G_ZEXTLOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: G_ZEXTLOAD (opcode 80): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_INDEXED_LOAD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-and-mask.mir

Lines changed: 66 additions & 4 deletions
@@ -88,12 +88,12 @@ body: |
 ...
 
 ---
-name: test_load_s32_atomic
+name: test_load_mask_s8_s32_atomic
 tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x0
-    ; CHECK-LABEL: name: test_load_s32_atomic
+    ; CHECK-LABEL: name: test_load_mask_s8_s32_atomic
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
@@ -108,6 +108,49 @@ body: |
     $w0 = COPY %3
 ...
 
+# The mask is equal to the memory size.
+---
+name: test_load_mask_s16_s16_atomic
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_load_mask_s16_s16_atomic
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load seq_cst (s16))
+    ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_CONSTANT i32 65535
+    %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
+    %3:_(s32) = G_AND %2, %1
+    $w0 = COPY %3
+...
+
+# The mask is smaller than the memory size which must be preserved, so
+# there's little point to folding.
+---
+name: test_load_mask_s8_s16_atomic
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_load_mask_s8_s16_atomic
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load seq_cst (s16))
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_CONSTANT i32 255
+    %2:_(s32) = G_LOAD %0 :: (load seq_cst (s16))
+    %3:_(s32) = G_AND %2, %1
+    $w0 = COPY %3
+...
+
 ---
 name: test_load_mask_size_equals_dst_size
 tracksRegLiveness: true
@@ -272,13 +315,32 @@ body: |
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (volatile load (s8))
+    ; CHECK-NEXT: $w0 = COPY [[ZEXTLOAD]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_CONSTANT i32 255
+    %2:_(s32) = G_LOAD %0 :: (volatile load (s8))
+    %3:_(s32) = G_AND %2, %1
+    $w0 = COPY %3
+...
+
+---
+name: test_volatile_mask_smaller_mem
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_volatile_mask_smaller_mem
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s8))
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile load (s16))
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]]
     ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
    %0:_(p0) = COPY $x0
    %1:_(s32) = G_CONSTANT i32 255
-    %2:_(s32) = G_LOAD %0 :: (volatile load (s8))
+    %2:_(s32) = G_LOAD %0 :: (volatile load (s16))
    %3:_(s32) = G_AND %2, %1
    $w0 = COPY %3
 ...
