Skip to content

Commit 10d34f5

Browse files
committed
AArch64: use CAS instead of LDXR/STXR if available
This covers 128-bit loads and atomicrmw operations that have no single native instruction. Using CAS has a better chance of succeeding under high contention on some systems.
1 parent 247d8d4 commit 10d34f5

File tree

6 files changed

+641
-207
lines changed

6 files changed

+641
-207
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21903,7 +21903,10 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2190321903
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
2190421904
return AtomicExpansionKind::CmpXChg;
2190521905

21906-
return AtomicExpansionKind::LLSC;
21906+
// Using CAS for an atomic load has a better chance of succeeding under high
21907+
// contention situations. So use it if available.
21908+
return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
21909+
: AtomicExpansionKind::LLSC;
2190721910
}
2190821911

2190921912
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
@@ -21940,8 +21943,10 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2194021943
// implement atomicrmw without spilling. If the target address is also on the
2194121944
// stack and close enough to the spill slot, this can lead to a situation
2194221945
// where the monitor always gets cleared and the atomic operation can never
21943-
// succeed. So at -O0 lower this operation to a CAS loop.
21944-
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
21946+
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
21947+
// we have a single CAS instruction that can replace the loop.
21948+
if (getTargetMachine().getOptLevel() == CodeGenOpt::None ||
21949+
Subtarget->hasLSE())
2194521950
return AtomicExpansionKind::CmpXChg;
2194621951

2194721952
return AtomicExpansionKind::LLSC;

llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -360,14 +360,11 @@ define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) {
360360
;
361361
; CHECK-CAS-O1-LABEL: atomic_load_relaxed:
362362
; CHECK-CAS-O1: // %bb.0:
363-
; CHECK-CAS-O1-NEXT: .LBB4_1: // %atomicrmw.start
364-
; CHECK-CAS-O1-NEXT: // =>This Inner Loop Header: Depth=1
365-
; CHECK-CAS-O1-NEXT: ldxp x9, x8, [x2]
366-
; CHECK-CAS-O1-NEXT: stxp w10, x9, x8, [x2]
367-
; CHECK-CAS-O1-NEXT: cbnz w10, .LBB4_1
368-
; CHECK-CAS-O1-NEXT: // %bb.2: // %atomicrmw.end
369-
; CHECK-CAS-O1-NEXT: mov v0.d[0], x9
370-
; CHECK-CAS-O1-NEXT: mov v0.d[1], x8
363+
; CHECK-CAS-O1-NEXT: mov x0, xzr
364+
; CHECK-CAS-O1-NEXT: mov x1, xzr
365+
; CHECK-CAS-O1-NEXT: casp x0, x1, x0, x1, [x2]
366+
; CHECK-CAS-O1-NEXT: mov v0.d[0], x0
367+
; CHECK-CAS-O1-NEXT: mov v0.d[1], x1
371368
; CHECK-CAS-O1-NEXT: str q0, [x3]
372369
; CHECK-CAS-O1-NEXT: ret
373370
;

llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -362,15 +362,17 @@ define i32 @fetch_and_nand(ptr %p) #0 {
362362
;
363363
; CHECK-LSE-O1-LABEL: fetch_and_nand:
364364
; CHECK-LSE-O1: ; %bb.0:
365+
; CHECK-LSE-O1-NEXT: mov x8, x0
366+
; CHECK-LSE-O1-NEXT: ldr w0, [x0]
365367
; CHECK-LSE-O1-NEXT: LBB6_1: ; %atomicrmw.start
366368
; CHECK-LSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
367-
; CHECK-LSE-O1-NEXT: ldxr w8, [x0]
368-
; CHECK-LSE-O1-NEXT: and w9, w8, #0x7
369-
; CHECK-LSE-O1-NEXT: mvn w9, w9
370-
; CHECK-LSE-O1-NEXT: stlxr w10, w9, [x0]
371-
; CHECK-LSE-O1-NEXT: cbnz w10, LBB6_1
369+
; CHECK-LSE-O1-NEXT: mov x9, x0
370+
; CHECK-LSE-O1-NEXT: and w10, w0, #0x7
371+
; CHECK-LSE-O1-NEXT: mvn w10, w10
372+
; CHECK-LSE-O1-NEXT: casl w0, w10, [x8]
373+
; CHECK-LSE-O1-NEXT: cmp w0, w9
374+
; CHECK-LSE-O1-NEXT: b.ne LBB6_1
372375
; CHECK-LSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
373-
; CHECK-LSE-O1-NEXT: mov x0, x8
374376
; CHECK-LSE-O1-NEXT: ret
375377
;
376378
; CHECK-LSE-O0-LABEL: fetch_and_nand:
@@ -455,15 +457,17 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
455457
;
456458
; CHECK-LSE-O1-LABEL: fetch_and_nand_64:
457459
; CHECK-LSE-O1: ; %bb.0:
460+
; CHECK-LSE-O1-NEXT: mov x8, x0
461+
; CHECK-LSE-O1-NEXT: ldr x0, [x0]
458462
; CHECK-LSE-O1-NEXT: LBB7_1: ; %atomicrmw.start
459463
; CHECK-LSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
460-
; CHECK-LSE-O1-NEXT: ldaxr x8, [x0]
461-
; CHECK-LSE-O1-NEXT: and x9, x8, #0x7
462-
; CHECK-LSE-O1-NEXT: mvn x9, x9
463-
; CHECK-LSE-O1-NEXT: stlxr w10, x9, [x0]
464-
; CHECK-LSE-O1-NEXT: cbnz w10, LBB7_1
464+
; CHECK-LSE-O1-NEXT: mov x9, x0
465+
; CHECK-LSE-O1-NEXT: and x10, x0, #0x7
466+
; CHECK-LSE-O1-NEXT: mvn x10, x10
467+
; CHECK-LSE-O1-NEXT: casal x0, x10, [x8]
468+
; CHECK-LSE-O1-NEXT: cmp x0, x9
469+
; CHECK-LSE-O1-NEXT: b.ne LBB7_1
465470
; CHECK-LSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
466-
; CHECK-LSE-O1-NEXT: mov x0, x8
467471
; CHECK-LSE-O1-NEXT: ret
468472
;
469473
; CHECK-LSE-O0-LABEL: fetch_and_nand_64:

0 commit comments

Comments
 (0)