
Commit 8fbee8c

Dudeldurorth authored and committed
[AArch64] Fix invalid address-mode folding (llvm#142167)
In some cases, we are too aggressive when folding an add-lsl into an ldr/str due to an accidental truncation of the 64-bit scale to 32-bit. In cases where we shift by more than 31 bits (which is valid for 64-bit registers), we just drop the shift...
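As a minimal standalone illustration of that truncation (not LLVM code; scaleFitsIn32Bits is a hypothetical helper that mirrors the one-line guard added in the diff below): once the shift amount reaches 32, the 64-bit scale no longer survives a round trip through 32 bits, so any consumer that only sees the low 32 bits effectively drops the shift.

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring the added guard: reject any 64-bit scale
// that does not round-trip through 32 bits instead of silently folding
// with the shift dropped.
static bool scaleFitsIn32Bits(int64_t Scale) {
  return (unsigned)Scale == Scale;
}

int main() {
  assert(scaleFitsIn32Bits(int64_t(1) << 3));   // lsl #3  -> scale 8, representable
  assert(scaleFitsIn32Bits(int64_t(1) << 5));   // lsl #5  -> scale 32 still fits; the
                                                // existing isLegalAddressingMode check
                                                // rejects it later
  assert(!scaleFitsIn32Bits(int64_t(1) << 32)); // lsl #32 -> low 32 bits are 0
  assert(!scaleFitsIn32Bits(int64_t(1) << 35)); // lsl #35 -> low 32 bits are 0
  return 0;
}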
1 parent 43768d3 commit 8fbee8c

2 files changed: 49 additions, 0 deletions


llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 2 additions & 0 deletions
@@ -3232,6 +3232,8 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
     ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
     if (MemI.getOperand(2).getImm() != 0)
       return false;
+    if ((unsigned)Scale != Scale)
+      return false;
     if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
       return false;
     AM.BaseReg = AddrI.getOperand(1).getReg();
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -o - %s -run-pass machine-sink | FileCheck %s
+
+# We want to fuse an add+lsl into an ldr, but we have to be careful with the
+# shift distance: only specific shift amounts can be represented (e.g. 3),
+# nothing large like 32.
+
+--- |
+  define dso_local i64 @fuse_shift_add_into_addr_mode() {
+  entry:
+    ret i64 0
+  }
+
+---
+name: fuse_shift_add_into_addr_mode
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: fuse_shift_add_into_addr_mode
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 1 :: (load (s64))
+    ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY]], [[COPY1]], 5
+    ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64))
+    ; CHECK-NEXT: [[ADDXrs1:%[0-9]+]]:gpr64common = ADDXrs [[COPY]], [[COPY1]], 32
+    ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs1]], 0 :: (load (s64))
+    ; CHECK-NEXT: [[ADDXrs2:%[0-9]+]]:gpr64common = ADDXrs [[COPY]], [[COPY1]], 35
+    ; CHECK-NEXT: [[LDRXui2:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs1]], 0 :: (load (s64))
+    ; CHECK-NEXT: [[ADDXrs3:%[0-9]+]]:gpr64common = ADDXrs [[COPY]], [[COPY1]], 63
+    ; CHECK-NEXT: [[LDRXui3:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs1]], 0 :: (load (s64))
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:gpr64common = ADDXrs %0, %1, 3
+    %3:gpr64 = LDRXui %2, 0 :: (load (s64))
+    %4:gpr64common = ADDXrs %0, %1, 5
+    %5:gpr64 = LDRXui %4, 0 :: (load (s64))
+    %6:gpr64common = ADDXrs %0, %1, 32
+    %7:gpr64 = LDRXui %6, 0 :: (load (s64))
+    %8:gpr64common = ADDXrs %0, %1, 35
+    %9:gpr64 = LDRXui %6, 0 :: (load (s64))
+    %10:gpr64common = ADDXrs %0, %1, 63
+    %11:gpr64 = LDRXui %6, 0 :: (load (s64))
+    RET_ReallyLR implicit $x0
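The comment at the top of the new test hints at why only the shift-by-3 case becomes an LDRXroX: an AArch64 register-offset load can shift the offset register only by 0 or by log2 of the access size. A rough sketch of that constraint (isEncodableShift is a made-up helper for illustration, not an LLVM API):

#include <cassert>

// For a register-offset LDR/STR, the offset register may be shifted left
// only by 0 or by log2(access size in bytes); a 64-bit ldr accepts lsl #3
// but nothing like #5, #32, #35, or #63.
static bool isEncodableShift(unsigned AccessBytes, unsigned ShiftAmt) {
  unsigned Log2Size = 0;
  while ((1u << Log2Size) < AccessBytes)
    ++Log2Size;
  return ShiftAmt == 0 || ShiftAmt == Log2Size;
}

int main() {
  assert(isEncodableShift(8, 3));   // folds to LDRXroX in the test above
  assert(!isEncodableShift(8, 5));  // stays as ADDXrs + LDRXui
  assert(!isEncodableShift(8, 32)); // stays as ADDXrs + LDRXui
  assert(!isEncodableShift(8, 63)); // stays as ADDXrs + LDRXui
  return 0;
}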
