Skip to content

Commit 2481e59

Browse files
MacDue authored and tstellar committed
[AArch64][SME] Fix accessing the emergency spill slot with hazard padding (llvm#142190)
This patch fixes an issue where, when hazard padding was enabled, locals (including the emergency spill slot) could not be directly addressed. Generally, this is fine: we can materialize the constant offset in a scratch register. However, if there is no register free we need to spill, and if we can't even reach the emergency spill slot then we fail to compile. This patch fixes this by ensuring that if a function has variable-sized objects and is likely to have hazard padding, we enable the base pointer. Then, if we know a function has hazard padding, we place the emergency spill slot next to the BP/SP, to ensure it can be directly accessed without stepping over any hazard padding. (cherry picked from commit b5cf030)
1 parent 22a3e6b commit 2481e59

File tree

3 files changed

+130
-18
lines changed

3 files changed

+130
-18
lines changed

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "AArch64Subtarget.h"
1919
#include "MCTargetDesc/AArch64AddressingModes.h"
2020
#include "MCTargetDesc/AArch64InstPrinter.h"
21+
#include "Utils/AArch64SMEAttributes.h"
2122
#include "llvm/ADT/BitVector.h"
2223
#include "llvm/BinaryFormat/Dwarf.h"
2324
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -615,14 +616,27 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
615616
return true;
616617

617618
auto &ST = MF.getSubtarget<AArch64Subtarget>();
619+
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
618620
if (ST.hasSVE() || ST.isStreaming()) {
619-
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
620621
// Frames that have variable sized objects and scalable SVE objects,
621622
// should always use a basepointer.
622623
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
623624
return true;
624625
}
625626

627+
// Frames with hazard padding can have a large offset between the frame
628+
// pointer and GPR locals, which includes the emergency spill slot. If the
629+
// emergency spill slot is not within range of the load/store instructions
630+
// (which have a signed 9-bit range), we will fail to compile if it is used.
631+
// Since hasBasePointer() is called before we know if we have hazard padding
632+
// or an emergency spill slot we need to enable the basepointer
633+
// conservatively.
634+
if (AFI->hasStackHazardSlotIndex() ||
635+
(ST.getStreamingHazardSize() &&
636+
!SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody())) {
637+
return true;
638+
}
639+
626640
// Conservatively estimate whether the negative offset from the frame
627641
// pointer will be sufficient to reach. If a function has a smallish
628642
// frame, it's less likely to have lots of spills and callee saved
@@ -747,7 +761,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
747761
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
748762
AFI->hasCalculatedStackSizeSVE()) &&
749763
"Expected SVE area to be calculated by this point");
750-
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE();
764+
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
765+
!AFI->hasStackHazardSlotIndex();
751766
}
752767

753768
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-stack-hazard-size=1024 -run-pass=prologepilog %s -o - | FileCheck %s
2+
--- |
3+
4+
define void @stack_hazard_streaming_compat() "aarch64_pstate_sm_compatible" { entry: unreachable }
5+
define void @stack_hazard_streaming_compat_emergency_spill_slot() "aarch64_pstate_sm_compatible" { entry: unreachable }
6+
7+
...
8+
9+
# +------------------+
10+
# | GPR callee-saves |
11+
# +------------------+ <- FP
12+
# | <hazard padding> |
13+
# +------------------+
14+
# | FPR locals |
15+
# | %stack.1 |
16+
# +------------------+
17+
# | <hazard padding> |
18+
# +------------------+
19+
# | GPR locals |
20+
# | %stack.2 |
21+
# | <emergency spill>|
22+
# +------------------+ <- BP
23+
# | <VLA> |
24+
# +------------------+ <- SP (can't be used due to VLA)
25+
26+
# In this case without the base pointer we'd need the emergency spill slot to
27+
# access both %stack.1 and %stack.2. With the base pointer we can reach both
28+
# without spilling.
29+
30+
name: stack_hazard_streaming_compat
31+
# CHECK-LABEL: name: stack_hazard_streaming_compat
32+
# CHECK: bb.0:
33+
# CHECK: STRDui $d0, $x19, 131
34+
# CHECK-NEXT: STRXui $x0, $x19, 1
35+
# CHECK: bb.1:
36+
tracksRegLiveness: true
37+
frameInfo:
38+
isFrameAddressTaken: true
39+
stack:
40+
- { id: 0, type: variable-sized, alignment: 1 }
41+
- { id: 1, size: 8, alignment: 8 }
42+
- { id: 2, size: 8, alignment: 8 }
43+
body: |
44+
bb.0:
45+
liveins: $x0, $x8, $d0
46+
$x9 = LDRXui $x0, 0 :: (load (s64))
47+
STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1)
48+
STRXui $x0, %stack.2, 0 :: (store (s64) into %stack.2)
49+
B %bb.1
50+
bb.1:
51+
liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
52+
RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr
53+
...
54+
---
55+
# +------------------+
56+
# | GPR callee-saves |
57+
# +------------------+ <- FP
58+
# | <hazard padding> |
59+
# +------------------+
60+
# | FPR locals |
61+
# | %stack.1 |
62+
# +------------------+
63+
# | <hazard padding> |
64+
# +------------------+
65+
# | GPR locals |
66+
# | %stack.2 | (very large)
67+
# | <emergency spill>|
68+
# +------------------+ <- BP
69+
# | <VLA> |
70+
# +------------------+ <- SP (can't be used due to VLA)
71+
72+
# In this case we need to use the emergency spill slot to access %stack.1 as it
73+
# is too far from the frame pointer and the base pointer to directly address.
74+
# Note: This also tests that the <emergency spill> is located near the SP/BP.
75+
76+
name: stack_hazard_streaming_compat_emergency_spill_slot
77+
# CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot
78+
# CHECK: bb.0:
79+
# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0
80+
# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0
81+
# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095
82+
# CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0
83+
# CHECK: bb.1:
84+
tracksRegLiveness: true
85+
frameInfo:
86+
isFrameAddressTaken: true
87+
stack:
88+
- { id: 0, type: variable-sized, alignment: 1 }
89+
- { id: 1, size: 8, alignment: 8 }
90+
- { id: 2, size: 32761, alignment: 8 }
91+
body: |
92+
bb.0:
93+
liveins: $x0, $x8, $d0
94+
$x9 = LDRXui $x0, 0 :: (load (s64))
95+
STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1)
96+
B %bb.1
97+
bb.1:
98+
liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
99+
RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2911,12 +2911,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29112911
; CHECK64-NEXT: mov x9, sp
29122912
; CHECK64-NEXT: mov w20, w0
29132913
; CHECK64-NEXT: msub x9, x8, x8, x9
2914+
; CHECK64-NEXT: mov x19, sp
29142915
; CHECK64-NEXT: mov sp, x9
2915-
; CHECK64-NEXT: stur x9, [x29, #-208]
2916-
; CHECK64-NEXT: sub x9, x29, #208
2917-
; CHECK64-NEXT: sturh wzr, [x29, #-198]
2918-
; CHECK64-NEXT: stur wzr, [x29, #-196]
2919-
; CHECK64-NEXT: sturh w8, [x29, #-200]
2916+
; CHECK64-NEXT: str x9, [x19]
2917+
; CHECK64-NEXT: add x9, x19, #0
2918+
; CHECK64-NEXT: strh wzr, [x19, #10]
2919+
; CHECK64-NEXT: str wzr, [x19, #12]
2920+
; CHECK64-NEXT: strh w8, [x19, #8]
29202921
; CHECK64-NEXT: msr TPIDR2_EL0, x9
29212922
; CHECK64-NEXT: .cfi_offset vg, -32
29222923
; CHECK64-NEXT: smstop sm
@@ -2925,7 +2926,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29252926
; CHECK64-NEXT: .cfi_restore vg
29262927
; CHECK64-NEXT: smstart za
29272928
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
2928-
; CHECK64-NEXT: sub x0, x29, #208
2929+
; CHECK64-NEXT: add x0, x19, #0
29292930
; CHECK64-NEXT: cbnz x8, .LBB33_2
29302931
; CHECK64-NEXT: // %bb.1: // %entry
29312932
; CHECK64-NEXT: bl __arm_tpidr2_restore
@@ -2991,16 +2992,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29912992
; CHECK1024-NEXT: mov x9, sp
29922993
; CHECK1024-NEXT: mov w20, w0
29932994
; CHECK1024-NEXT: msub x9, x8, x8, x9
2995+
; CHECK1024-NEXT: mov x19, sp
29942996
; CHECK1024-NEXT: mov sp, x9
2995-
; CHECK1024-NEXT: sub x10, x29, #1872
2996-
; CHECK1024-NEXT: stur x9, [x10, #-256]
2997-
; CHECK1024-NEXT: sub x9, x29, #1862
2998-
; CHECK1024-NEXT: sub x10, x29, #1860
2999-
; CHECK1024-NEXT: sturh wzr, [x9, #-256]
3000-
; CHECK1024-NEXT: sub x9, x29, #2128
3001-
; CHECK1024-NEXT: stur wzr, [x10, #-256]
3002-
; CHECK1024-NEXT: sub x10, x29, #1864
3003-
; CHECK1024-NEXT: sturh w8, [x10, #-256]
2997+
; CHECK1024-NEXT: str x9, [x19]
2998+
; CHECK1024-NEXT: add x9, x19, #0
2999+
; CHECK1024-NEXT: strh wzr, [x19, #10]
3000+
; CHECK1024-NEXT: str wzr, [x19, #12]
3001+
; CHECK1024-NEXT: strh w8, [x19, #8]
30043002
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
30053003
; CHECK1024-NEXT: .cfi_offset vg, -32
30063004
; CHECK1024-NEXT: smstop sm
@@ -3009,7 +3007,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
30093007
; CHECK1024-NEXT: .cfi_restore vg
30103008
; CHECK1024-NEXT: smstart za
30113009
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
3012-
; CHECK1024-NEXT: sub x0, x29, #2128
3010+
; CHECK1024-NEXT: add x0, x19, #0
30133011
; CHECK1024-NEXT: cbnz x8, .LBB33_2
30143012
; CHECK1024-NEXT: // %bb.1: // %entry
30153013
; CHECK1024-NEXT: bl __arm_tpidr2_restore

0 commit comments

Comments
 (0)