Skip to content

Commit b5cf030

Browse files
authored
[AArch64][SME] Fix accessing the emergency spill slot with hazard padding (#142190)
This patch fixes an issue where, when hazard padding was enabled, locals (including the emergency spill slot) could not be directly addressed. Generally, this is fine: we can materialize the constant offset in a scratch register. However, if there is no free register we need to spill, and if we cannot even reach the emergency spill slot then we fail to compile. This patch fixes this by ensuring that if a function has variable-sized objects and is likely to have hazard padding, we enable the base pointer. Then, if we know a function has hazard padding, we place the emergency spill slot next to the BP/SP to ensure it can be directly accessed without stepping over any hazard padding.
1 parent 3a45d55 commit b5cf030

File tree

3 files changed

+130
-18
lines changed

3 files changed

+130
-18
lines changed

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "AArch64Subtarget.h"
1919
#include "MCTargetDesc/AArch64AddressingModes.h"
2020
#include "MCTargetDesc/AArch64InstPrinter.h"
21+
#include "Utils/AArch64SMEAttributes.h"
2122
#include "llvm/ADT/BitVector.h"
2223
#include "llvm/BinaryFormat/Dwarf.h"
2324
#include "llvm/CodeGen/LiveRegMatrix.h"
@@ -632,14 +633,27 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
632633
return true;
633634

634635
auto &ST = MF.getSubtarget<AArch64Subtarget>();
636+
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
635637
if (ST.hasSVE() || ST.isStreaming()) {
636-
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
637638
// Frames that have variable sized objects and scalable SVE objects,
638639
// should always use a basepointer.
639640
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
640641
return true;
641642
}
642643

644+
// Frames with hazard padding can have a large offset between the frame
645+
// pointer and GPR locals, which includes the emergency spill slot. If the
646+
// emergency spill slot is not within range of the load/store instructions
647+
// (which have a signed 9-bit range), we will fail to compile if it is used.
648+
// Since hasBasePointer() is called before we know if we have hazard padding
649+
// or an emergency spill slot we need to enable the basepointer
650+
// conservatively.
651+
if (AFI->hasStackHazardSlotIndex() ||
652+
(ST.getStreamingHazardSize() &&
653+
!SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody())) {
654+
return true;
655+
}
656+
643657
// Conservatively estimate whether the negative offset from the frame
644658
// pointer will be sufficient to reach. If a function has a smallish
645659
// frame, it's less likely to have lots of spills and callee saved
@@ -764,7 +778,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
764778
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
765779
AFI->hasCalculatedStackSizeSVE()) &&
766780
"Expected SVE area to be calculated by this point");
767-
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE();
781+
return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
782+
!AFI->hasStackHazardSlotIndex();
768783
}
769784

770785
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-stack-hazard-size=1024 -run-pass=prologepilog %s -o - | FileCheck %s
2+
--- |
3+
4+
define void @stack_hazard_streaming_compat() "aarch64_pstate_sm_compatible" { entry: unreachable }
5+
define void @stack_hazard_streaming_compat_emergency_spill_slot() "aarch64_pstate_sm_compatible" { entry: unreachable }
6+
7+
...
8+
9+
# +------------------+
10+
# | GPR callee-saves |
11+
# +------------------+ <- FP
12+
# | <hazard padding> |
13+
# +------------------+
14+
# | FPR locals |
15+
# | %stack.1 |
16+
# +------------------+
17+
# | <hazard padding> |
18+
# +------------------+
19+
# | GPR locals |
20+
# | %stack.2 |
21+
# | <emergency spill>|
22+
# +------------------+ <- BP
23+
# | <VLA> |
24+
# +------------------+ <- SP (can't be used due to VLA)
25+
26+
# In this case without the base pointer we'd need the emergency spill slot to
27+
# access both %stack.1 and %stack.2. With the base pointer we can reach both
28+
# without spilling.
29+
30+
name: stack_hazard_streaming_compat
31+
# CHECK-LABEL: name: stack_hazard_streaming_compat
32+
# CHECK: bb.0:
33+
# CHECK: STRDui $d0, $x19, 131
34+
# CHECK-NEXT: STRXui $x0, $x19, 1
35+
# CHECK: bb.1:
36+
tracksRegLiveness: true
37+
frameInfo:
38+
isFrameAddressTaken: true
39+
stack:
40+
- { id: 0, type: variable-sized, alignment: 1 }
41+
- { id: 1, size: 8, alignment: 8 }
42+
- { id: 2, size: 8, alignment: 8 }
43+
body: |
44+
bb.0:
45+
liveins: $x0, $x8, $d0
46+
$x9 = LDRXui $x0, 0 :: (load (s64))
47+
STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1)
48+
STRXui $x0, %stack.2, 0 :: (store (s64) into %stack.2)
49+
B %bb.1
50+
bb.1:
51+
liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
52+
RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr
53+
...
54+
---
55+
# +------------------+
56+
# | GPR callee-saves |
57+
# +------------------+ <- FP
58+
# | <hazard padding> |
59+
# +------------------+
60+
# | FPR locals |
61+
# | %stack.1 |
62+
# +------------------+
63+
# | <hazard padding> |
64+
# +------------------+
65+
# | GPR locals |
66+
# | %stack.2 | (very large)
67+
# | <emergency spill>|
68+
# +------------------+ <- BP
69+
# | <VLA> |
70+
# +------------------+ <- SP (can't be used due to VLA)
71+
72+
# In this case we need to use the emergency spill slot to access %stack.1 as it
73+
# is too far from the frame pointer and the base pointer to directly address.
74+
# Note: This also tests that the <emergency spill> located near the SP/BP.
75+
76+
name: stack_hazard_streaming_compat_emergency_spill_slot
77+
# CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot
78+
# CHECK: bb.0:
79+
# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0
80+
# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0
81+
# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095
82+
# CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0
83+
# CHECK: bb.1:
84+
tracksRegLiveness: true
85+
frameInfo:
86+
isFrameAddressTaken: true
87+
stack:
88+
- { id: 0, type: variable-sized, alignment: 1 }
89+
- { id: 1, size: 8, alignment: 8 }
90+
- { id: 2, size: 32761, alignment: 8 }
91+
body: |
92+
bb.0:
93+
liveins: $x0, $x8, $d0
94+
$x9 = LDRXui $x0, 0 :: (load (s64))
95+
STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1)
96+
B %bb.1
97+
bb.1:
98+
liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr
99+
RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,12 +2905,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29052905
; CHECK64-NEXT: mov x9, sp
29062906
; CHECK64-NEXT: mov w20, w0
29072907
; CHECK64-NEXT: msub x9, x8, x8, x9
2908+
; CHECK64-NEXT: mov x19, sp
29082909
; CHECK64-NEXT: mov sp, x9
2909-
; CHECK64-NEXT: stur x9, [x29, #-208]
2910-
; CHECK64-NEXT: sub x9, x29, #208
2911-
; CHECK64-NEXT: sturh wzr, [x29, #-198]
2912-
; CHECK64-NEXT: stur wzr, [x29, #-196]
2913-
; CHECK64-NEXT: sturh w8, [x29, #-200]
2910+
; CHECK64-NEXT: str x9, [x19]
2911+
; CHECK64-NEXT: add x9, x19, #0
2912+
; CHECK64-NEXT: strh wzr, [x19, #10]
2913+
; CHECK64-NEXT: str wzr, [x19, #12]
2914+
; CHECK64-NEXT: strh w8, [x19, #8]
29142915
; CHECK64-NEXT: msr TPIDR2_EL0, x9
29152916
; CHECK64-NEXT: .cfi_offset vg, -32
29162917
; CHECK64-NEXT: smstop sm
@@ -2919,7 +2920,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29192920
; CHECK64-NEXT: .cfi_restore vg
29202921
; CHECK64-NEXT: smstart za
29212922
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
2922-
; CHECK64-NEXT: sub x0, x29, #208
2923+
; CHECK64-NEXT: add x0, x19, #0
29232924
; CHECK64-NEXT: cbnz x8, .LBB33_2
29242925
; CHECK64-NEXT: // %bb.1: // %entry
29252926
; CHECK64-NEXT: bl __arm_tpidr2_restore
@@ -2985,16 +2986,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29852986
; CHECK1024-NEXT: mov x9, sp
29862987
; CHECK1024-NEXT: mov w20, w0
29872988
; CHECK1024-NEXT: msub x9, x8, x8, x9
2989+
; CHECK1024-NEXT: mov x19, sp
29882990
; CHECK1024-NEXT: mov sp, x9
2989-
; CHECK1024-NEXT: sub x10, x29, #1872
2990-
; CHECK1024-NEXT: stur x9, [x10, #-256]
2991-
; CHECK1024-NEXT: sub x9, x29, #1862
2992-
; CHECK1024-NEXT: sub x10, x29, #1860
2993-
; CHECK1024-NEXT: sturh wzr, [x9, #-256]
2994-
; CHECK1024-NEXT: sub x9, x29, #2128
2995-
; CHECK1024-NEXT: stur wzr, [x10, #-256]
2996-
; CHECK1024-NEXT: sub x10, x29, #1864
2997-
; CHECK1024-NEXT: sturh w8, [x10, #-256]
2991+
; CHECK1024-NEXT: str x9, [x19]
2992+
; CHECK1024-NEXT: add x9, x19, #0
2993+
; CHECK1024-NEXT: strh wzr, [x19, #10]
2994+
; CHECK1024-NEXT: str wzr, [x19, #12]
2995+
; CHECK1024-NEXT: strh w8, [x19, #8]
29982996
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
29992997
; CHECK1024-NEXT: .cfi_offset vg, -32
30002998
; CHECK1024-NEXT: smstop sm
@@ -3003,7 +3001,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
30033001
; CHECK1024-NEXT: .cfi_restore vg
30043002
; CHECK1024-NEXT: smstart za
30053003
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
3006-
; CHECK1024-NEXT: sub x0, x29, #2128
3004+
; CHECK1024-NEXT: add x0, x19, #0
30073005
; CHECK1024-NEXT: cbnz x8, .LBB33_2
30083006
; CHECK1024-NEXT: // %bb.1: // %entry
30093007
; CHECK1024-NEXT: bl __arm_tpidr2_restore

0 commit comments

Comments
 (0)