Skip to content

Commit f0f474d

Browse files
committed
[AArch64][SME] Add codegen pass to handle ZA state in arm_new_za functions.
The new pass implements the following: * Inserts code at the start of an arm_new_za function to commit a lazy-save when the lazy-save mechanism is active. * Adds a smstart intrinsic at the start of the function. * Adds a smstop intrinsic at the end of the function. Patch co-authored by kmclaughlin. Differential Revision: https://reviews.llvm.org/D133896
1 parent 08497a7 commit f0f474d

File tree

8 files changed

+224
-2
lines changed

8 files changed

+224
-2
lines changed

llvm/docs/AArch64SME.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ level ACLE attributes:
4040
``aarch64_pstate_za_preserved``
4141
is used for functions with ``__attribute__((arm_preserves_za))``
4242

43+
``aarch64_expanded_pstate_za``
44+
is used for functions with ``__attribute__((arm_new_za))``
45+
4346
Clang must ensure that the above attributes are added both to the
4447
function's declaration/definition as well as to their call-sites. This is
4548
important for calls to attributed function pointers, where there is no
@@ -423,8 +426,10 @@ to toggle PSTATE.ZA using intrinsics. This also makes it simpler to setup a
423426
lazy-save mechanism for calls to private-ZA functions (i.e. functions that may
424427
either directly or indirectly clobber ZA state).
425428

426-
For this purpose, we'll introduce a new LLVM IR pass that is run just before
427-
SelectionDAG.
429+
For the purpose of handling functions marked with ``aarch64_pstate_za_new``,
430+
we have introduced a new LLVM IR pass (SMEABIPass) that is run just before
431+
SelectionDAG. Any such functions dealt with by this pass are marked with
432+
``aarch64_expanded_pstate_za``.
428433

429434
Setting up a lazy-save
430435
----------------------

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ FunctionPass *createAArch64MIPeepholeOptPass();
5858
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
5959

6060
FunctionPass *createAArch64CollectLOHPass();
61+
FunctionPass *createSMEABIPass();
6162
ModulePass *createSVEIntrinsicOptsPass();
6263
InstructionSelector *
6364
createAArch64InstructionSelector(const AArch64TargetMachine &,
@@ -100,6 +101,7 @@ void initializeAArch64StorePairSuppressPass(PassRegistry&);
100101
void initializeFalkorHWPFFixPass(PassRegistry&);
101102
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
102103
void initializeLDTLSCleanupPass(PassRegistry&);
104+
void initializeSMEABIPass(PassRegistry &);
103105
void initializeSVEIntrinsicOptsPass(PassRegistry&);
104106
void initializeAArch64StackTaggingPass(PassRegistry&);
105107
void initializeAArch64StackTaggingPreRAPass(PassRegistry&);

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
224224
initializeFalkorHWPFFixPass(*PR);
225225
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
226226
initializeLDTLSCleanupPass(*PR);
227+
initializeSMEABIPass(*PR);
227228
initializeSVEIntrinsicOptsPass(*PR);
228229
initializeAArch64SpeculationHardeningPass(*PR);
229230
initializeAArch64SLSHardeningPass(*PR);
@@ -588,6 +589,11 @@ void AArch64PassConfig::addIRPasses() {
588589
addPass(createInterleavedAccessPass());
589590
}
590591

592+
// Expand any functions marked with SME attributes which require special
593+
// changes for the calling convention or that require the lazy-saving
594+
// mechanism specified in the SME ABI.
595+
addPass(createSMEABIPass());
596+
591597
// Add Control Flow Guard checks.
592598
if (TM->getTargetTriple().isOSWindows())
593599
addPass(createCFGuardCheckPass());

llvm/lib/Target/AArch64/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ add_llvm_target(AArch64CodeGen
8383
AArch64TargetMachine.cpp
8484
AArch64TargetObjectFile.cpp
8585
AArch64TargetTransformInfo.cpp
86+
SMEABIPass.cpp
8687
SVEIntrinsicOpts.cpp
8788
AArch64SIMDInstrOpt.cpp
8889

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
//===--------- SMEABI - SME ABI-------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass implements parts of the the SME ABI, such as:
10+
// * Using the lazy-save mechanism before enabling the use of ZA.
11+
// * Setting up the lazy-save mechanism around invokes.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "AArch64.h"
16+
#include "Utils/AArch64BaseInfo.h"
17+
#include "Utils/AArch64SMEAttributes.h"
18+
#include "llvm/ADT/SmallVector.h"
19+
#include "llvm/ADT/StringRef.h"
20+
#include "llvm/IR/Constants.h"
21+
#include "llvm/IR/IRBuilder.h"
22+
#include "llvm/IR/Instructions.h"
23+
#include "llvm/IR/IntrinsicInst.h"
24+
#include "llvm/IR/IntrinsicsAArch64.h"
25+
#include "llvm/IR/LLVMContext.h"
26+
#include "llvm/InitializePasses.h"
27+
#include "llvm/Support/Debug.h"
28+
#include "llvm/Transforms/Utils/Cloning.h"
29+
30+
using namespace llvm;
31+
32+
#define DEBUG_TYPE "aarch64-sme-abi"
33+
34+
namespace {
35+
struct SMEABI : public FunctionPass {
36+
static char ID; // Pass identification, replacement for typeid
37+
SMEABI() : FunctionPass(ID) {
38+
initializeSMEABIPass(*PassRegistry::getPassRegistry());
39+
}
40+
41+
bool runOnFunction(Function &F) override;
42+
void getAnalysisUsage(AnalysisUsage &AU) const override;
43+
44+
private:
45+
bool updateNewZAFunctions(Module *M, Function *F, IRBuilder<> &Builder);
46+
};
47+
} // end anonymous namespace
48+
49+
void SMEABI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); }
50+
51+
char SMEABI::ID = 0;
52+
static const char *name = "SME ABI Pass";
53+
INITIALIZE_PASS_BEGIN(SMEABI, DEBUG_TYPE, name, false, false)
54+
INITIALIZE_PASS_END(SMEABI, DEBUG_TYPE, name, false, false)
55+
56+
FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
57+
58+
//===----------------------------------------------------------------------===//
59+
// Utility functions
60+
//===----------------------------------------------------------------------===//
61+
62+
// Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0.
63+
void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
64+
auto *TPIDR2SaveTy =
65+
FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
66+
67+
auto Attrs =
68+
AttributeList::get(M->getContext(), 0, {"aarch64_pstate_sm_compatible"});
69+
FunctionCallee Callee =
70+
M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs);
71+
Builder.CreateCall(Callee);
72+
73+
// A save to TPIDR2 should be followed by clearing TPIDR2_EL0.
74+
Function *WriteIntr =
75+
Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_set_tpidr2);
76+
Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr,
77+
Builder.getInt64(0));
78+
}
79+
80+
/// This function generates code to commit a lazy save at the beginning of a
81+
/// function marked with `aarch64_pstate_za_new`. If the value read from
82+
/// TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme
83+
/// is active and we should call __arm_tpidr2_save to commit the lazy save.
84+
/// Additionally, PSTATE.ZA should be enabled at the beginning of the function
85+
/// and disabled before returning.
86+
bool SMEABI::updateNewZAFunctions(Module *M, Function *F,
87+
IRBuilder<> &Builder) {
88+
LLVMContext &Context = F->getContext();
89+
BasicBlock *OrigBB = &F->getEntryBlock();
90+
91+
// Create the new blocks for reading TPIDR2_EL0 & enabling ZA state.
92+
auto *SaveBB = OrigBB->splitBasicBlock(OrigBB->begin(), "save.za", true);
93+
auto *PreludeBB = BasicBlock::Create(Context, "prelude", F, SaveBB);
94+
95+
// Read TPIDR2_EL0 in PreludeBB & branch to SaveBB if not 0.
96+
Builder.SetInsertPoint(PreludeBB);
97+
Function *TPIDR2Intr =
98+
Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2);
99+
auto *TPIDR2 = Builder.CreateCall(TPIDR2Intr->getFunctionType(), TPIDR2Intr,
100+
{}, "tpidr2");
101+
auto *Cmp =
102+
Builder.CreateCmp(ICmpInst::ICMP_NE, TPIDR2, Builder.getInt64(0), "cmp");
103+
Builder.CreateCondBr(Cmp, SaveBB, OrigBB);
104+
105+
// Create a call __arm_tpidr2_save, which commits the lazy save.
106+
Builder.SetInsertPoint(&SaveBB->back());
107+
emitTPIDR2Save(M, Builder);
108+
109+
// Enable pstate.za at the start of the function.
110+
Builder.SetInsertPoint(&OrigBB->front());
111+
Function *EnableZAIntr =
112+
Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable);
113+
Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr);
114+
115+
// Before returning, disable pstate.za
116+
for (BasicBlock &BB : F->getBasicBlockList()) {
117+
Instruction *T = BB.getTerminator();
118+
if (!T || !isa<ReturnInst>(T))
119+
continue;
120+
Builder.SetInsertPoint(T);
121+
Function *DisableZAIntr =
122+
Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable);
123+
Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr);
124+
}
125+
126+
F->addFnAttr("aarch64_expanded_pstate_za");
127+
return true;
128+
}
129+
130+
bool SMEABI::runOnFunction(Function &F) {
131+
Module *M = F.getParent();
132+
LLVMContext &Context = F.getContext();
133+
IRBuilder<> Builder(Context);
134+
135+
if (F.isDeclaration() || F.hasFnAttribute("aarch64_expanded_pstate_za"))
136+
return false;
137+
138+
bool Changed = false;
139+
SMEAttrs FnAttrs(F);
140+
if (FnAttrs.hasNewZAInterface())
141+
Changed |= updateNewZAFunctions(M, &F, Builder);
142+
143+
return Changed;
144+
}

llvm/test/CodeGen/AArch64/O0-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
2727
; CHECK-NEXT: Expand reduction intrinsics
2828
; CHECK-NEXT: AArch64 Stack Tagging
29+
; CHECK-NEXT: SME ABI Pass
2930
; CHECK-NEXT: Exception handling preparation
3031
; CHECK-NEXT: Safe Stack instrumentation pass
3132
; CHECK-NEXT: Insert stack protectors

llvm/test/CodeGen/AArch64/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
; CHECK-NEXT: Interleaved Load Combine Pass
9393
; CHECK-NEXT: Dominator Tree Construction
9494
; CHECK-NEXT: Interleaved Access Pass
95+
; CHECK-NEXT: SME ABI Pass
9596
; CHECK-NEXT: Natural Loop Information
9697
; CHECK-NEXT: Type Promotion
9798
; CHECK-NEXT: CodeGen Prepare
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s
2+
; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s
3+
4+
declare void @shared_za_callee() "aarch64_pstate_za_shared"
5+
6+
define void @private_za() "aarch64_pstate_za_new" {
7+
; CHECK-LABEL: @private_za(
8+
; CHECK-NEXT: prelude:
9+
; CHECK-NEXT: [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2()
10+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0
11+
; CHECK-NEXT: br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[TMP0:%.*]]
12+
; CHECK: save.za:
13+
; CHECK-NEXT: call void @__arm_tpidr2_save()
14+
; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0)
15+
; CHECK-NEXT: br label [[TMP0]]
16+
; CHECK: 0:
17+
; CHECK-NEXT: call void @llvm.aarch64.sme.za.enable()
18+
; CHECK-NEXT: call void @shared_za_callee()
19+
; CHECK-NEXT: call void @llvm.aarch64.sme.za.disable()
20+
; CHECK-NEXT: ret void
21+
;
22+
call void @shared_za_callee()
23+
ret void
24+
}
25+
26+
define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_pstate_za_new" {
27+
; CHECK-LABEL: @private_za_multiple_exit(
28+
; CHECK-NEXT: prelude:
29+
; CHECK-NEXT: [[TPIDR2:%.*]] = call i64 @llvm.aarch64.sme.get.tpidr2()
30+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0
31+
; CHECK-NEXT: br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[ENTRY:%.*]]
32+
; CHECK: save.za:
33+
; CHECK-NEXT: call void @__arm_tpidr2_save()
34+
; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0)
35+
; CHECK-NEXT: br label [[ENTRY]]
36+
; CHECK: entry:
37+
; CHECK-NEXT: call void @llvm.aarch64.sme.za.enable()
38+
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[COND:%.*]], 1
39+
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
40+
; CHECK: if.else:
41+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
42+
; CHECK-NEXT: call void @llvm.aarch64.sme.za.disable()
43+
; CHECK-NEXT: ret i32 [[ADD]]
44+
; CHECK: if.end:
45+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A]], [[B]]
46+
; CHECK-NEXT: call void @llvm.aarch64.sme.za.disable()
47+
; CHECK-NEXT: ret i32 [[SUB]]
48+
;
49+
entry:
50+
%tobool = icmp eq i64 %cond, 1
51+
br i1 %tobool, label %if.else, label %if.end
52+
53+
if.else:
54+
%add = add i32 %a, %b
55+
ret i32 %add
56+
57+
if.end:
58+
%sub = sub i32 %a, %b
59+
ret i32 %sub
60+
}
61+
62+
; CHECK: declare "aarch64_pstate_sm_compatible" void @__arm_tpidr2_save()

0 commit comments

Comments
 (0)