Skip to content

Commit 75769f2

Browse files
committed
[PGO] Make the PGO instrumentation insert point after alloca
We're changing PGO instrumentation to insert the intrinsic after alloca instructions. For sampled instrumentation, a conditional check is placed before the intrinsic. If this intrinsic comes before an alloca, the alloca (whose size might be unknown due to Phi node) becomes conditional, resulting in inefficient code. We have seen some stack overflows due to this. This patch guarantees the intrinsic is always after the alloca.
1 parent 040f5ee commit 75769f2

File tree

3 files changed

+43
-4
lines changed

3 files changed

+43
-4
lines changed

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
855855
auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
856856
// There are basic blocks (such as catchswitch) cannot be instrumented.
857857
// If the returned first insertion point is the end of BB, skip this BB.
858-
if (BB->getFirstInsertionPt() == BB->end())
858+
if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
859859
return nullptr;
860860
return BB;
861861
};
@@ -952,7 +952,7 @@ void FunctionInstrumenter::instrument() {
952952
Name, PointerType::get(M.getContext(), 0));
953953
if (PGOFunctionEntryCoverage) {
954954
auto &EntryBB = F.getEntryBlock();
955-
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
955+
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
956956
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
957957
// i32 <index>)
958958
Builder.CreateIntrinsic(
@@ -1010,7 +1010,7 @@ void FunctionInstrumenter::instrument() {
10101010
if (PGOTemporalInstrumentation) {
10111011
NumCounters += PGOBlockCoverage ? 8 : 1;
10121012
auto &EntryBB = F.getEntryBlock();
1013-
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
1013+
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
10141014
// llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
10151015
// i32 <index>)
10161016
Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
@@ -1021,7 +1021,7 @@ void FunctionInstrumenter::instrument() {
10211021
}
10221022

10231023
for (auto *InstrBB : InstrumentBBs) {
1024-
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
1024+
IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
10251025
assert(Builder.GetInsertPoint() != InstrBB->end() &&
10261026
"Cannot get the Instrumentation point");
10271027
// llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; Note: Make sure that instrumention intrinsic is after entry alloca.
2+
;RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
3+
4+
%struct.A = type { i32, [0 x i32] }
5+
6+
; CHECK-LABEL: @foo()
7+
; CHECK-NEXT: %1 = alloca %struct.A
8+
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo
9+
; CHECK-NEXT: call void @bar(ptr
10+
11+
define dso_local i32 @foo() {
12+
%1 = alloca %struct.A, align 4
13+
call void @bar(ptr noundef nonnull %1) #3
14+
%2 = load i32, ptr %1, align 4
15+
%3 = icmp sgt i32 %2, 0
16+
br i1 %3, label %4, label %15
17+
18+
4:
19+
%5 = getelementptr inbounds i8, ptr %1, i64 4
20+
%6 = zext nneg i32 %2 to i64
21+
br label %7
22+
23+
7:
24+
%8 = phi i64 [ 0, %4 ], [ %13, %7 ]
25+
%9 = phi i32 [ 0, %4 ], [ %12, %7 ]
26+
%10 = getelementptr inbounds [0 x i32], ptr %5, i64 0, i64 %8
27+
%11 = load i32, ptr %10, align 4
28+
%12 = add nsw i32 %11, %9
29+
%13 = add nuw nsw i64 %8, 1
30+
%14 = icmp eq i64 %13, %6
31+
br i1 %14, label %15, label %7
32+
33+
15:
34+
%16 = phi i32 [ 0, %0 ], [ %12, %7 ]
35+
ret i32 %16
36+
}
37+
38+
declare void @bar(ptr noundef)

llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ if.end: ; preds = %if.end.preheader, %
4242
;; The edge will not be profiled.
4343
; CHECK-LABEL: @cannot_split(
4444
; CHECK-NEXT: entry:
45+
; CHECK-NEXT: %targets = alloca <2 x ptr>, align 16
4546
; CHECK-NEXT: call void @llvm.instrprof.increment
4647
; CHECK: indirect:
4748
; CHECK-NOT: call void @llvm.instrprof.increment

0 commit comments

Comments
 (0)