Skip to content

Commit a60cdd3

Browse files
committed
Add function importing info from samplepgo profile to the module summary.
Summary: For SamplePGO, the profile may contain cross-module inline stacks. As we need to make sure the profile annotation happens when all the hot inline stacks are expanded, we need to pass this info to the module importer so that it can import proper functions if necessary. This patch implemented this feature by emitting cross-module targets as part of function entry metadata. In the module-summary phase, the metadata is used to build call edges that point to functions that need to be imported. Reviewers: mehdi_amini, tejohnson Reviewed By: tejohnson Subscribers: davidxl, llvm-commits Differential Revision: https://reviews.llvm.org/D30053 llvm-svn: 296498
1 parent 2fdabb0 commit a60cdd3

File tree

14 files changed

+139
-29
lines changed

14 files changed

+139
-29
lines changed

llvm/docs/BranchWeightMetadata.rst

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,11 @@ To allow comparing different functions during inter-procedural analysis and
123123
optimization, ``MD_prof`` nodes can also be assigned to a function definition.
124124
The first operand is a string indicating the name of the associated counter.
125125

126-
Currently, one counter is supported: "function_entry_count". This is a 64-bit
127-
counter that indicates the number of times that this function was invoked (in
128-
the case of instrumentation-based profiles). In the case of sampling-based
129-
profiles, this counter is an approximation of how many times the function was
130-
invoked.
126+
Currently, one counter is supported: "function_entry_count". The second operand
127+
is a 64-bit counter that indicates the number of times that this function was
128+
invoked (in the case of instrumentation-based profiles). In the case of
129+
sampling-based profiles, this operand is an approximation of how many times
130+
the function was invoked.
131131

132132
For example, in the code below, the instrumentation for function foo()
133133
indicates that it was called 2,590 times at runtime.
@@ -138,3 +138,13 @@ indicates that it was called 2,590 times at runtime.
138138
ret i32 0
139139
}
140140
!1 = !{!"function_entry_count", i64 2590}
141+
142+
If "function_entry_count" has more than 2 operands, the later operands are
143+
the GUIDs of the functions that need to be imported by ThinLTO. This is only
144+
set by sampling-based profiles. It is needed because the sampling-based profile
145+
was collected on a binary that had already imported and inlined these functions,
146+
and we need to ensure the IR matches in the ThinLTO backends for profile
147+
annotation. The reason why we cannot annotate this on the callsite is that it
148+
can only go down 1 level in the call chain. For the cases where
149+
foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels
150+
in the call chain to import both bar_in_b_cc and baz_in_c_cc.

llvm/include/llvm/IR/Function.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#ifndef LLVM_IR_FUNCTION_H
1919
#define LLVM_IR_FUNCTION_H
2020

21+
#include "llvm/ADT/DenseSet.h"
2122
#include "llvm/ADT/ilist_node.h"
2223
#include "llvm/ADT/iterator_range.h"
2324
#include "llvm/ADT/StringRef.h"
@@ -207,15 +208,22 @@ class Function : public GlobalObject, public ilist_node<Function> {
207208
/// \brief Set the entry count for this function.
208209
///
209210
/// Entry count is the number of times this function was executed based on
210-
/// pgo data.
211-
void setEntryCount(uint64_t Count);
211+
/// pgo data. \p Imports points to a set of GUIDs that need to be imported
212+
/// by the function for sample PGO, to enable the same inlines as the
213+
/// profiled optimized binary.
214+
void setEntryCount(uint64_t Count,
215+
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
212216

213217
/// \brief Get the entry count for this function.
214218
///
215219
/// Entry count is the number of times the function was executed based on
216220
/// pgo data.
217221
Optional<uint64_t> getEntryCount() const;
218222

223+
/// Returns the set of GUIDs that need to be imported to the function for
224+
/// sample PGO, to enable the same inlines as the profiled optimized binary.
225+
DenseSet<GlobalValue::GUID> getImportGUIDs() const;
226+
219227
/// Set the section prefix for this function.
220228
void setSectionPrefix(StringRef Prefix);
221229

llvm/include/llvm/IR/MDBuilder.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
#ifndef LLVM_IR_MDBUILDER_H
1616
#define LLVM_IR_MDBUILDER_H
1717

18+
#include "llvm/ADT/DenseSet.h"
1819
#include "llvm/ADT/StringRef.h"
20+
#include "llvm/IR/GlobalValue.h"
1921
#include "llvm/Support/DataTypes.h"
2022
#include <utility>
2123

@@ -63,8 +65,11 @@ class MDBuilder {
6365
/// Return metadata specifying that a branch or switch is unpredictable.
6466
MDNode *createUnpredictable();
6567

66-
/// Return metadata containing the entry count for a function.
67-
MDNode *createFunctionEntryCount(uint64_t Count);
68+
/// Return metadata containing the entry \p Count for a function, and the
69+
/// GUIDs stored in \p Imports that need to be imported for sample PGO, to
70+
/// enable the same inlines as the profiled optimized binary.
71+
MDNode *createFunctionEntryCount(uint64_t Count,
72+
const DenseSet<GlobalValue::GUID> *Imports);
6873

6974
/// Return metadata containing the section prefix for a function.
7075
MDNode *createFunctionSectionPrefix(StringRef Prefix);

llvm/include/llvm/ProfileData/SampleProf.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_
1616
#define LLVM_PROFILEDATA_SAMPLEPROF_H_
1717

18+
#include "llvm/ADT/DenseSet.h"
1819
#include "llvm/ADT/SmallVector.h"
1920
#include "llvm/ADT/StringMap.h"
21+
#include "llvm/IR/GlobalValue.h"
22+
#include "llvm/IR/Module.h"
2023
#include "llvm/Support/Debug.h"
2124
#include "llvm/Support/ErrorOr.h"
2225
#include "llvm/Support/raw_ostream.h"
@@ -300,6 +303,20 @@ class FunctionSamples {
300303
return Result;
301304
}
302305

306+
/// Recursively traverses all children, if the corresponding function is
307+
/// not defined in module \p M, and its total sample is no less than
308+
/// \p Threshold, add its corresponding GUID to \p S.
309+
void findImportedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
310+
uint64_t Threshold) const {
311+
if (TotalSamples <= Threshold)
312+
return;
313+
Function *F = M->getFunction(Name);
314+
if (!F || !F->getSubprogram())
315+
S.insert(Function::getGUID(Name));
316+
for (auto CS : CallsiteSamples)
317+
CS.second.findImportedFunctions(S, M, Threshold);
318+
}
319+
303320
/// Set the name of the function.
304321
void setName(StringRef FunctionName) { Name = FunctionName; }
305322

llvm/lib/Analysis/ModuleSummaryAnalysis.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
259259
}
260260
}
261261

262+
// Explicitly add hot edges to enforce importing for designated GUIDs for
263+
// sample PGO, to enable the same inlines as the profiled optimized binary.
264+
for (auto &I : F.getImportGUIDs())
265+
CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot);
266+
262267
bool NonRenamableLocal = isNonRenamableLocal(F);
263268
bool NotEligibleForImport =
264269
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||

llvm/lib/IR/Function.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,9 +1259,10 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
12591259
setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
12601260
}
12611261

1262-
void Function::setEntryCount(uint64_t Count) {
1262+
void Function::setEntryCount(uint64_t Count,
1263+
const DenseSet<GlobalValue::GUID> *S) {
12631264
MDBuilder MDB(getContext());
1264-
setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count));
1265+
setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S));
12651266
}
12661267

12671268
Optional<uint64_t> Function::getEntryCount() const {
@@ -1278,6 +1279,18 @@ Optional<uint64_t> Function::getEntryCount() const {
12781279
return None;
12791280
}
12801281

1282+
/// Returns the GUIDs stored on this function's !prof metadata.
///
/// The node is only inspected when its first operand is the string
/// "function_entry_count". Operand 1 is skipped here (it holds the entry
/// count); every operand from index 2 onwards is read as a ConstantInt and
/// its zero-extended value is inserted into the result. An empty set is
/// returned when there is no !prof node, when the node has no operands, when
/// its first operand is null or not the expected string, or when it has no
/// operands past the count.
DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
  DenseSet<GlobalValue::GUID> R;
  MDNode *MD = getMetadata(LLVMContext::MD_prof);
  // An empty node (e.g. !{}) has no operand 0, so check before indexing.
  if (!MD || MD->getNumOperands() == 0)
    return R;
  // The first operand may be null on unverified IR; tolerate that instead of
  // handing a null pointer to dyn_cast.
  MDString *MDS = dyn_cast_or_null<MDString>(MD->getOperand(0).get());
  if (!MDS || !MDS->getString().equals("function_entry_count"))
    return R;
  for (unsigned i = 2, e = MD->getNumOperands(); i < e; ++i)
    R.insert(mdconst::extract<ConstantInt>(MD->getOperand(i))->getZExtValue());
  return R;
}
1293+
12811294
void Function::setSectionPrefix(StringRef Prefix) {
12821295
MDBuilder MDB(getContext());
12831296
setMetadata(LLVMContext::MD_section_prefix,

llvm/lib/IR/MDBuilder.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,16 @@ MDNode *MDBuilder::createUnpredictable() {
5656
return MDNode::get(Context, None);
5757
}
5858

59-
MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
59+
MDNode *MDBuilder::createFunctionEntryCount(
60+
uint64_t Count, const DenseSet<GlobalValue::GUID> *Imports) {
6061
Type *Int64Ty = Type::getInt64Ty(Context);
61-
return MDNode::get(Context,
62-
{createString("function_entry_count"),
63-
createConstant(ConstantInt::get(Int64Ty, Count))});
62+
SmallVector<Metadata *, 8> Ops;
63+
Ops.push_back(createString("function_entry_count"));
64+
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
65+
if (Imports)
66+
for (auto ID : *Imports)
67+
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID)));
68+
return MDNode::get(Context, Ops);
6469
}
6570

6671
MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {

llvm/lib/IR/Verifier.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1653,8 +1653,8 @@ void Verifier::verifyFunctionMetadata(
16531653
for (const auto &Pair : MDs) {
16541654
if (Pair.first == LLVMContext::MD_prof) {
16551655
MDNode *MD = Pair.second;
1656-
Assert(MD->getNumOperands() == 2,
1657-
"!prof annotations should have exactly 2 operands", MD);
1656+
Assert(MD->getNumOperands() >= 2,
1657+
"!prof annotations should have no less than 2 operands", MD);
16581658

16591659
// Check first operand.
16601660
Assert(MD->getOperand(0) != nullptr, "first operand should not be null",

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,8 @@ class SampleProfileLoader {
163163
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
164164
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
165165
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
166-
bool inlineHotFunctions(Function &F);
166+
bool inlineHotFunctions(Function &F,
167+
DenseSet<GlobalValue::GUID> &ImportGUIDs);
167168
void printEdgeWeight(raw_ostream &OS, Edge E);
168169
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
169170
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -604,9 +605,12 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
604605
/// it to direct call. Each indirect call is limited with a single target.
605606
///
606607
/// \param F function to perform iterative inlining.
608+
/// \param ImportGUIDs a set to be updated to include all GUIDs that come
609+
/// from a different module but were inlined in the profiled binary.
607610
///
608611
/// \returns True if there is any inline happened.
609-
bool SampleProfileLoader::inlineHotFunctions(Function &F) {
612+
bool SampleProfileLoader::inlineHotFunctions(
613+
Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
610614
DenseSet<Instruction *> PromotedInsns;
611615
bool Changed = false;
612616
LLVMContext &Ctx = F.getContext();
@@ -655,8 +659,12 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
655659
continue;
656660
}
657661
}
658-
if (!CalledFunction || !CalledFunction->getSubprogram())
662+
if (!CalledFunction || !CalledFunction->getSubprogram()) {
663+
findCalleeFunctionSamples(*I)->findImportedFunctions(
664+
ImportGUIDs, F.getParent(),
665+
Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
659666
continue;
667+
}
660668
DebugLoc DLoc = I->getDebugLoc();
661669
uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
662670
if (InlineFunction(CallSite(DI), IFI)) {
@@ -1041,10 +1049,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
10411049
bool Changed = true;
10421050
unsigned I = 0;
10431051

1044-
// Add an entry count to the function using the samples gathered
1045-
// at the function entry.
1046-
F.setEntryCount(Samples->getHeadSamples() + 1);
1047-
10481052
// If BB weight is larger than its corresponding loop's header BB weight,
10491053
// use the BB weight to replace the loop header BB weight.
10501054
for (auto &BI : F) {
@@ -1273,12 +1277,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
12731277
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
12741278
<< ": " << getFunctionLoc(F) << "\n");
12751279

1276-
Changed |= inlineHotFunctions(F);
1280+
DenseSet<GlobalValue::GUID> ImportGUIDs;
1281+
Changed |= inlineHotFunctions(F, ImportGUIDs);
12771282

12781283
// Compute basic block weights.
12791284
Changed |= computeBlockWeights(F);
12801285

12811286
if (Changed) {
1287+
// Add an entry count to the function using the samples gathered at the
1288+
// function entry. Also sets the GUIDs that come from a different
1289+
// module but were inlined in the profiled binary. This aims to make
1290+
// the IR match the profiled binary before annotation.
1291+
F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
1292+
12821293
// Compute dominance and loop info needed for propagation.
12831294
computeDominanceAndLoopInfo(F);
12841295

llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; CHECK-NEXT: <VERSION
1111
; See if the call to func is registered, using the expected callsite count
1212
; and profile count, with value id matching the subsequent value symbol table.
13-
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[HOT1:.*]] op5=3 op6=[[COLD:.*]] op7=1 op8=[[HOT2:.*]] op9=3 op10=[[NONE1:.*]] op11=2 op12=[[HOT3:.*]] op13=3 op14=[[NONE2:.*]] op15=2 op16=[[NONE3:.*]] op17=2/>
13+
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[HOT1:.*]] op5=3 op6=[[COLD:.*]] op7=1 op8=[[HOT2:.*]] op9=3 op10=[[NONE1:.*]] op11=2 op12=[[HOT3:.*]] op13=3 op14=[[NONE2:.*]] op15=2 op16=[[NONE3:.*]] op17=2 op18=[[LEGACY:.*]] op19=3/>
1414
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
1515
; CHECK-LABEL: <VALUE_SYMTAB
1616
; CHECK-NEXT: <FNENTRY {{.*}} record string = 'hot_function
@@ -21,6 +21,7 @@
2121
; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT1]] {{.*}} record string = 'hot1'
2222
; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT2]] {{.*}} record string = 'hot2'
2323
; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT3]] {{.*}} record string = 'hot3'
24+
; CHECK-DAG: <COMBINED_ENTRY abbrevid=11 op0=[[LEGACY]] op1=123/>
2425
; CHECK-LABEL: </VALUE_SYMTAB>
2526

2627
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
@@ -80,7 +81,7 @@ declare void @none3() #1
8081

8182

8283
!llvm.module.flags = !{!1}
83-
!20 = !{!"function_entry_count", i64 110}
84+
!20 = !{!"function_entry_count", i64 110, i64 123}
8485

8586
!1 = !{i32 1, !"ProfileSummary", !2}
8687
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
main:10000:0
2+
3: foo:1000
3+
3: bar:200
4+
4: baz:10
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/import.prof -S | FileCheck %s
2+
3+
; Tests whether the functions in the inline stack are added to the
4+
; function_entry_count metadata.
5+
6+
declare void @foo()
7+
8+
define void @main() !dbg !7 {
9+
call void @foo(), !dbg !18
10+
ret void
11+
}
12+
13+
; GUIDs of foo and bar should be included in the metadata to make sure hot
14+
; inline stacks are imported.
15+
; CHECK: !{!"function_entry_count", i64 1, i64 6699318081062747564, i64 -2012135647395072713}
16+
17+
!llvm.dbg.cu = !{!0}
18+
!llvm.module.flags = !{!8, !9}
19+
!llvm.ident = !{!10}
20+
21+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
22+
!1 = !DIFile(filename: "calls.cc", directory: ".")
23+
!2 = !{}
24+
!6 = !DISubroutineType(types: !2)
25+
!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
26+
!8 = !{i32 2, !"Dwarf Version", i32 4}
27+
!9 = !{i32 1, !"Debug Info Version", i32 3}
28+
!10 = !{!"clang version 3.5 "}
29+
!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
30+
!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
31+
!18 = !DILocation(line: 10, scope: !17)

llvm/test/Verifier/function-metadata-bad.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define i32 @bad2() !prof !1 {
1414
}
1515

1616
!1 = !{!"function_entry_count"}
17-
; CHECK-NEXT: !prof annotations should have exactly 2 operands
17+
; CHECK-NEXT: !prof annotations should have no less than 2 operands
1818
; CHECK-NEXT: !1 = !{!"function_entry_count"}
1919

2020

llvm/test/Verifier/metadata-function-prof.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ define void @f3() !prof !0 !prof !0 {
1212
unreachable
1313
}
1414

15-
!0 = !{}
15+
!0 = !{!"function_entry_count", i64 100}

0 commit comments

Comments
 (0)