-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Inline] Accumulate the cost of the inlined function to the new call site #111104
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: DianQK (DianQK) ChangesFixes #111102. Consider the following IR, the @<!-- -->define void foo() {
...
loop:
call void @<!-- -->bar()
...
}
define void @<!-- -->bar() {
...
call void @<!-- -->baz()
...
}
define void @<!-- -->baz() {
...
} If I did some experimenting on DianQK/perf/inline-extra-cost and the current thresholds look OK. I'm looking forward to getting some feedback before modifying this threshold to an option. Full diff: https://github.com/llvm/llvm-project/pull/111104.diff 8 Files Affected:
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 871a6e97861e29..2880553817ebd8 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -74,7 +74,8 @@ class InlineAdvisor;
class InlineAdvice {
public:
InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
- OptimizationRemarkEmitter &ORE, bool IsInliningRecommended);
+ OptimizationRemarkEmitter &ORE, bool IsInliningRecommended,
+ std::optional<int> InliningCost = std::nullopt);
InlineAdvice(InlineAdvice &&) = delete;
InlineAdvice(const InlineAdvice &) = delete;
@@ -108,6 +109,7 @@ class InlineAdvice {
/// Get the inlining recommendation.
bool isInliningRecommended() const { return IsInliningRecommended; }
+ std::optional<int> inliningCost() const { return InliningCost; }
const DebugLoc &getOriginalCallSiteDebugLoc() const { return DLoc; }
const BasicBlock *getOriginalCallSiteBasicBlock() const { return Block; }
@@ -129,6 +131,7 @@ class InlineAdvice {
const BasicBlock *const Block;
OptimizationRemarkEmitter &ORE;
const bool IsInliningRecommended;
+ const std::optional<int> InliningCost;
private:
void markRecorded() {
@@ -145,8 +148,11 @@ class DefaultInlineAdvice : public InlineAdvice {
DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
std::optional<InlineCost> OIC,
OptimizationRemarkEmitter &ORE, bool EmitRemarks = true)
- : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB),
- OIC(OIC), EmitRemarks(EmitRemarks) {}
+ : InlineAdvice(Advisor, CB, ORE, OIC.has_value(),
+ OIC && OIC->isVariable()
+ ? std::optional<int>(OIC->getCost())
+ : std::nullopt),
+ OriginalCB(&CB), OIC(OIC), EmitRemarks(EmitRemarks) {}
private:
void recordUnsuccessfulInliningImpl(const InlineResult &Result) override;
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index c5978ce54fc18b..1190308dba4193 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -58,6 +58,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
const char FunctionInlineCostMultiplierAttributeName[] =
"function-inline-cost-multiplier";
+/// Cost of call site accumulation added after inlining.
+const char FunctionInlineAdditionalCostAttributeName[] =
+ "function-inline-additional-cost";
const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize";
} // namespace InlineConstants
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index c6907cb128bb47..9706dd212d7bc5 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -175,10 +175,12 @@ DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
OptimizationRemarkEmitter &ORE,
- bool IsInliningRecommended)
+ bool IsInliningRecommended,
+ std::optional<int> InliningCost)
: Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()),
DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE),
- IsInliningRecommended(IsInliningRecommended) {}
+ IsInliningRecommended(IsInliningRecommended), InliningCost(InliningCost) {
+}
void InlineAdvice::recordInlineStatsIfNeeded() {
if (Advisor->ImportedFunctionsStats)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d2c329ba748e58..e45423f2130d8a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1017,6 +1017,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
InlineConstants::FunctionInlineCostMultiplierAttributeName))
Cost *= *AttrCostMult;
+ if (std::optional<int> AttrAdditonalCost = getStringFnAttrAsInt(
+ CandidateCall,
+ InlineConstants::FunctionInlineAdditionalCostAttributeName))
+ Cost += *AttrAdditonalCost;
+
if (std::optional<int> AttrThreshold =
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 23ee23eb047f58..977bbcd35f73e8 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -376,6 +376,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
getStringFnAttrAsInt(
*CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
.value_or(1);
+ int CBInliningAdditionalCost =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+ .value_or(0);
+ std::optional<int> InliningCost = Advice->inliningCost();
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -435,6 +440,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
InlineConstants::FunctionInlineCostMultiplierAttributeName,
itostr(CBCostMult * IntraSCCCostMultiplier));
ICB->addFnAttr(NewCBCostMult);
+ } else if (InliningCost && *InliningCost > 0) {
+ // Similar to hot call site thresholds that can cause Inliner to
+ // inline numerous functions causing compile time issues, a
+ // linear accumulator was created to mitigate the problem.
+ Attribute NewCBAdditionalCost = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineAdditionalCostAttributeName,
+ itostr(CBInliningAdditionalCost +
+ (*InliningCost - CBInliningAdditionalCost) / 16));
+ ICB->addFnAttr(NewCBAdditionalCost);
}
}
}
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index dbc733826944b9..c196be9d6dd163 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -225,6 +225,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
Advice->recordUnattemptedInlining();
continue;
}
+ int CBInliningAdditionalCost =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+ .value_or(0);
+ std::optional<int> InliningCost = Advice->inliningCost();
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -265,8 +270,20 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
NewCallee = ICB->getCalledFunction();
}
if (NewCallee)
- if (!NewCallee->isDeclaration())
+ if (!NewCallee->isDeclaration()) {
Calls->push({ICB, NewHistoryID});
+ if (InliningCost && *InliningCost > 0) {
+ // Similar to hot call site thresholds that can cause Inliner to
+ // inline numerous functions causing compile time issues, a linear
+ // accumulator was created to mitigate the problem.
+ Attribute NewCBAdditionalCost = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineAdditionalCostAttributeName,
+ itostr(CBInliningAdditionalCost +
+ (*InliningCost - CBInliningAdditionalCost) / 16));
+ ICB->addFnAttr(NewCBAdditionalCost);
+ }
+ }
}
}
diff --git a/llvm/test/Transforms/Inline/inline-history-noinline.ll b/llvm/test/Transforms/Inline/inline-history-noinline.ll
index 742bd25ecd9bb9..fbe633fc3c797b 100644
--- a/llvm/test/Transforms/Inline/inline-history-noinline.ll
+++ b/llvm/test/Transforms/Inline/inline-history-noinline.ll
@@ -29,4 +29,4 @@ define internal void @a() {
ret void
}
-; CHECK: [[NOINLINE]] = { noinline }
+; CHECK: [[NOINLINE]] = { noinline {{.*}}}
diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
new file mode 100644
index 00000000000000..a1730d76cd547c
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; This tests that a hot callsite gets the (higher) inlinehint-threshold even without
+; without inline hints and gets inlined because the cost is less than
+; RUN: opt < %s -passes=inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+; RUN: opt < %s -passes=module-inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+
+; Due to the hot call site, foo0 inlined foo1, foo2, and foo3,
+; but foo4 is not inlined due to the accumulated cost.
+
+declare void @bar(ptr)
+
+define void @foo0(ptr %p) {
+; CHECK-LABEL: define void @foo0(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[HEADER:.*:]]
+; CHECK-NEXT: [[I_I2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I_I1:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT: call void @bar(ptr [[I_I]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT: call void @bar(ptr [[I_I1]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT: call void @bar(ptr [[I_I2]])
+; CHECK-NEXT: call void @foo4(ptr [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT: br label %[[LOOP]]
+;
+header:
+ br label %loop
+
+loop:
+ call void @foo1(ptr %p)
+ br label %loop
+}
+
+define void @foo1(ptr %p) {
+; CHECK-LABEL: define void @foo1(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo2(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo2(ptr %p)
+ ret void
+}
+
+define void @foo2(ptr %p) {
+; CHECK-LABEL: define void @foo2(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo3(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo3(ptr %p)
+ ret void
+}
+
+define void @foo3(ptr %p) {
+; CHECK-LABEL: define void @foo3(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo4(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo4(ptr %p)
+ ret void
+}
+
+define void @foo4(ptr %p) {
+; CHECK-LABEL: define void @foo4(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo5(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo5(ptr %p)
+ ret void
+}
+
+define void @foo5(ptr %p) {
+; CHECK-LABEL: define void @foo5(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @bar(ptr %i)
+ ret void
+}
|
@llvm/pr-subscribers-llvm-analysis Author: DianQK (DianQK) ChangesFixes #111102. Consider the following IR, the @<!-- -->define void foo() {
...
loop:
call void @<!-- -->bar()
...
}
define void @<!-- -->bar() {
...
call void @<!-- -->baz()
...
}
define void @<!-- -->baz() {
...
} If I did some experimenting on DianQK/perf/inline-extra-cost and the current thresholds look OK. I'm looking forward to getting some feedback before modifying this threshold to an option. Full diff: https://github.com/llvm/llvm-project/pull/111104.diff 8 Files Affected:
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 871a6e97861e29..2880553817ebd8 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -74,7 +74,8 @@ class InlineAdvisor;
class InlineAdvice {
public:
InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
- OptimizationRemarkEmitter &ORE, bool IsInliningRecommended);
+ OptimizationRemarkEmitter &ORE, bool IsInliningRecommended,
+ std::optional<int> InliningCost = std::nullopt);
InlineAdvice(InlineAdvice &&) = delete;
InlineAdvice(const InlineAdvice &) = delete;
@@ -108,6 +109,7 @@ class InlineAdvice {
/// Get the inlining recommendation.
bool isInliningRecommended() const { return IsInliningRecommended; }
+ std::optional<int> inliningCost() const { return InliningCost; }
const DebugLoc &getOriginalCallSiteDebugLoc() const { return DLoc; }
const BasicBlock *getOriginalCallSiteBasicBlock() const { return Block; }
@@ -129,6 +131,7 @@ class InlineAdvice {
const BasicBlock *const Block;
OptimizationRemarkEmitter &ORE;
const bool IsInliningRecommended;
+ const std::optional<int> InliningCost;
private:
void markRecorded() {
@@ -145,8 +148,11 @@ class DefaultInlineAdvice : public InlineAdvice {
DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
std::optional<InlineCost> OIC,
OptimizationRemarkEmitter &ORE, bool EmitRemarks = true)
- : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB),
- OIC(OIC), EmitRemarks(EmitRemarks) {}
+ : InlineAdvice(Advisor, CB, ORE, OIC.has_value(),
+ OIC && OIC->isVariable()
+ ? std::optional<int>(OIC->getCost())
+ : std::nullopt),
+ OriginalCB(&CB), OIC(OIC), EmitRemarks(EmitRemarks) {}
private:
void recordUnsuccessfulInliningImpl(const InlineResult &Result) override;
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index c5978ce54fc18b..1190308dba4193 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -58,6 +58,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
const char FunctionInlineCostMultiplierAttributeName[] =
"function-inline-cost-multiplier";
+/// Cost of call site accumulation added after inlining.
+const char FunctionInlineAdditionalCostAttributeName[] =
+ "function-inline-additional-cost";
const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize";
} // namespace InlineConstants
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index c6907cb128bb47..9706dd212d7bc5 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -175,10 +175,12 @@ DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
OptimizationRemarkEmitter &ORE,
- bool IsInliningRecommended)
+ bool IsInliningRecommended,
+ std::optional<int> InliningCost)
: Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()),
DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE),
- IsInliningRecommended(IsInliningRecommended) {}
+ IsInliningRecommended(IsInliningRecommended), InliningCost(InliningCost) {
+}
void InlineAdvice::recordInlineStatsIfNeeded() {
if (Advisor->ImportedFunctionsStats)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d2c329ba748e58..e45423f2130d8a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1017,6 +1017,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
InlineConstants::FunctionInlineCostMultiplierAttributeName))
Cost *= *AttrCostMult;
+ if (std::optional<int> AttrAdditonalCost = getStringFnAttrAsInt(
+ CandidateCall,
+ InlineConstants::FunctionInlineAdditionalCostAttributeName))
+ Cost += *AttrAdditonalCost;
+
if (std::optional<int> AttrThreshold =
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 23ee23eb047f58..977bbcd35f73e8 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -376,6 +376,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
getStringFnAttrAsInt(
*CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
.value_or(1);
+ int CBInliningAdditionalCost =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+ .value_or(0);
+ std::optional<int> InliningCost = Advice->inliningCost();
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -435,6 +440,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
InlineConstants::FunctionInlineCostMultiplierAttributeName,
itostr(CBCostMult * IntraSCCCostMultiplier));
ICB->addFnAttr(NewCBCostMult);
+ } else if (InliningCost && *InliningCost > 0) {
+ // Similar to hot call site thresholds that can cause Inliner to
+ // inline numerous functions causing compile time issues, a
+ // linear accumulator was created to mitigate the problem.
+ Attribute NewCBAdditionalCost = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineAdditionalCostAttributeName,
+ itostr(CBInliningAdditionalCost +
+ (*InliningCost - CBInliningAdditionalCost) / 16));
+ ICB->addFnAttr(NewCBAdditionalCost);
}
}
}
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index dbc733826944b9..c196be9d6dd163 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -225,6 +225,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
Advice->recordUnattemptedInlining();
continue;
}
+ int CBInliningAdditionalCost =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+ .value_or(0);
+ std::optional<int> InliningCost = Advice->inliningCost();
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -265,8 +270,20 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
NewCallee = ICB->getCalledFunction();
}
if (NewCallee)
- if (!NewCallee->isDeclaration())
+ if (!NewCallee->isDeclaration()) {
Calls->push({ICB, NewHistoryID});
+ if (InliningCost && *InliningCost > 0) {
+ // Similar to hot call site thresholds that can cause Inliner to
+ // inline numerous functions causing compile time issues, a linear
+ // accumulator was created to mitigate the problem.
+ Attribute NewCBAdditionalCost = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineAdditionalCostAttributeName,
+ itostr(CBInliningAdditionalCost +
+ (*InliningCost - CBInliningAdditionalCost) / 16));
+ ICB->addFnAttr(NewCBAdditionalCost);
+ }
+ }
}
}
diff --git a/llvm/test/Transforms/Inline/inline-history-noinline.ll b/llvm/test/Transforms/Inline/inline-history-noinline.ll
index 742bd25ecd9bb9..fbe633fc3c797b 100644
--- a/llvm/test/Transforms/Inline/inline-history-noinline.ll
+++ b/llvm/test/Transforms/Inline/inline-history-noinline.ll
@@ -29,4 +29,4 @@ define internal void @a() {
ret void
}
-; CHECK: [[NOINLINE]] = { noinline }
+; CHECK: [[NOINLINE]] = { noinline {{.*}}}
diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
new file mode 100644
index 00000000000000..a1730d76cd547c
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; This tests that a hot callsite gets the (higher) inlinehint-threshold even without
+; without inline hints and gets inlined because the cost is less than
+; RUN: opt < %s -passes=inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+; RUN: opt < %s -passes=module-inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+
+; Due to the hot call site, foo0 inlined foo1, foo2, and foo3,
+; but foo4 is not inlined due to the accumulated cost.
+
+declare void @bar(ptr)
+
+define void @foo0(ptr %p) {
+; CHECK-LABEL: define void @foo0(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[HEADER:.*:]]
+; CHECK-NEXT: [[I_I2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I_I1:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT: call void @bar(ptr [[I_I]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT: call void @bar(ptr [[I_I1]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT: call void @bar(ptr [[I_I2]])
+; CHECK-NEXT: call void @foo4(ptr [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT: br label %[[LOOP]]
+;
+header:
+ br label %loop
+
+loop:
+ call void @foo1(ptr %p)
+ br label %loop
+}
+
+define void @foo1(ptr %p) {
+; CHECK-LABEL: define void @foo1(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo2(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo2(ptr %p)
+ ret void
+}
+
+define void @foo2(ptr %p) {
+; CHECK-LABEL: define void @foo2(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo3(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo3(ptr %p)
+ ret void
+}
+
+define void @foo3(ptr %p) {
+; CHECK-LABEL: define void @foo3(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo4(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo4(ptr %p)
+ ret void
+}
+
+define void @foo4(ptr %p) {
+; CHECK-LABEL: define void @foo4(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo5(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo5(ptr %p)
+ ret void
+}
+
+define void @foo5(ptr %p) {
+; CHECK-LABEL: define void @foo5(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @bar(ptr %i)
+ ret void
+}
|
@@ -0,0 +1,109 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | |||
; This tests that a hot callsite gets the (higher) inlinehint-threshold even without | |||
; without inline hints and gets inlined because the cost is less than |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
; without inline hints and gets inlined because the cost is less than | |
; inline hints and gets inlined because the cost is less than ??? |
I believe part of the design of the new inliner was that it should be idempotent. As in rerunning the inliner should not result in any changes. If you accumulate inline costs between function calls, that breaks idempotency. I'd want more justification besides a vague "the inliner inlined too much stuff in one case" to break that. |
I think I've done something similar to the
In fact, I believe the main reason for the increased compile time is that we've inlined nested loops multiple times. |
Fixes #111102.
Consider the following IR, the
call void @bar
is a local host call site:If
foo
can inlinebaz
after inlinedbar
, I think it can also inline abar
that has inlinedbaz
. With this in mind, I accumulated the previous cost into the new call site, which is a linearly increasing limit.I did some experimenting on DianQK/perf/inline-extra-cost and the current thresholds look OK. I'm looking forward to getting some feedback before modifying this threshold to an option.