Skip to content

Commit 93db40a

Browse files
committed
Always_inline codegen rewrite.
The current implementation may end up emitting an undefined reference for an "inline __attribute__((always_inline))" function by generating an "available_externally alwaysinline" IR function for it and then failing to inline all the calls. This happens when a call to such a function is in dead code: as the inliner is an SCC pass, it does not process dead code. Libc++ relies on the compiler never emitting such an undefined reference. With this patch, we emit a pair of: 1. an internal alwaysinline definition (called F.alwaysinline), and 2a. a stub F() { musttail call F.alwaysinline } -- or, depending on the linkage -- 2b. a declaration of F. The frontend ensures that F.alwaysinline is only used for direct calls, and the stub is used for everything else (taking the address of the function, really). Declaration (2b) is emitted in the case when "inline" is meant for inlining only (like __gnu_inline__ and some other cases). This approach, among other nice properties, ensures that alwaysinline functions are always internal, making it impossible for a direct call to such a function to produce an undefined symbol reference. This patch is based on ideas by Chandler Carruth and Richard Smith. llvm-svn: 247494
1 parent e299bc5 commit 93db40a

16 files changed

+361
-28
lines changed

clang/lib/CodeGen/CGCXX.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
109109
D->getType()->getAs<FunctionType>()->getCallConv())
110110
return true;
111111

112+
if (BaseD->hasAttr<AlwaysInlineAttr>())
113+
return true;
114+
112115
return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
113116
GlobalDecl(BaseD, Dtor_Base),
114117
false);
@@ -161,14 +164,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
161164

162165
// Instead of creating an alias to a linkonce_odr, replace all of the uses
163166
// of the aliasee.
164-
if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) &&
165-
(TargetLinkage != llvm::GlobalValue::AvailableExternallyLinkage ||
166-
!TargetDecl.getDecl()->hasAttr<AlwaysInlineAttr>())) {
167-
// FIXME: An extern template instantiation will create functions with
168-
// linkage "AvailableExternally". In libc++, some classes also define
169-
// members with attribute "AlwaysInline" and expect no reference to
170-
// be generated. It is desirable to reenable this optimisation after
171-
// corresponding LLVM changes.
167+
if (llvm::GlobalValue::isDiscardableIfUnused(Linkage)) {
172168
Replacements[MangledName] = Aliasee;
173169
return false;
174170
}

clang/lib/CodeGen/CGClass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1557,7 +1557,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
15571557
// -fapple-kext must inline any call to this dtor into
15581558
// the caller's body.
15591559
if (getLangOpts().AppleKext)
1560-
CurFn->addFnAttr(llvm::Attribute::AlwaysInline);
1560+
CGM.AddAlwaysInlineFunction(CurFn);
15611561

15621562
break;
15631563
}

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2114,7 +2114,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
21142114
".omp_task_privates_map.", &CGM.getModule());
21152115
CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
21162116
TaskPrivatesMap);
2117-
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2117+
CGM.AddAlwaysInlineFunction(TaskPrivatesMap);
21182118
CodeGenFunction CGF(CGM);
21192119
CGF.disableDebugInfo();
21202120
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,109 @@ void CodeGenModule::Release() {
448448
EmitVersionIdentMetadata();
449449

450450
EmitTargetMetadata();
451+
452+
RewriteAlwaysInlineFunctions();
453+
}
454+
455+
void CodeGenModule::AddAlwaysInlineFunction(llvm::Function *Fn) {
456+
AlwaysInlineFunctions.push_back(Fn);
457+
}
458+
459+
/// Find all uses of GV that are not direct calls or invokes.
460+
static void FindNonDirectCallUses(llvm::GlobalValue *GV,
461+
llvm::SmallVectorImpl<llvm::Use *> *Uses) {
462+
llvm::GlobalValue::use_iterator UI = GV->use_begin(), E = GV->use_end();
463+
for (; UI != E;) {
464+
llvm::Use &U = *UI;
465+
++UI;
466+
467+
llvm::CallSite CS(U.getUser());
468+
bool isDirectCall = (CS.isCall() || CS.isInvoke()) && CS.isCallee(&U);
469+
if (!isDirectCall)
470+
Uses->push_back(&U);
471+
}
472+
}
473+
474+
/// Replace a list of uses.
475+
static void ReplaceUsesWith(const llvm::SmallVectorImpl<llvm::Use *> &Uses,
476+
llvm::GlobalValue *V,
477+
llvm::GlobalValue *Replacement) {
478+
for (llvm::Use *U : Uses) {
479+
auto *C = dyn_cast<llvm::Constant>(U->getUser());
480+
if (C && !isa<llvm::GlobalValue>(C))
481+
C->handleOperandChange(V, Replacement, U);
482+
else
483+
U->set(Replacement);
484+
}
485+
}
486+
487+
void CodeGenModule::RewriteAlwaysInlineFunction(llvm::Function *Fn) {
488+
std::string Name = Fn->getName();
489+
std::string InlineName = Name + ".alwaysinline";
490+
Fn->setName(InlineName);
491+
492+
llvm::SmallVector<llvm::Use *, 8> NonDirectCallUses;
493+
Fn->removeDeadConstantUsers();
494+
FindNonDirectCallUses(Fn, &NonDirectCallUses);
495+
// Do not create the wrapper if there are no non-direct call uses, and we are
496+
// not required to emit an external definition.
497+
if (NonDirectCallUses.empty() && Fn->isDiscardableIfUnused())
498+
return;
499+
500+
llvm::FunctionType *FT = Fn->getFunctionType();
501+
llvm::LLVMContext &Ctx = getModule().getContext();
502+
llvm::Function *StubFn =
503+
llvm::Function::Create(FT, Fn->getLinkage(), Name, &getModule());
504+
assert(StubFn->getName() == Name && "name was uniqued!");
505+
506+
// Insert the stub immediately after the original function. Helps with the
507+
// fragile tests, among other things.
508+
StubFn->removeFromParent();
509+
TheModule.getFunctionList().insertAfter(Fn, StubFn);
510+
511+
StubFn->copyAttributesFrom(Fn);
512+
StubFn->setPersonalityFn(nullptr);
513+
514+
// AvailableExternally functions are replaced with a declaration.
515+
// Everyone else gets a wrapper that musttail-calls the original function.
516+
if (Fn->hasAvailableExternallyLinkage()) {
517+
StubFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
518+
} else {
519+
llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", StubFn);
520+
std::vector<llvm::Value *> Args;
521+
for (llvm::Function::arg_iterator ai = StubFn->arg_begin();
522+
ai != StubFn->arg_end(); ++ai)
523+
Args.push_back(&*ai);
524+
llvm::CallInst *CI = llvm::CallInst::Create(Fn, Args, "", BB);
525+
CI->setCallingConv(Fn->getCallingConv());
526+
CI->setTailCallKind(llvm::CallInst::TCK_MustTail);
527+
CI->setAttributes(Fn->getAttributes());
528+
if (FT->getReturnType()->isVoidTy())
529+
llvm::ReturnInst::Create(Ctx, BB);
530+
else
531+
llvm::ReturnInst::Create(Ctx, CI, BB);
532+
}
533+
534+
if (Fn->hasComdat())
535+
StubFn->setComdat(Fn->getComdat());
536+
537+
ReplaceUsesWith(NonDirectCallUses, Fn, StubFn);
538+
539+
// Replace all metadata uses with the stub. This is primarily to reattach
540+
// DISubprogram metadata to the stub, because that's what will be emitted in
541+
// the object file.
542+
if (Fn->isUsedByMetadata())
543+
llvm::ValueAsMetadata::handleRAUW(Fn, StubFn);
544+
}
545+
546+
void CodeGenModule::RewriteAlwaysInlineFunctions() {
547+
for (llvm::Function *Fn : AlwaysInlineFunctions) {
548+
RewriteAlwaysInlineFunction(Fn);
549+
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
550+
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
551+
Fn->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
552+
Fn->setVisibility(llvm::GlobalValue::DefaultVisibility);
553+
}
451554
}
452555

453556
void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
@@ -772,7 +875,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
772875
!F->getAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex,
773876
llvm::Attribute::NoInline)) {
774877
// (noinline wins over always_inline, and we can't specify both in IR)
775-
B.addAttribute(llvm::Attribute::AlwaysInline);
878+
AddAlwaysInlineFunction(F);
776879
}
777880

778881
if (D->hasAttr<ColdAttr>()) {

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,8 @@ class CodeGenModule : public CodeGenTypeCache {
489489
/// MDNodes.
490490
llvm::DenseMap<QualType, llvm::Metadata *> MetadataIdMap;
491491

492+
llvm::SmallVector<llvm::Function*, 8> AlwaysInlineFunctions;
493+
492494
public:
493495
CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts,
494496
const PreprocessorOptions &ppopts,
@@ -1131,6 +1133,8 @@ class CodeGenModule : public CodeGenTypeCache {
11311133
/// \brief Get the declaration of std::terminate for the platform.
11321134
llvm::Constant *getTerminateFn();
11331135

1136+
void AddAlwaysInlineFunction(llvm::Function *Fn);
1137+
11341138
private:
11351139
llvm::Constant *
11361140
GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
@@ -1226,6 +1230,12 @@ class CodeGenModule : public CodeGenTypeCache {
12261230
/// Emits target specific Metadata for global declarations.
12271231
void EmitTargetMetadata();
12281232

1233+
/// Replaces alwaysinline functions with a pair of internal xxx.alwaysinline
1234+
/// for direct calls, and a stub for indirect calls, and rewrites all uses of
1235+
/// those.
1236+
void RewriteAlwaysInlineFunctions();
1237+
void RewriteAlwaysInlineFunction(llvm::Function *Fn);
1238+
12291239
/// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and
12301240
/// .gcda files in a way that persists in .bc files.
12311241
void EmitCoverageFile();

clang/lib/CodeGen/ItaniumCXXABI.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3311,6 +3311,9 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM,
33113311
if (MD->getParent()->getNumVBases())
33123312
return StructorCodegen::Emit;
33133313

3314+
if (MD->hasAttr<AlwaysInlineAttr>())
3315+
return StructorCodegen::Emit;
3316+
33143317
GlobalDecl AliasDecl;
33153318
if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) {
33163319
AliasDecl = GlobalDecl(DD, Dtor_Complete);

clang/test/CodeGen/always-inline.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
22
// RUN: %clang_cc1 -fno-inline -emit-llvm %s -o - | FileCheck %s
33

4+
// CHECK-LABEL: define void @i_want_bar()
45
// CHECK-NOT: foo
6+
// CHECK: ret void
57

68
void bar() {
79
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Test alwaysinline definitions w/o any non-direct-call uses.
2+
// None of the declarations are emitted. Stubs are only emitted when the original
3+
// function can not be discarded.
4+
5+
// RUN: %clang_cc1 -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s
6+
7+
void __attribute__((__always_inline__)) f1() {}
8+
inline void __attribute__((__always_inline__)) f2() {}
9+
static inline void __attribute__((__always_inline__)) f3() {}
10+
inline void __attribute__((gnu_inline, __always_inline__)) f4() {}
11+
static inline void __attribute__((gnu_inline, __always_inline__)) f5() {}
12+
inline void __attribute__((visibility("hidden"), __always_inline__)) f6() {}
13+
inline void __attribute__((visibility("hidden"), gnu_inline, __always_inline__)) f7() {}
14+
15+
void g() {
16+
f1();
17+
f2();
18+
f3();
19+
f4();
20+
f5();
21+
f6();
22+
f7();
23+
}
24+
25+
// CHECK: define void @f1()
26+
// CHECK-NOT: void @f2()
27+
// CHECK-NOT: void @f3()
28+
// CHECK: define void @f4()
29+
// CHECK-NOT: void @f5()
30+
// CHECK-NOT: void @f6()
31+
// CHECK: define hidden void @f7()
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Test different kinds of alwaysinline definitions.
2+
3+
// RUN: %clang_cc1 -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-INLINE
4+
// RUN: %clang_cc1 -disable-llvm-optzns -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-USE
5+
// RUN: %clang_cc1 -disable-llvm-optzns -fno-inline -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
6+
// RUN: %clang_cc1 -disable-llvm-optzns -fno-inline -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-USE
7+
8+
void __attribute__((__always_inline__)) f1() {}
9+
inline void __attribute__((__always_inline__)) f2() {}
10+
static inline void __attribute__((__always_inline__)) f3() {}
11+
inline void __attribute__((gnu_inline, __always_inline__)) f4() {}
12+
static inline void __attribute__((gnu_inline, __always_inline__)) f5() {}
13+
inline void __attribute__((visibility("hidden"), __always_inline__)) f6() {}
14+
inline void __attribute__((visibility("hidden"), gnu_inline, __always_inline__)) f7() {}
15+
16+
void g() {
17+
f1();
18+
f2();
19+
f3();
20+
f4();
21+
f5();
22+
f6();
23+
f7();
24+
}
25+
26+
void (*p)(void);
27+
void h() {
28+
p = f1;
29+
p = f2;
30+
p = f3;
31+
p = f4;
32+
p = f5;
33+
p = f6;
34+
p = f7;
35+
}
36+
37+
void (*const cp1)(void) = f1;
38+
void (*p1)(void) = f1;
39+
void (*p2)(int) = (void (*)(int))f1;
40+
41+
void __attribute__((__always_inline__)) f8(void(*f)(void)) {}
42+
43+
void call() {
44+
f8(f1);
45+
}
46+
47+
// CHECK-DAG: define internal void @f1.alwaysinline() #[[AI:[0-9]+]]
48+
// CHECK-DAG: define internal void @f2.alwaysinline() #[[AI_IH:[0-9]+]]
49+
// CHECK-DAG: define internal void @f3.alwaysinline() #[[AI_IH]]
50+
// CHECK-DAG: define internal void @f4.alwaysinline() #[[AI_IH]]
51+
// CHECK-DAG: define internal void @f5.alwaysinline() #[[AI_IH]]
52+
// CHECK-DAG: define internal void @f6.alwaysinline() #[[AI_IH]]
53+
// CHECK-DAG: define internal void @f7.alwaysinline() #[[AI_IH]]
54+
55+
56+
// CHECK-DAG: define void @f1() #[[NOAI:[01-9]+]]
57+
// CHECK-DAG: musttail call void @f1.alwaysinline()
58+
59+
// CHECK-DAG: declare void @f2() #[[NOAI:[01-9]+]]
60+
61+
// CHECK-DAG: define internal void @f3() #[[NOAI:[01-9]+]]
62+
// CHECK-DAG: musttail call void @f3.alwaysinline()
63+
64+
// CHECK-DAG: define void @f4() #[[NOAI:[01-9]+]]
65+
// CHECK-DAG: musttail call void @f4.alwaysinline()
66+
67+
// CHECK-DAG: define internal void @f5() #[[NOAI:[01-9]+]]
68+
// CHECK-DAG: musttail call void @f5.alwaysinline()
69+
70+
// CHECK-DAG: declare hidden void @f6() #[[NOAI:[01-9]+]]
71+
72+
// CHECK-DAG: define hidden void @f7() #[[NOAI:[01-9]+]]
73+
// CHECK-DAG: musttail call void @f7.alwaysinline()
74+
75+
76+
// CHECK-DAG: @cp1 = constant void ()* @f1, align
77+
// CHECK-DAG: @p1 = global void ()* @f1, align
78+
// CHECK-DAG: @p2 = global void (i32)* bitcast (void ()* @f1 to void (i32)*), align
79+
80+
// CHECK: attributes #[[AI]] = {{.*alwaysinline.*}}
81+
// CHECK-INLINE: attributes #[[AI_IH]] = {{.*alwaysinline.*inlinehint.*}}
82+
// CHECK-NOT: attributes #[[NOAI]] = {{.*alwaysinline.*}}
83+
84+
// CHECK-USE-LABEL: define void @g()
85+
// CHECK-USE-NOT: ret void
86+
// CHECK-USE: call void @f1.alwaysinline()
87+
// CHECK-USE-NEXT: call void @f2.alwaysinline()
88+
// CHECK-USE-NEXT: call void @f3.alwaysinline()
89+
// CHECK-USE-NEXT: call void @f4.alwaysinline()
90+
// CHECK-USE-NEXT: call void @f5.alwaysinline()
91+
// CHECK-USE-NEXT: call void @f6.alwaysinline()
92+
// CHECK-USE-NEXT: call void @f7.alwaysinline()
93+
// CHECK-USE-NEXT: ret void
94+
95+
// CHECK-USE-LABEL: define void @h()
96+
// CHECK-USE-NOT: ret void
97+
// CHECK-USE: store void ()* @f1,
98+
// CHECK-USE-NEXT: store void ()* @f2,
99+
// CHECK-USE-NEXT: store void ()* @f3,
100+
// CHECK-USE-NEXT: store void ()* @f4,
101+
// CHECK-USE-NEXT: store void ()* @f5,
102+
// CHECK-USE-NEXT: store void ()* @f6,
103+
// CHECK-USE-NEXT: store void ()* @f7,
104+
// CHECK-USE-NEXT: ret void
105+
106+
// CHECK-USE-LABEL: define void @call()
107+
// CHECK-USE: call void @f8.alwaysinline(void ()* @f1)
108+
// CHECK-USE: ret void

clang/test/CodeGen/always_inline.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
// RUN: %clang -emit-llvm -S -o %t %s
2-
// RUN: not grep '@f0' %t
3-
// RUN: not grep 'call ' %t
4-
// RUN: %clang -mllvm -disable-llvm-optzns -emit-llvm -S -o %t %s
5-
// RUN: grep '@f0' %t | count 2
1+
// RUN: %clang -target x86_64-pc-linux-gnu -emit-llvm -S -o - %s | FileCheck %s
2+
// RUN: %clang -target x86_64-pc-linux-gnu -mllvm -disable-llvm-optzns -emit-llvm -S -o - %s | FileCheck %s --check-prefix=CHECK-NO-OPTZNS
63

74
//static int f0() {
85
static int __attribute__((always_inline)) f0() {
@@ -18,3 +15,14 @@ inline int f2() __attribute__((always_inline));
1815
int f2() { return 7; }
1916
int f3(void) { return f2(); }
2017

18+
// CHECK-LABEL: define i32 @f1()
19+
// CHECK: ret i32 1
20+
// CHECK-LABEL: define i32 @f2()
21+
// CHECK: ret i32 7
22+
// CHECK-LABEL: define i32 @f3()
23+
// CHECK: ret i32 7
24+
25+
// CHECK-NO-OPTZNS: define i32 @f3()
26+
// CHECK-NO-OPTZNS-NOT: ret i32
27+
// CHECK-NO-OPTZNS: call i32 @f2.alwaysinline()
28+
// CHECK-NO-OPTZNS-NEXT: ret i32

clang/test/CodeGen/function-attributes.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ void f6(signed short x) { }
2525

2626
void f7(unsigned short x) { }
2727

28-
// CHECK-LABEL: define void @f8()
29-
// CHECK: [[AI:#[0-9]+]]
28+
// CHECK: define void @f8()
29+
// CHECK: [[NUW:#[0-9]+]]
3030
// CHECK: {
3131
void __attribute__((always_inline)) f8(void) { }
3232

@@ -129,7 +129,6 @@ void f20(void) {
129129
}
130130

131131
// CHECK: attributes [[NUW]] = { nounwind optsize readnone{{.*}} }
132-
// CHECK: attributes [[AI]] = { alwaysinline nounwind optsize readnone{{.*}} }
133132
// CHECK: attributes [[ALIGN]] = { nounwind optsize readnone alignstack=16{{.*}} }
134133
// CHECK: attributes [[RT]] = { nounwind optsize returns_twice{{.*}} }
135134
// CHECK: attributes [[NR]] = { noreturn nounwind optsize }

0 commit comments

Comments
 (0)