Skip to content

Commit

Permalink
[Windows] Replace TrapUnreachable with an int3 insertion pass
Browse files Browse the repository at this point in the history
This is an alternative to D66980, which was reverted. Instead of
inserting a pseudo instruction that optionally expands to nothing, add a
pass that inserts int3 when appropriate after basic block layout.

Reviewers: hans

Differential Revision: https://reviews.llvm.org/D67201

llvm-svn: 371466
  • Loading branch information
rnk committed Sep 9, 2019
1 parent 5112b71 commit bf02399
Show file tree
Hide file tree
Showing 20 changed files with 344 additions and 39 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/CMakeLists.txt
Expand Up @@ -23,6 +23,7 @@ add_public_tablegen_target(X86CommonTableGen)

set(sources
X86AsmPrinter.cpp
X86AvoidTrailingCall.cpp
X86CallFrameOptimization.cpp
X86CallingConv.cpp
X86CallLowering.cpp
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86.h
Expand Up @@ -81,6 +81,12 @@ FunctionPass *createX86FlagsCopyLoweringPass();
/// Return a pass that expands WinAlloca pseudo-instructions.
FunctionPass *createX86WinAllocaExpander();

/// Return a pass that inserts int3 at the end of the function if it ends with a
/// CALL instruction. The pass does the same for each funclet as well. This
/// ensures that the open interval of function start and end PCs contains all
/// return addresses for the benefit of the Windows x64 unwinder.
FunctionPass *createX86AvoidTrailingCallPass();

/// Return a pass that optimizes the code-size of x86 call sequences. This is
/// done by replacing esp-relative movs with pushes.
FunctionPass *createX86CallFrameOptimization();
Expand Down
108 changes: 108 additions & 0 deletions llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
@@ -0,0 +1,108 @@
//===----- X86AvoidTrailingCall.cpp - Insert int3 after trailing calls ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The Windows x64 unwinder has trouble unwinding the stack when a return
// address points to the end of the function. This pass maintains the invariant
// that every return address is inside the bounds of its parent function or
// funclet by inserting int3 if the last instruction would otherwise be a call.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

#define DEBUG_TYPE "x86-avoid-trailing-call"

using namespace llvm;

namespace {

/// Machine function pass that inserts an int3 after a call instruction that
/// would otherwise be the last real instruction of a function or funclet.
class X86AvoidTrailingCallPass : public MachineFunctionPass {
  // Pass ID handed to the MachineFunctionPass base by the constructor.
  static char ID;

  // Human-readable name of this pass.
  StringRef getPassName() const override {
    return "X86 avoid trailing call pass";
  }

public:
  X86AvoidTrailingCallPass() : MachineFunctionPass(ID) {}

  /// Processes \p MF and returns true if any instruction was inserted.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

// Out-of-line definition of the pass's static ID member, which the
// constructor hands to the MachineFunctionPass base class.
char X86AvoidTrailingCallPass::ID = 0;

} // end anonymous namespace

// Factory for the trailing-call avoidance pass. Returns a newly allocated
// X86AvoidTrailingCallPass as a FunctionPass pointer.
FunctionPass *llvm::createX86AvoidTrailingCallPass() {
  return new X86AvoidTrailingCallPass;
}

// A real instruction is a non-meta, non-pseudo instruction. Some pseudos
// expand to nothing, and some expand to code. This logic conservatively assumes
// they might expand to nothing.
static bool isRealInstruction(MachineInstr &MI) {
return !MI.isPseudo() && !MI.isMetaInstruction();
}

// Tells whether MI is a call that is not also a return, i.e. a call that is
// not a tail call.
static bool isCallInstruction(const MachineInstr &MI) {
  if (!MI.isCall())
    return false;
  return !MI.isReturn();
}

// For every block that is last in layout or laid out directly before an EH
// funclet entry, locate the last real instruction at or before that block.
// When that instruction is a non-tail call, place an int3 immediately after
// it. Returns true if at least one int3 was inserted.
bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86InstrInfo &TII = *STI.getInstrInfo();
  assert(STI.isTargetWin64() && "pass only runs on Win64");

  // FIXME: Perhaps this pass should also replace SEH_Epilogue by inserting nops
  // before epilogues.

  bool InsertedAny = false;
  for (MachineBasicBlock &Block : MF) {
    // Only blocks with no layout successor, or whose layout successor starts
    // an EH funclet, can end a function or funclet.
    MachineBasicBlock *Following = Block.getNextNode();
    const bool IsTrailingBlock = !Following || Following->isEHFuncletEntry();
    if (!IsTrailingBlock)
      continue;

    // Walk backwards through the layout, starting at this block, until some
    // block yields a real instruction. If none does, Tail is left at the
    // rend() of the first block in the function.
    MachineBasicBlock::reverse_iterator Tail;
    for (auto BlockIt = Block.getReverseIterator(), BlockEnd = MF.rend();
         BlockIt != BlockEnd; ++BlockIt) {
      MachineBasicBlock &Candidate = *BlockIt;
      Tail = llvm::find_if(reverse(Candidate), isRealInstruction);
      if (Tail != Candidate.rend())
        break;
    }

    // The search ran off the front of the function without finding any real
    // instruction, so there is nothing to do here.
    if (Tail == MF.begin()->rend())
      continue;

    // Anything other than a non-tail call may safely end the function or
    // funclet.
    if (!isCallInstruction(*Tail))
      continue;

    LLVM_DEBUG({
      dbgs() << "inserting int3 after trailing call instruction:\n";
      Tail->dump();
      dbgs() << '\n';
    });

    // Turn the reverse iterator into a forward one and step past the call so
    // that the int3 lands right after it, reusing the call's DebugLoc.
    MachineBasicBlock &Owner = *Tail->getParent();
    MachineBasicBlock::iterator InsertPt = std::next(Tail.getReverse());
    BuildMI(Owner, InsertPt, Tail->getDebugLoc(), TII.get(X86::INT3));
    InsertedAny = true;
  }

  return InsertedAny;
}
21 changes: 10 additions & 11 deletions llvm/lib/Target/X86/X86TargetMachine.cpp
Expand Up @@ -219,17 +219,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
OL),
TLOF(createTLOF(getTargetTriple())) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
// To prevent that, we emit a trap for 'unreachable' IR instructions.
// (which on X86, happens to be the 'ud2' instruction)
// On PS4, the "return address" of a 'noreturn' call must still be within
// the calling function, and TrapUnreachable is an easy way to get that.
// The check here for 64-bit windows is a bit icky, but as we're unlikely
// to ever want to mix 32 and 64-bit windows code in a single module
// this should be fine.
if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4() ||
TT.isOSBinFormatMachO()) {
if (TT.isPS4() || TT.isOSBinFormatMachO()) {
this->Options.TrapUnreachable = true;
this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
}
Expand Down Expand Up @@ -518,12 +510,19 @@ void X86PassConfig::addPreEmitPass() {
}

void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();

addPass(createX86RetpolineThunksPass());

// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
if (TT.isOSWindows() && TT.getArch() == Triple::x86_64)
addPass(createX86AvoidTrailingCallPass());

// Verify basic block incoming and outgoing cfa offset and register values and
// correct CFA calculation rule where needed by inserting appropriate CFI
// instructions.
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
if (!TT.isOSDarwin() &&
(!TT.isOSWindows() ||
MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/WinEH/wineh-noret-cleanup.ll
@@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed -e s/.Cxx:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=CXX
; RUN: sed -e s/.Seh:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=SEH

Expand Down Expand Up @@ -69,13 +68,13 @@ catch.body.2:
; SEH-NEXT: .long .Ltmp0@IMGREL+1
; SEH-NEXT: .long .Ltmp1@IMGREL+1
; SEH-NEXT: .long dummy_filter@IMGREL
; SEH-NEXT: .long .LBB0_2@IMGREL
; SEH-NEXT: .long .LBB0_5@IMGREL
; SEH-NEXT: .long .Ltmp2@IMGREL+1
; SEH-NEXT: .long .Ltmp3@IMGREL+1
; SEH-NEXT: .long "?dtor$5@?0?test@4HA"@IMGREL
; SEH-NEXT: .long "?dtor$2@?0?test@4HA"@IMGREL
; SEH-NEXT: .long 0
; SEH-NEXT: .long .Ltmp2@IMGREL+1
; SEH-NEXT: .long .Ltmp3@IMGREL+1
; SEH-NEXT: .long dummy_filter@IMGREL
; SEH-NEXT: .long .LBB0_2@IMGREL
; SEH-NEXT: .long .LBB0_5@IMGREL
; SEH-NEXT: .Llsda_end0:
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/br-fold.ll
Expand Up @@ -5,18 +5,18 @@
; RUN: llc -mtriple=x86_64-scei-ps4 < %s | FileCheck -check-prefix=PS4 %s

; X64_DARWIN: orq
; X64-DARWIN-NEXT: ud2
; X64_DARWIN-NEXT: ud2

; X64_LINUX: orq %rax, %rcx
; X64_LINUX-NEXT: jne
; X64_LINUX-NEXT: %bb8.i329

; X64_WINDOWS: orq %rax, %rcx
; X64_WINDOWS-NEXT: ud2
; X64_WINDOWS-NEXT: jne

; X64_WINDOWS_GNU: movq .refptr._ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE(%rip), %rax
; X64_WINDOWS_GNU: orq .refptr._ZN11xercesc_2_56XMLUni16fgNotationStringE(%rip), %rax
; X64_WINDOWS_GNU-NEXT: ud2
; X64_WINDOWS_GNU-NEXT: jne

; PS4: orq %rax, %rcx
; PS4-NEXT: ud2
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/X86/catchpad-lifetime.ll
Expand Up @@ -7,6 +7,8 @@ declare void @throw()

declare i32 @__CxxFrameHandler3(...)

declare void @llvm.trap()

define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
entry:
%alloca2 = alloca i8*, align 4
Expand All @@ -30,6 +32,7 @@ catch.pad: ; preds = %catch.dispatch
%bc2 = bitcast i8** %alloca2 to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %bc2)
store volatile i8* null, i8** %alloca1
call void @llvm.trap()
unreachable

; CHECK-LABEL: "?catch$2@?0?test1@4HA"
Expand Down Expand Up @@ -67,6 +70,7 @@ catch.pad: ; preds = %catch.dispatch
%bc2 = bitcast i8** %alloca2 to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %bc2)
store volatile i8* null, i8** %alloca1
call void @llvm.trap()
unreachable

; CHECK-LABEL: "?catch$2@?0?test2@4HA"
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/catchpad-regmask.ll
Expand Up @@ -75,7 +75,7 @@ unreachable: ; preds = %entry
; CHECK: popq %rbp
; CHECK: retq

; CHECK: "?catch$2@?0?global_array@4HA":
; CHECK: "?catch${{[0-9]+}}@?0?global_array@4HA":
; CHECK: pushq %rbp
; CHECK: movslq {{.*}}, %[[idx:[^ ]*]]
; CHECK: leaq array(%rip), %[[base:[^ ]*]]
Expand Down Expand Up @@ -122,7 +122,7 @@ unreachable: ; preds = %entry
; CHECK: popq %rbp
; CHECK: retq

; CHECK: "?catch$2@?0?access_imported@4HA":
; CHECK: "?catch${{[0-9]+}}@?0?access_imported@4HA":
; CHECK: pushq %rbp
; CHECK: movq __imp_imported(%rip), %[[base:[^ ]*]]
; CHECK: movl $222, (%[[base]])
Expand Down
7 changes: 5 additions & 2 deletions llvm/test/CodeGen/X86/catchret-regmask.ll
Expand Up @@ -6,6 +6,7 @@ target triple = "x86_64-pc-windows-msvc"
declare i32 @__CxxFrameHandler3(...)
declare void @throw() noreturn uwtable
declare i8* @getval()
declare void @llvm.trap()

define i8* @reload_out_of_pad(i8* %arg) #0 personality i32 (...)* @__CxxFrameHandler3 {
assertPassed:
Expand All @@ -19,6 +20,7 @@ catch:
; This block *must* appear after the catchret to test the bug.
; FIXME: Make this an MIR test so we can control MBB layout.
unreachable:
call void @llvm.trap()
unreachable

catch.dispatch:
Expand All @@ -35,7 +37,7 @@ return:
; CHECK: movq -[[arg_slot]](%rbp), %rax # 8-byte Reload
; CHECK: retq

; CHECK: "?catch$3@?0?reload_out_of_pad@4HA":
; CHECK: "?catch${{[0-9]+}}@?0?reload_out_of_pad@4HA":
; CHECK-NOT: Reload
; CHECK: retq

Expand All @@ -50,6 +52,7 @@ catch:
catchret from %cp to label %return

unreachable:
call void @llvm.trap()
unreachable

catch.dispatch:
Expand All @@ -65,7 +68,7 @@ return:
; CHECK: movq -[[val_slot:[0-9]+]](%rbp), %rax # 8-byte Reload
; CHECK: retq

; CHECK: "?catch$3@?0?spill_in_pad@4HA":
; CHECK: "?catch${{[0-9]+}}@?0?spill_in_pad@4HA":
; CHECK: callq getval
; CHECK: movq %rax, -[[val_slot]](%rbp) # 8-byte Spill
; CHECK: retq
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/empty-function.ll
Expand Up @@ -15,7 +15,7 @@ entry:

; CHECK-LABEL: f:
; WIN32: nop
; WIN64: ud2
; WIN64: nop
; LINUX-NOT: nop
; LINUX-NOT: ud2

Expand Down
16 changes: 11 additions & 5 deletions llvm/test/CodeGen/X86/funclet-layout.ll
Expand Up @@ -9,6 +9,8 @@ target triple = "x86_64-pc-windows-msvc"
@"\01??_7type_info@@6B@" = external constant i8*
@"\01??_R0H@8" = internal global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }

declare void @llvm.trap()

define void @test1(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
entry:
invoke void @g()
Expand All @@ -31,6 +33,7 @@ try.cont:
ret void

unreachable:
call void @llvm.trap()
unreachable
}

Expand Down Expand Up @@ -76,6 +79,7 @@ try.cont.5: ; preds = %try.cont
ret i32 0

unreachable: ; preds = %catch, %entry
call void @llvm.trap()
unreachable
}

Expand Down Expand Up @@ -125,11 +129,13 @@ try.cont: ; preds = %entry
br i1 %V, label %exit_one, label %exit_two

exit_one:
tail call void @exit(i32 0)
tail call void @g()
call void @llvm.trap()
unreachable

exit_two:
tail call void @exit(i32 0)
tail call void @g()
call void @llvm.trap()
unreachable
}

Expand All @@ -138,20 +144,20 @@ exit_two:
; The entry funclet contains %entry and %try.cont
; CHECK: # %entry
; CHECK: # %try.cont
; CHECK: callq exit
; CHECK: callq g
; CHECK-NOT: # exit_one
; CHECK-NOT: # exit_two
; CHECK: ud2

; The catch(...) funclet contains %catch.2
; CHECK: # %catch.2{{$}}
; CHECK: callq exit
; CHECK: ud2
; CHECK-NEXT: int3

; The catch(int) funclet contains %catch
; CHECK: # %catch{{$}}
; CHECK: callq exit
; CHECK: ud2
; CHECK-NEXT: int3

declare void @exit(i32) noreturn nounwind
declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
Expand Down

0 comments on commit bf02399

Please sign in to comment.