diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index a318ef0b6bd68..c8f4e2aa8c580 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -538,6 +538,11 @@ class MCPlusBuilder { llvm_unreachable("not implemented"); } + virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target, + MCContext *Ctx) { + llvm_unreachable("not implemented"); + } + virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); } virtual unsigned getShortBranchOpcode(unsigned Opcode) const { diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp index 150461b020f06..10479f35d8f9d 100644 --- a/bolt/lib/Passes/Instrumentation.cpp +++ b/bolt/lib/Passes/Instrumentation.cpp @@ -305,9 +305,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB, : IndCallHandlerExitBBFunction->getSymbol(), IndCallSiteID, &*BC.Ctx); - Iter = BB.eraseInstruction(Iter); - Iter = insertInstructions(CounterInstrs, BB, Iter); - --Iter; + if (!BC.isAArch64()) { + Iter = BB.eraseInstruction(Iter); + Iter = insertInstructions(CounterInstrs, BB, Iter); + --Iter; + } else + Iter = insertInstructions(CounterInstrs, BB, Iter); } bool Instrumentation::instrumentOneTarget( diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index af87d5c12b5ce..dc7644fbabdcf 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -48,14 +48,14 @@ static cl::opt NoLSEAtomics( namespace { -static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) { +[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) { Inst.setOpcode(AArch64::MRS); Inst.clear(); Inst.addOperand(MCOperand::createReg(RegName)); Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV)); } -static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) { +[[maybe_unused]] static 
void setSystemFlag(MCInst &Inst, MCPhysReg RegName) { Inst.setOpcode(AArch64::MSR); Inst.clear(); Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV)); @@ -2114,6 +2114,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { convertJmpToTailCall(Inst); } + void createDirectBranch(MCInst &Inst, const MCSymbol *Target, + MCContext *Ctx) override { + Inst.setOpcode(AArch64::B); + Inst.clear(); + Inst.addOperand(MCOperand::createExpr(getTargetExprFor( + Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0))); + } + bool analyzeBranch(InstructionIterator Begin, InstructionIterator End, const MCSymbol *&TBB, const MCSymbol *&FBB, MCInst *&CondBranch, @@ -2471,21 +2479,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } InstructionListType createInstrumentedIndCallHandlerExitBB() const override { - InstructionListType Insts(5); // Code sequence for instrumented indirect call handler: - // msr nzcv, x1 - // ldp x0, x1, [sp], #16 - // ldr x16, [sp], #16 - // ldp x0, x1, [sp], #16 - // br x16 - setSystemFlag(Insts[0], AArch64::X1); - createPopRegisters(Insts[1], AArch64::X0, AArch64::X1); - // Here we load address of the next function which should be called in the - // original binary to X16 register. Writing to X16 is permitted without - // needing to restore. - loadReg(Insts[2], AArch64::X16, AArch64::SP); - createPopRegisters(Insts[3], AArch64::X0, AArch64::X1); - createIndirectBranch(Insts[4], AArch64::X16, 0); + // ret + + InstructionListType Insts; + + Insts.emplace_back(); + createReturn(Insts.back()); + return Insts; } @@ -2561,39 +2562,59 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { MCSymbol *HandlerFuncAddr, int CallSiteID, MCContext *Ctx) override { - InstructionListType Insts; // Code sequence used to enter indirect call instrumentation helper: - // stp x0, x1, [sp, #-16]! createPushRegisters - // mov target x0 convertIndirectCallToLoad -> orr x0 target xzr + // stp x0, x1, [sp, #-16]! 
createPushRegisters (1) + // mov target, x0 convertIndirectCallToLoad -> orr x0 target xzr // mov x1 CallSiteID createLoadImmediate -> // movk x1, #0x0, lsl #48 // movk x1, #0x0, lsl #32 // movk x1, #0x0, lsl #16 // movk x1, #0x0 - // stp x0, x1, [sp, #-16]! - // bl *HandlerFuncAddr createIndirectCall -> + // stp x0, x30, [sp, #-16]! (2) // adr x0 *HandlerFuncAddr -> adrp + add - // blr x0 + // blr x0 (__bolt_instr_ind_call_handler_func) + // ldp x0, x30, [sp], #16 (2) + // mov x0, target ; move target address to used register + // ldp x0, x1, [sp], #16 (1) + + InstructionListType Insts; Insts.emplace_back(); - createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1); + createPushRegisters(Insts.back(), getIntArgRegister(0), + getIntArgRegister(1)); Insts.emplace_back(CallInst); - convertIndirectCallToLoad(Insts.back(), AArch64::X0); + convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0)); InstructionListType LoadImm = createLoadImmediate(getIntArgRegister(1), CallSiteID); Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end()); Insts.emplace_back(); - createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1); + createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR); Insts.resize(Insts.size() + 2); - InstructionListType Addr = - materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0); + InstructionListType Addr = materializeAddress( + HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg()); assert(Addr.size() == 2 && "Invalid Addr size"); std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size()); + Insts.emplace_back(); - createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0); + createIndirectCallInst(Insts.back(), false, + CallInst.getOperand(0).getReg()); - // Carry over metadata including tail call marker if present. 
- stripAnnotations(Insts.back()); - moveAnnotations(std::move(CallInst), Insts.back()); + Insts.emplace_back(); + createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR); + + // move x0 to indirect call register + Insts.emplace_back(); + Insts.back().setOpcode(AArch64::ORRXrs); + Insts.back().insert(Insts.back().begin(), + MCOperand::createReg(CallInst.getOperand(0).getReg())); + Insts.back().insert(Insts.back().begin() + 1, + MCOperand::createReg(AArch64::XZR)); + Insts.back().insert(Insts.back().begin() + 2, + MCOperand::createReg(getIntArgRegister(0))); + Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0)); + + Insts.emplace_back(); + createPopRegisters(Insts.back(), getIntArgRegister(0), + getIntArgRegister(1)); return Insts; } @@ -2602,12 +2623,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline, const MCSymbol *IndCallHandler, MCContext *Ctx) override { - // Code sequence used to check whether InstrTampoline was initialized + // Code sequence used to check whether InstrTrampoline was initialized // and call it if so, returns via IndCallHandler - // stp x0, x1, [sp, #-16]! - // mrs x1, nzcv - // adr x0, InstrTrampoline -> adrp + add - // ldr x0, [x0] + // adrp x0, InstrTrampoline + // ldr x0, [x0, #lo12:InstrTrampoline] // subs x0, x0, #0x0 // b.eq IndCallHandler // str x30, [sp, #-16]! 
@@ -2615,30 +2634,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { // ldr x30, [sp], #16 // b IndCallHandler InstructionListType Insts; + + // load handler address + MCInst InstAdrp; + InstAdrp.setOpcode(AArch64::ADRP); + InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0))); + InstAdrp.addOperand(MCOperand::createImm(0)); + setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline, + /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE); + Insts.emplace_back(InstAdrp); + + MCInst InstLoad; + InstLoad.setOpcode(AArch64::LDRXui); + InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0))); + InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0))); + InstLoad.addOperand(MCOperand::createImm(0)); + setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline, + /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC); + Insts.emplace_back(InstLoad); + + InstructionListType CmpJmp = + createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx); + Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end()); + Insts.emplace_back(); - createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1); - Insts.emplace_back(); - getSystemFlag(Insts.back(), getIntArgRegister(1)); - Insts.emplace_back(); - Insts.emplace_back(); - InstructionListType Addr = - materializeAddress(InstrTrampoline, Ctx, AArch64::X0); - std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size()); - assert(Addr.size() == 2 && "Invalid Addr size"); - Insts.emplace_back(); - loadReg(Insts.back(), AArch64::X0, AArch64::X0); - InstructionListType cmpJmp = - createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx); - Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end()); - Insts.emplace_back(); - storeReg(Insts.back(), AArch64::LR, AArch64::SP); + storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8)); + Insts.emplace_back(); Insts.back().setOpcode(AArch64::BLR); - Insts.back().addOperand(MCOperand::createReg(AArch64::X0)); + 
Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0))); + Insts.emplace_back(); - loadReg(Insts.back(), AArch64::LR, AArch64::SP); + loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8)); + Insts.emplace_back(); - createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true); + createDirectBranch(Insts.back(), IndCallHandler, Ctx); + return Insts; } diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index f586db2b0f9ba..634ade6bdd407 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -1691,9 +1691,12 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) { extern "C" __attribute((naked)) void __bolt_instr_indirect_call() { #if defined(__aarch64__) + // the target address is placed on stack + // the identifier of the indirect call site is placed in X1 register + // clang-format off __asm__ __volatile__(SAVE_ALL - "ldp x0, x1, [sp, #288]\n" + "ldr x0, [sp, #272]\n" "bl instrumentIndirectCall\n" RESTORE_ALL "ret\n" @@ -1728,9 +1731,12 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call() extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall() { #if defined(__aarch64__) + // the target address is placed on stack + // the identifier of the indirect call site is placed in X1 register + // clang-format off __asm__ __volatile__(SAVE_ALL - "ldp x0, x1, [sp, #288]\n" + "ldr x0, [sp, #272]\n" "bl instrumentIndirectCall\n" RESTORE_ALL "ret\n" diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h index b1d04f9d558e0..9cb8e022f58df 100644 --- a/bolt/runtime/sys_aarch64.h +++ b/bolt/runtime/sys_aarch64.h @@ -18,10 +18,12 @@ "stp x24, x25, [sp, #-16]!\n" \ "stp x26, x27, [sp, #-16]!\n" \ "stp x28, x29, [sp, #-16]!\n" \ - "str x30, [sp,#-16]!\n" + "mrs x29, nzcv\n" \ + "stp x29, x30, [sp, #-16]!\n" // Mirrors SAVE_ALL #define RESTORE_ALL \ - "ldr x30, [sp], #16\n" \ + "ldp x29, x30, [sp], #16\n" \ + "msr nzcv, x29\n" \ "ldp x28, x29, [sp], #16\n" \ "ldp x26, x27, [sp], 
#16\n" \ "ldp x24, x25, [sp], #16\n" \ diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c index f9056da333b4e..eddecba4d8b52 100644 --- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c +++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c @@ -15,9 +15,63 @@ int main() { REQUIRES: system-linux,bolt-runtime RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie +RUN: llvm-objdump --disassemble-symbols=main %t.exe \ +RUN: | FileCheck %s --check-prefix=CHECKINDIRECTREG + +CHECKINDIRECTREG: mov w0, #0xa +CHECKINDIRECTREG-NEXT: mov w1, #0x14 +CHECKINDIRECTREG-NEXT: blr x8 RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \ -RUN: -o %t.instrumented +RUN: -o %t.instrumented \ +RUN: | FileCheck %s --check-prefix=CHECK-INSTR-LOG + +CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1 + +RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \ +RUN: | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG + +RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \ +RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL +RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \ +RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC + +CHECK-INSTR-INDIRECTREG: mov w0, #0xa +CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14 +// store current values +CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}} +// store the indirect target address in x0 +CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8 +// load callsite id into x1 +CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x30, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}} +// call instrumentation library handler 
function +CHECK-INSTR-INDIRECTREG-NEXT: blr x8 +// restore registers saved before +CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x30, {{.*}} +CHECK-INSTR-INDIRECTREG-NEXT: mov x8, x0 +CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}} +// original indirect call instruction +CHECK-INSTR-INDIRECTREG-NEXT: blr x8 + + +CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>: +CHECK-INSTR-INDIR-CALL-NEXT: ret + +CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>: +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0 +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0 +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0 +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30 +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0 +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30 +CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler # Instrumented program needs to finish returning zero RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td index e91537186df59..34df9af7fc06d 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td @@ -44,6 +44,7 @@ def CIR_Dialect : Dialect { static llvm::StringRef getModuleLevelAsmAttrName() { return "cir.module_asm"; } static llvm::StringRef getGlobalCtorsAttrName() { return "cir.global_ctors"; } static llvm::StringRef getGlobalDtorsAttrName() { return "cir.global_dtors"; } + static llvm::StringRef getOperandSegmentSizesAttrName() { return "operandSegmentSizes"; } void registerAttributes(); void registerTypes(); diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 777b49434f119..5f5fab6f12300 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2728,7 +2728,7 @@ def CIR_LLVMIntrinsicCallOp : 
CIR_Op<"call_llvm_intrinsic"> { } //===----------------------------------------------------------------------===// -// CallOp +// CallOp and TryCallOp //===----------------------------------------------------------------------===// def CIR_SideEffect : CIR_I32EnumAttr< @@ -2855,6 +2855,96 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> { ]; } +def CIR_TryCallOp : CIR_CallOpBase<"try_call",[ + Terminator +]> { + let summary = "try_call operation"; + let description = [{ + Similar to `cir.call` but requires two destination blocks, + one which is used if the call returns without throwing an + exception (the "normal" destination) and another which is used + if an exception is thrown (the "unwind" destination). + + This operation is used only after the CFG flattening pass. + + Example: + + ```mlir + // Before CFG flattening + cir.try { + %call = cir.call @division(%a, %b) : (!s32i, !s32i) -> !s32i + cir.yield + } catch all { + cir.yield + } + + // After CFG flattening + %call = cir.try_call @division(%a, %b) ^normalDest, ^unwindDest + : (!s32i, !s32i) -> !s32i + ^normalDest: + cir.br ^afterTryBlock + ^unwindDest: + %exception_ptr, %type_id = cir.eh.inflight_exception + cir.br ^catchHandlerBlock(%exception_ptr : !cir.ptr) + ^catchHandlerBlock: + ...
+ ``` + }]; + + let arguments = commonArgs; + let results = (outs Optional:$result); + let successors = (successor + AnySuccessor:$normalDest, + AnySuccessor:$unwindDest + ); + + let skipDefaultBuilders = 1; + let hasLLVMLowering = false; + + let builders = [ + OpBuilder<(ins "mlir::SymbolRefAttr":$callee, + "mlir::Type":$resType, + "mlir::Block *":$normalDest, + "mlir::Block *":$unwindDest, + CArg<"mlir::ValueRange", "{}">:$callOperands, + CArg<"SideEffect", "SideEffect::All">:$sideEffect), [{ + $_state.addOperands(callOperands); + + if (callee) + $_state.addAttribute("callee", callee); + if (resType && !isa(resType)) + $_state.addTypes(resType); + + $_state.addAttribute("side_effect", + SideEffectAttr::get($_builder.getContext(), sideEffect)); + + // Handle branches + $_state.addSuccessors(normalDest); + $_state.addSuccessors(unwindDest); + }]>, + OpBuilder<(ins "mlir::Value":$ind_target, + "FuncType":$fn_type, + "mlir::Block *":$normalDest, + "mlir::Block *":$unwindDest, + CArg<"mlir::ValueRange", "{}">:$callOperands, + CArg<"SideEffect", "SideEffect::All">:$sideEffect), [{ + ::llvm::SmallVector finalCallOperands({ind_target}); + finalCallOperands.append(callOperands.begin(), callOperands.end()); + $_state.addOperands(finalCallOperands); + + if (!fn_type.hasVoidReturn()) + $_state.addTypes(fn_type.getReturnType()); + + $_state.addAttribute("side_effect", + SideEffectAttr::get($_builder.getContext(), sideEffect)); + + // Handle branches + $_state.addSuccessors(normalDest); + $_state.addSuccessors(unwindDest); + }]> + ]; +} + //===----------------------------------------------------------------------===// // AwaitOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index f1bacff7fc691..d505ca141d383 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -721,8 +721,28 @@ unsigned 
cir::CallOp::getNumArgOperands() { return this->getOperation()->getNumOperands(); } +static mlir::ParseResult +parseTryCallDestinations(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + mlir::Block *normalDestSuccessor; + if (parser.parseSuccessor(normalDestSuccessor)) + return mlir::failure(); + + if (parser.parseComma()) + return mlir::failure(); + + mlir::Block *unwindDestSuccessor; + if (parser.parseSuccessor(unwindDestSuccessor)) + return mlir::failure(); + + result.addSuccessors(normalDestSuccessor); + result.addSuccessors(unwindDestSuccessor); + return mlir::success(); +} + static mlir::ParseResult parseCallCommon(mlir::OpAsmParser &parser, - mlir::OperationState &result) { + mlir::OperationState &result, + bool hasDestinationBlocks = false) { llvm::SmallVector ops; llvm::SMLoc opsLoc; mlir::FlatSymbolRefAttr calleeAttr; @@ -749,6 +769,11 @@ static mlir::ParseResult parseCallCommon(mlir::OpAsmParser &parser, if (parser.parseRParen()) return mlir::failure(); + if (hasDestinationBlocks && + parseTryCallDestinations(parser, result).failed()) { + return ::mlir::failure(); + } + if (parser.parseOptionalKeyword("nothrow").succeeded()) result.addAttribute(CIRDialect::getNoThrowAttrName(), mlir::UnitAttr::get(parser.getContext())); @@ -788,7 +813,9 @@ static void printCallCommon(mlir::Operation *op, mlir::FlatSymbolRefAttr calleeSym, mlir::Value indirectCallee, mlir::OpAsmPrinter &printer, bool isNothrow, - cir::SideEffect sideEffect) { + cir::SideEffect sideEffect, + mlir::Block *normalDest = nullptr, + mlir::Block *unwindDest = nullptr) { printer << ' '; auto callLikeOp = mlir::cast(op); @@ -802,8 +829,18 @@ static void printCallCommon(mlir::Operation *op, assert(indirectCallee); printer << indirectCallee; } + printer << "(" << ops << ")"; + if (normalDest) { + assert(unwindDest && "expected two successors"); + auto tryCall = cast(op); + printer << ' ' << tryCall.getNormalDest(); + printer << ","; + printer << ' '; + printer << 
tryCall.getUnwindDest(); + } + if (isNothrow) printer << " nothrow"; @@ -813,11 +850,11 @@ static void printCallCommon(mlir::Operation *op, printer << ")"; } - printer.printOptionalAttrDict(op->getAttrs(), - {CIRDialect::getCalleeAttrName(), - CIRDialect::getNoThrowAttrName(), - CIRDialect::getSideEffectAttrName()}); - + llvm::SmallVector<::llvm::StringRef> elidedAttrs = { + CIRDialect::getCalleeAttrName(), CIRDialect::getNoThrowAttrName(), + CIRDialect::getSideEffectAttrName(), + CIRDialect::getOperandSegmentSizesAttrName()}; + printer.printOptionalAttrDict(op->getAttrs(), elidedAttrs); printer << " : "; printer.printFunctionalType(op->getOperands().getTypes(), op->getResultTypes()); @@ -898,6 +935,59 @@ cir::CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { return verifyCallCommInSymbolUses(*this, symbolTable); } +//===----------------------------------------------------------------------===// +// TryCallOp +//===----------------------------------------------------------------------===// + +mlir::OperandRange cir::TryCallOp::getArgOperands() { + if (isIndirect()) + return getArgs().drop_front(1); + return getArgs(); +} + +mlir::MutableOperandRange cir::TryCallOp::getArgOperandsMutable() { + mlir::MutableOperandRange args = getArgsMutable(); + if (isIndirect()) + return args.slice(1, args.size() - 1); + return args; +} + +mlir::Value cir::TryCallOp::getIndirectCall() { + assert(isIndirect()); + return getOperand(0); +} + +/// Return the operand at index 'i'. +Value cir::TryCallOp::getArgOperand(unsigned i) { + if (isIndirect()) + ++i; + return getOperand(i); +} + +/// Return the number of operands. 
+unsigned cir::TryCallOp::getNumArgOperands() { + if (isIndirect()) + return this->getOperation()->getNumOperands() - 1; + return this->getOperation()->getNumOperands(); +} + +LogicalResult +cir::TryCallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + return verifyCallCommInSymbolUses(*this, symbolTable); +} + +mlir::ParseResult cir::TryCallOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + return parseCallCommon(parser, result, /*hasDestinationBlocks=*/true); +} + +void cir::TryCallOp::print(::mlir::OpAsmPrinter &p) { + mlir::Value indirectCallee = isIndirect() ? getIndirectCall() : nullptr; + cir::SideEffect sideEffect = getSideEffect(); + printCallCommon(*this, getCalleeAttr(), indirectCallee, p, getNothrow(), + sideEffect, getNormalDest(), getUnwindDest()); +} + //===----------------------------------------------------------------------===// // ReturnOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 50edca43ebb92..19c83d3910902 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2363,12 +2363,9 @@ bool UnwrappedLineParser::tryToParseLambda() { Arrow = FormatTok; nextToken(); break; - case tok::kw_requires: { - auto *RequiresToken = FormatTok; - nextToken(); - parseRequiresClause(RequiresToken); + case tok::kw_requires: + parseRequiresClause(); break; - } case tok::equal: if (!InTemplateParameterList) return true; @@ -2580,12 +2577,9 @@ bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { if (IsEnum && !Style.AllowShortEnumsOnASingleLine) addUnwrappedLine(); break; - case tok::kw_requires: { - auto *RequiresToken = FormatTok; - nextToken(); - parseRequiresExpression(RequiresToken); + case tok::kw_requires: + parseRequiresExpression(); break; - } default: nextToken(); break; @@ -2727,12 +2721,9 @@ bool 
UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType, else nextToken(); break; - case tok::kw_requires: { - auto RequiresToken = FormatTok; - nextToken(); - parseRequiresExpression(RequiresToken); + case tok::kw_requires: + parseRequiresExpression(); break; - } case tok::ampamp: if (AmpAmpTokenType != TT_Unknown) FormatTok->setFinalizedType(AmpAmpTokenType); @@ -3467,23 +3458,20 @@ void UnwrappedLineParser::parseAccessSpecifier() { /// \returns true if it parsed a clause. bool UnwrappedLineParser::parseRequires(bool SeenEqual) { assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); - auto RequiresToken = FormatTok; // We try to guess if it is a requires clause, or a requires expression. For - // that we first consume the keyword and check the next token. - nextToken(); - - switch (FormatTok->Tok.getKind()) { + // that we first check the next token. + switch (Tokens->peekNextToken(/*SkipComment=*/true)->Tok.getKind()) { case tok::l_brace: // This can only be an expression, never a clause. - parseRequiresExpression(RequiresToken); + parseRequiresExpression(); return false; case tok::l_paren: // Clauses and expression can start with a paren, it's unclear what we have. break; default: // All other tokens can only be a clause. - parseRequiresClause(RequiresToken); + parseRequiresClause(); return true; } @@ -3494,13 +3482,13 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { // requires (C && ... // But first let's look behind. - auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); + auto *PreviousNonComment = FormatTok->getPreviousNonComment(); if (!PreviousNonComment || PreviousNonComment->is(TT_RequiresExpressionLBrace)) { // If there is no token, or an expression left brace, we are a requires // clause within a requires expression. 
- parseRequiresClause(RequiresToken); + parseRequiresClause(); return true; } @@ -3512,7 +3500,7 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { case tok::star: case tok::amp: // This is a requires clause. - parseRequiresClause(RequiresToken); + parseRequiresClause(); return true; case tok::ampamp: { // This can be either: @@ -3523,7 +3511,7 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { // void member(...) const && requires (C ... auto PrevPrev = PreviousNonComment->getPreviousNonComment(); if ((PrevPrev && PrevPrev->is(tok::kw_const)) || !SeenEqual) { - parseRequiresClause(RequiresToken); + parseRequiresClause(); return true; } break; @@ -3531,11 +3519,11 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { default: if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { // This is a requires clause. - parseRequiresClause(RequiresToken); + parseRequiresClause(); return true; } // It's an expression. - parseRequiresExpression(RequiresToken); + parseRequiresExpression(); return false; } @@ -3564,7 +3552,7 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { case tok::comma: if (OpenAngles == 0) { FormatTok = Tokens->setPosition(StoredPosition); - parseRequiresExpression(RequiresToken); + parseRequiresExpression(); return false; } break; @@ -3579,7 +3567,7 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { case tok::identifier: if (FoundType && !LastWasColonColon && OpenAngles == 0) { FormatTok = Tokens->setPosition(StoredPosition); - parseRequiresExpression(RequiresToken); + parseRequiresExpression(); return false; } FoundType = true; @@ -3594,7 +3582,7 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { default: if (NextToken->isTypeName(LangOpts)) { FormatTok = Tokens->setPosition(StoredPosition); - parseRequiresExpression(RequiresToken); + parseRequiresExpression(); return false; } break; @@ -3602,31 +3590,29 @@ bool UnwrappedLineParser::parseRequires(bool SeenEqual) { } // This seems to be a 
complicated expression, just assume it's a clause. FormatTok = Tokens->setPosition(StoredPosition); - parseRequiresClause(RequiresToken); + parseRequiresClause(); return true; } /// Parses a requires clause. -/// \param RequiresToken The requires keyword token, which starts this clause. -/// \pre We need to be on the next token after the requires keyword. /// \sa parseRequiresExpression /// /// Returns if it either has finished parsing the clause, or it detects, that /// the clause is incorrect. -void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { - assert(FormatTok->getPreviousNonComment() == RequiresToken); - assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); +void UnwrappedLineParser::parseRequiresClause() { + assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); // If there is no previous token, we are within a requires expression, // otherwise we will always have the template or function declaration in front // of it. bool InRequiresExpression = - !RequiresToken->Previous || - RequiresToken->Previous->is(TT_RequiresExpressionLBrace); + !FormatTok->Previous || + FormatTok->Previous->is(TT_RequiresExpressionLBrace); - RequiresToken->setFinalizedType(InRequiresExpression - ? TT_RequiresClauseInARequiresExpression - : TT_RequiresClause); + FormatTok->setFinalizedType(InRequiresExpression + ? TT_RequiresClauseInARequiresExpression + : TT_RequiresClause); + nextToken(); // NOTE: parseConstraintExpression is only ever called from this function. // It could be inlined into here. @@ -3637,17 +3623,15 @@ void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { } /// Parses a requires expression. -/// \param RequiresToken The requires keyword token, which starts this clause. -/// \pre We need to be on the next token after the requires keyword. /// \sa parseRequiresClause /// /// Returns if it either has finished parsing the expression, or it detects, /// that the expression is incorrect. 
-void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { - assert(FormatTok->getPreviousNonComment() == RequiresToken); - assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); +void UnwrappedLineParser::parseRequiresExpression() { + assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); - RequiresToken->setFinalizedType(TT_RequiresExpression); + FormatTok->setFinalizedType(TT_RequiresExpression); + nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_RequiresExpressionLParen); @@ -3687,12 +3671,9 @@ void UnwrappedLineParser::parseConstraintExpression() { bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); switch (FormatTok->Tok.getKind()) { - case tok::kw_requires: { - auto RequiresToken = FormatTok; - nextToken(); - parseRequiresExpression(RequiresToken); + case tok::kw_requires: + parseRequiresExpression(); break; - } case tok::l_paren: if (!TopLevelParensAllowed) diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 0161a5063ad40..86022d9b316c6 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -169,8 +169,8 @@ class UnwrappedLineParser { bool parseEnum(); bool parseStructLike(); bool parseRequires(bool SeenEqual); - void parseRequiresClause(FormatToken *RequiresToken); - void parseRequiresExpression(FormatToken *RequiresToken); + void parseRequiresClause(); + void parseRequiresExpression(); void parseConstraintExpression(); void parseCppExportBlock(); void parseNamespaceOrExportBlock(unsigned AddLevels); diff --git a/clang/test/CIR/IR/try-call.cir b/clang/test/CIR/IR/try-call.cir new file mode 100644 index 0000000000000..39db43aee40c1 --- /dev/null +++ b/clang/test/CIR/IR/try-call.cir @@ -0,0 +1,35 @@ +// RUN: cir-opt %s --verify-roundtrip | FileCheck %s + +!s32i = !cir.int + +module { + +cir.func private @division(%a: !s32i, %b: !s32i) -> !s32i + +cir.func 
@flatten_structure_with_try_call_op() { + %a = cir.const #cir.int<1> : !s32i + %b = cir.const #cir.int<2> : !s32i + %3 = cir.try_call @division(%a, %b) ^normal, ^unwind : (!s32i, !s32i) -> !s32i + ^normal: + cir.br ^end + ^unwind: + cir.br ^end + ^end: + cir.return +} + +// CHECK: cir.func private @division(!s32i, !s32i) -> !s32i + +// CHECK: cir.func @flatten_structure_with_try_call_op() { +// CHECK-NEXT: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i +// CHECK-NEXT: %[[CALL:.*]] = cir.try_call @division(%[[CONST_1]], %[[CONST_2]]) ^[[NORMAL:.*]], ^[[UNWIND:.*]] : (!s32i, !s32i) -> !s32i +// CHECK-NEXT: ^[[NORMAL]]: +// CHECK-NEXT: cir.br ^[[END:.*]] +// CHECK-NEXT: ^[[UNWIND]]: +// CHECK-NEXT: cir.br ^[[END:.*]] +// CHECK-NEXT: ^[[END]]: +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +} diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h index 1a589be49bf1d..deb657d4faced 100644 --- a/libcxx/include/__locale_dir/support/linux.h +++ b/libcxx/include/__locale_dir/support/linux.h @@ -83,15 +83,30 @@ inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { // Strtonum functions // inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC || defined(_GNU_SOURCE) return ::strtof_l(__nptr, __endptr, __loc); +#else + (void)__loc; + return ::strtof(__nptr, __endptr); +#endif } inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC || defined(_GNU_SOURCE) return ::strtod_l(__nptr, __endptr, __loc); +#else + (void)__loc; + return ::strtod(__nptr, __endptr); +#endif } inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC || defined(_GNU_SOURCE) return ::strtold_l(__nptr, __endptr, __loc); +#else + (void)__loc; + return 
::strtold(__nptr, __endptr); +#endif } // diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 5ed89e47c672e..7ec75b0d61fce 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -8,6 +8,7 @@ #include "InputFiles.h" #include "OutputSections.h" +#include "RISCVInternalRelocations.h" #include "RelocScan.h" #include "Symbols.h" #include "SyntheticSections.h" @@ -345,8 +346,15 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_SUB_ULEB128: return RE_RISCV_LEB128; default: - Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v - << ") against symbol " << &s; + if (type.v & INTERNAL_RISCV_VENDOR_MASK) { + Err(ctx) << getErrorLoc(ctx, loc) + << "unsupported vendor-specific relocation " << type + << " against symbol " << &s; + return R_NONE; + } + Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" + << (type.v & ~INTERNAL_RISCV_VENDOR_MASK) << ") against symbol " + << &s; return R_NONE; } } @@ -859,7 +867,7 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) { std::fill_n(aux.relocTypes.get(), relocs.size(), R_RISCV_NONE); aux.writes.clear(); - for (auto [i, r] : llvm::enumerate(relocs)) { + for (auto [i, r] : llvm::enumerate(riscv_vendor_relocs(relocs))) { const uint64_t loc = secAddr + r.offset - delta; uint32_t &cur = aux.relocDeltas[i], remove = 0; switch (r.type) { @@ -1503,12 +1511,19 @@ void RISCV::scanSectionImpl(InputSectionBase &sec, Relocs rels) { rvVendor = sym.getName(); continue; } else if (!rvVendor.empty()) { - Err(ctx) << getErrorLoc(ctx, loc) - << "unknown vendor-specific relocation (" << type.v - << ") in namespace '" << rvVendor << "' against symbol '" << &sym - << "'"; + uint32_t VendorFlag = getRISCVVendorRelMarker(rvVendor); + if (!VendorFlag) { + Err(ctx) << getErrorLoc(ctx, loc) + << "unknown vendor-specific relocation (" << type.v + << ") in namespace '" << rvVendor << "' against symbol '" + << &sym << "'"; + rvVendor = ""; + continue; + } + rvVendor = ""; - 
continue; + assert((type.v < 256) && "Out of range relocation detected!"); + type.v |= VendorFlag; } rs.scan(it, type, rs.getAddend(*it, type)); @@ -1533,3 +1548,21 @@ template void RISCV::scanSection1(InputSectionBase &sec) { void RISCV::scanSection(InputSectionBase &sec) { invokeELFT(scanSection1, sec); } + +namespace lld::elf { +uint32_t getRISCVVendorRelMarker(StringRef rvVendor) { + return StringSwitch(rvVendor) + .Case("QUALCOMM", INTERNAL_RISCV_VENDOR_QUALCOMM) + .Case("ANDES", INTERNAL_RISCV_VENDOR_ANDES) + .Default(0); +} + +std::optional getRISCVVendorString(RelType ty) { + if ((ty.v & INTERNAL_RISCV_VENDOR_MASK) == INTERNAL_RISCV_VENDOR_QUALCOMM) + return "QUALCOMM"; + if ((ty.v & INTERNAL_RISCV_VENDOR_MASK) == INTERNAL_RISCV_VENDOR_ANDES) + return "ANDES"; + return std::nullopt; +} + +} // namespace lld::elf diff --git a/lld/ELF/Arch/RISCVInternalRelocations.h b/lld/ELF/Arch/RISCVInternalRelocations.h new file mode 100644 index 0000000000000..35e2f53b03b35 --- /dev/null +++ b/lld/ELF/Arch/RISCVInternalRelocations.h @@ -0,0 +1,113 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ARCH_RISCVINTERNALRELOCATIONS_H +#define LLD_ELF_ARCH_RISCVINTERNALRELOCATIONS_H + +#include "Relocations.h" +#include "Symbols.h" + +namespace lld::elf { + +// Bit 8 of RelType is used to indicate linker-internal relocations that are +// not vendor-specific. +// These are internal relocation numbers for GP/X0 relaxation. They aren't part +// of the psABI spec. 
+constexpr uint32_t INTERNAL_R_RISCV_GPREL_I = 256; +constexpr uint32_t INTERNAL_R_RISCV_GPREL_S = 257; +constexpr uint32_t INTERNAL_R_RISCV_X0REL_I = 258; +constexpr uint32_t INTERNAL_R_RISCV_X0REL_S = 259; + +// Bits 9 -> 31 of RelType are used to indicate vendor-specific relocations. +constexpr uint32_t INTERNAL_RISCV_VENDOR_MASK = 0xFFFFFFFF << 9; +constexpr uint32_t INTERNAL_RISCV_VENDOR_QUALCOMM = 1 << 9; +constexpr uint32_t INTERNAL_RISCV_VENDOR_ANDES = 2 << 9; + +constexpr uint32_t INTERNAL_RISCV_QC_ABS20_U = + INTERNAL_RISCV_VENDOR_QUALCOMM | llvm::ELF::R_RISCV_QC_ABS20_U; +constexpr uint32_t INTERNAL_RISCV_QC_E_BRANCH = + INTERNAL_RISCV_VENDOR_QUALCOMM | llvm::ELF::R_RISCV_QC_E_BRANCH; +constexpr uint32_t INTERNAL_RISCV_QC_E_32 = + INTERNAL_RISCV_VENDOR_QUALCOMM | llvm::ELF::R_RISCV_QC_E_32; +constexpr uint32_t INTERNAL_RISCV_QC_E_CALL_PLT = + INTERNAL_RISCV_VENDOR_QUALCOMM | llvm::ELF::R_RISCV_QC_E_CALL_PLT; + +constexpr uint32_t INTERNAL_RISCV_NDS_BRANCH_10 = + INTERNAL_RISCV_VENDOR_ANDES | llvm::ELF::R_RISCV_NDS_BRANCH_10; + +uint32_t getRISCVVendorRelMarker(llvm::StringRef rvVendor); +std::optional getRISCVVendorString(RelType ty); + +class vendor_reloc_iterator { +public: + using iterator_category = std::forward_iterator_tag; + using value_type = Relocation; + using difference_type = std::ptrdiff_t; + using pointer = Relocation *; + using reference = Relocation; // returned by value + + vendor_reloc_iterator(MutableArrayRef::iterator i, + MutableArrayRef::iterator e) + : it(i), end(e) {} + + // Dereference + Relocation operator*() const { + Relocation r = *it; + r.type.v |= rvVendorFlag; + return r; + } + + struct vendor_reloc_proxy { + Relocation r; + const Relocation *operator->() const { return &r; } + }; + + vendor_reloc_proxy operator->() const { + return vendor_reloc_proxy{this->operator*()}; + } + + vendor_reloc_iterator &operator++() { + ++it; + if (it != end && it->type == llvm::ELF::R_RISCV_VENDOR) { + rvVendorFlag = 
getRISCVVendorRelMarker(it->sym->getName()); + ++it; + } else { + rvVendorFlag = 0; + } + return *this; + } + + vendor_reloc_iterator operator++(int) { + vendor_reloc_iterator tmp(*this); + ++(*this); + return tmp; + } + + bool operator==(const vendor_reloc_iterator &other) const { + return it == other.it; + } + bool operator!=(const vendor_reloc_iterator &other) const { + return it != other.it; + } + + Relocation *getUnderlyingRelocation() const { return &*it; } + +private: + MutableArrayRef::iterator it; + MutableArrayRef::iterator end; + uint32_t rvVendorFlag = 0; +}; + +inline auto riscv_vendor_relocs(MutableArrayRef arr) { + return llvm::make_range(vendor_reloc_iterator(arr.begin(), arr.end()), + vendor_reloc_iterator(arr.end(), arr.end())); +} + +} // namespace lld::elf + +#endif diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 89e4dbeed3109..3fc3e3f16e9e0 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -24,6 +24,7 @@ //===----------------------------------------------------------------------===// #include "Target.h" +#include "Arch/RISCVInternalRelocations.h" #include "InputFiles.h" #include "OutputSections.h" #include "RelocScan.h" @@ -40,6 +41,14 @@ using namespace lld::elf; std::string elf::toStr(Ctx &ctx, RelType type) { StringRef s = getELFRelocationTypeName(ctx.arg.emachine, type); + if (ctx.arg.emachine == EM_RISCV && s == "Unknown") { + auto VendorString = getRISCVVendorString(type); + if (VendorString) + s = getRISCVVendorRelocationTypeName(type & ~INTERNAL_RISCV_VENDOR_MASK, + *VendorString); + if (s == "Unknown") + return ("Unknown vendor-specific (" + Twine(type) + ")").str(); + } if (s == "Unknown") return ("Unknown (" + Twine(type) + ")").str(); return std::string(s); diff --git a/lld/test/ELF/riscv-vendor-relocations.s b/lld/test/ELF/riscv-vendor-relocations.s index b0f3c4a30d060..f121adec95cd0 100644 --- a/lld/test/ELF/riscv-vendor-relocations.s +++ b/lld/test/ELF/riscv-vendor-relocations.s @@ -8,12 +8,19 @@ TARGET: nop 
-.global INVALID_VENDOR +.local INVALID_VENDOR +.local QUALCOMM +.local ANDES .reloc 1f, R_RISCV_VENDOR, INVALID_VENDOR+0 .reloc 1f, R_RISCV_VENDOR, INVALID_VENDOR+0 .reloc 1f, R_RISCV_CUSTOM255, TARGET -1: - nop - # CHECK: error: {{.*}}:(.text+0x4): malformed consecutive R_RISCV_VENDOR relocations # CHECK: error: {{.*}}:(.text+0x4): unknown vendor-specific relocation (255) in namespace 'INVALID_VENDOR' against symbol 'TARGET' +.reloc 1f, R_RISCV_VENDOR, QUALCOMM+0 +.reloc 1f, R_RISCV_CUSTOM192, TARGET +# CHECK: error: {{.*}}:(.text+0x4): unsupported vendor-specific relocation R_RISCV_QC_ABS20_U against symbol TARGET +.reloc 1f, R_RISCV_VENDOR, ANDES+0 +.reloc 1f, R_RISCV_CUSTOM241, TARGET +# CHECK: error: {{.*}}:(.text+0x4): unsupported vendor-specific relocation R_RISCV_NDS_BRANCH_10 against symbol TARGET +1: + nop diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index ad544342de329..b90f313f70cb2 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -209,7 +209,7 @@ what to add to your calendar invite. - `ics `__ - `Meeting details/agenda: `__ * - `LLVM Qualification Working Group `__ - - 1st Tuesday/Wednesday of the month + - 1st Tuesday of the month - `ics `__ `gcal `__ - `Minutes/docs `__ diff --git a/llvm/docs/QualGroup.rst b/llvm/docs/QualGroup.rst index 01c1f6f9d0032..1c065f69ef613 100644 --- a/llvm/docs/QualGroup.rst +++ b/llvm/docs/QualGroup.rst @@ -241,15 +241,8 @@ Agendas, meeting notes, and presentation slides for the sync-ups are shared to e Upcoming and past meeting agendas, and meeting minutes are published in a dedicated thread on the LLVM Discourse forum: `Meeting Agendas and Minutes `_ -Slides used to support discussions during sync-up meetings are stored in LLVM's GitHub repository. 
- -Available slides: - -* (add future entries here) -* `October 2025 `_ -* `September 2025 `_ -* `August 2025 `_ -* `July 2025 `_ +Slides used to support discussions during sync-up meetings are stored in a dedicated Google Drive folder: `Link `_. +Note that the naming convention for these slides is *YYYYMM*\_llvm_qual_wg. AI Transcription Policy ======================= diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 561a9c51b9cc2..b07f95018ca90 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -365,6 +365,10 @@ struct PPCOperand : public MCParsedAsmOperand { bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); } bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); } bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); } + bool isS32Imm() const { + // TODO: Is ContextImmediate needed? + return Kind == Expression || isSImm<32>(); + } bool isS34Imm() const { // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit // ContextImmediate is needed. 
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 04b886ae74993..558351b515a2e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -47,6 +47,9 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_half16ds: case PPC::fixup_ppc_half16dq: return Value & 0xfffc; + case PPC::fixup_ppc_pcrel32: + case PPC::fixup_ppc_imm32: + return Value & 0xffffffff; case PPC::fixup_ppc_pcrel34: case PPC::fixup_ppc_imm34: return Value & 0x3ffffffff; @@ -71,6 +74,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case PPC::fixup_ppc_br24abs: case PPC::fixup_ppc_br24_notoc: return 4; + case PPC::fixup_ppc_pcrel32: + case PPC::fixup_ppc_imm32: case PPC::fixup_ppc_pcrel34: case PPC::fixup_ppc_imm34: case FK_Data_8: @@ -154,6 +159,8 @@ MCFixupKindInfo PPCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_ppc_brcond14abs", 16, 14, 0}, {"fixup_ppc_half16", 0, 16, 0}, {"fixup_ppc_half16ds", 0, 14, 0}, + {"fixup_ppc_pcrel32", 0, 32, 0}, + {"fixup_ppc_imm32", 0, 32, 0}, {"fixup_ppc_pcrel34", 0, 34, 0}, {"fixup_ppc_imm34", 0, 34, 0}, {"fixup_ppc_nofixup", 0, 0, 0}}; @@ -166,6 +173,8 @@ MCFixupKindInfo PPCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_ppc_brcond14abs", 2, 14, 0}, {"fixup_ppc_half16", 0, 16, 0}, {"fixup_ppc_half16ds", 2, 14, 0}, + {"fixup_ppc_pcrel32", 0, 32, 0}, + {"fixup_ppc_imm32", 0, 32, 0}, {"fixup_ppc_pcrel34", 0, 34, 0}, {"fixup_ppc_imm34", 0, 34, 0}, {"fixup_ppc_nofixup", 0, 0, 0}}; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index df0c666f5b113..4164b697649cd 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -40,6 +40,12 @@ enum Fixups { /// instrs like 'std'. 
fixup_ppc_half16ds, + // A 32-bit fixup corresponding to PC-relative paddis. + fixup_ppc_pcrel32, + + // A 32-bit fixup corresponding to Non-PC-relative paddis. + fixup_ppc_imm32, + // A 34-bit fixup corresponding to PC-relative paddi. fixup_ppc_pcrel34, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index a2f981e861511..46d6093be3c17 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -430,6 +430,17 @@ void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo, STI, O); } +void PPCInstPrinter::printS32ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + long long Value = MI->getOperand(OpNo).getImm(); + assert(isInt<32>(Value) && "Invalid s32imm argument!"); + O << (long long)Value; + } else + printOperand(MI, OpNo, STI, O); +} + void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 01ff6255f2a03..2fbd06c5a96cf 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -80,6 +80,8 @@ class PPCInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); void printS16ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printS32ImmOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printS34ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp 
b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 81d8e94b660d7..b28304b07e1a3 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -46,6 +46,7 @@ static void addFixup(SmallVectorImpl &Fixups, uint32_t Offset, case PPC::fixup_ppc_br24_notoc: case PPC::fixup_ppc_brcond14: case PPC::fixup_ppc_pcrel34: + case PPC::fixup_ppc_pcrel32: PCRel = true; } Fixups.push_back(MCFixup::create(Offset, Value, Kind, PCRel)); diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index e417ffe6d3677..39e6f4f139c11 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -312,9 +312,41 @@ class 8RR_XX4Form_XTABC6_P opcode, dag OOL, dag IOL, string asmstr, let Inst{63} = XT{5}; } +class MLS_DForm_R_SI32_RTA5 opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<32> SI; + + let Pattern = pattern; + + // The prefix. + let Inst{6...7} = 2; + let Inst{8} = 0; + let Inst{11} = PCRel; + let Inst{16...31} = SI{31...16}; + + // The instruction. 
+ let Inst{38...42} = RT; + let Inst{43...47} = RA; + let Inst{48...63} = SI{15...0}; +} + +multiclass MLS_DForm_R_SI32_RTA5_p opcode, dag OOL, dag IOL, + dag PCRel_IOL, string asmstr, + InstrItinClass itin> { + def NAME : MLS_DForm_R_SI32_RTA5; + def pc : MLS_DForm_R_SI32_RTA5, + isPCRel; +} + //-------------------------- Instruction definitions -------------------------// // Predicate combinations available: // [IsISAFuture] +// [IsISAFuture, PrefixInstrs] // [HasVSX, IsISAFuture] // [HasVSX, PrefixInstrs, IsISAFuture] @@ -346,6 +378,18 @@ let Predicates = [IsISAFuture] in { } } +let Predicates = [IsISAFuture, PrefixInstrs] in { + defm PADDIS : MLS_DForm_R_SI32_RTA5_p<15, (outs gprc:$RT), + (ins gprc_nor0:$RA, s32imm:$SI), + (ins immZero:$RA, s32imm_pcrel:$SI), + "paddis $RT, $RA, $SI", IIC_LdStLFD>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in + defm PADDIS8 : MLS_DForm_R_SI32_RTA5_p<15, (outs g8rc:$RT), + (ins g8rc_nox0:$RA, s32imm:$SI), + (ins immZero:$RA, s32imm_pcrel:$SI), + "paddis $RT, $RA, $SI", IIC_LdStLFD>; +} + let Predicates = [HasVSX, IsISAFuture] in { let mayLoad = 1 in { def LXVRL : XX1Form_memOp<31, 525, (outs vsrc:$XT), diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index d6b13680a057e..e23914a050359 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -808,6 +808,25 @@ def s17imm64 : Operand { let DecoderMethod = "decodeSImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; } +def PPCS32ImmAsmOperand : AsmOperandClass { + let Name = "S32Imm"; + let PredicateMethod = "isS32Imm"; + let RenderMethod = "addImmOperands"; +} +def s32imm : Operand { + let PrintMethod = "printS32ImmOperand"; + let EncoderMethod = "getImmEncoding"; + let ParserMatchClass = PPCS32ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<32>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def s32imm_pcrel : Operand { + let PrintMethod = "printS32ImmOperand"; + 
let EncoderMethod = "getImmEncoding"; + let ParserMatchClass = PPCS32ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<32>"; + let OperandType = "OPERAND_IMMEDIATE"; +} def PPCS34ImmAsmOperand : AsmOperandClass { let Name = "S34Imm"; let PredicateMethod = "isS34Imm"; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index cf6e7315114dc..33eee8e059486 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5886,6 +5886,12 @@ static void collectOffsetOp(Value *V, SmallVectorImpl &Offsets, Offsets.emplace_back(Instruction::Xor, Inst->getOperand(1)); Offsets.emplace_back(Instruction::Xor, Inst->getOperand(0)); break; + case Instruction::Shl: + if (Inst->hasNoSignedWrap()) + Offsets.emplace_back(Instruction::AShr, Inst->getOperand(1)); + if (Inst->hasNoUnsignedWrap()) + Offsets.emplace_back(Instruction::LShr, Inst->getOperand(1)); + break; case Instruction::Select: if (AllowRecursion) { collectOffsetOp(Inst->getOperand(1), Offsets, /*AllowRecursion=*/false); @@ -5942,9 +5948,31 @@ static Instruction *foldICmpEqualityWithOffset(ICmpInst &I, collectOffsetOp(Op1, OffsetOps, /*AllowRecursion=*/true); auto ApplyOffsetImpl = [&](Value *V, unsigned BinOpc, Value *RHS) -> Value * { + switch (BinOpc) { + // V = shl nsw X, RHS => X = ashr V, RHS + case Instruction::AShr: { + const APInt *CV, *CRHS; + if (!(match(V, m_APInt(CV)) && match(RHS, m_APInt(CRHS)) && + CV->ashr(*CRHS).shl(*CRHS) == *CV) && + !match(V, m_NSWShl(m_Value(), m_Specific(RHS)))) + return nullptr; + break; + } + // V = shl nuw X, RHS => X = lshr V, RHS + case Instruction::LShr: { + const APInt *CV, *CRHS; + if (!(match(V, m_APInt(CV)) && match(RHS, m_APInt(CRHS)) && + CV->lshr(*CRHS).shl(*CRHS) == *CV) && + !match(V, m_NUWShl(m_Value(), m_Specific(RHS)))) + return nullptr; + break; + } + default: + break; + } + Value *Simplified = simplifyBinOp(BinOpc, 
V, RHS, SQ); - // Avoid infinite loops by checking if RHS is an identity for the BinOp. - if (!Simplified || Simplified == V) + if (!Simplified) return nullptr; // Reject constant expressions as they don't simplify things. if (isa(Simplified) && !match(Simplified, m_ImmConstant())) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 5fbd61a929fe2..92969c8ed9ec0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -845,23 +845,13 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { if (!MinMaxR) return nullptr; - auto *RepR = dyn_cast(MinMaxR); - if (!isa(MinMaxR) && - !(RepR && isa(RepR->getUnderlyingInstr()))) + // Check that MinMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe + // with an intrinsic that matches the reduction kind. + Intrinsic::ID ExpectedIntrinsicID = + getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind()); + if (!match(MinMaxR, m_Intrinsic(ExpectedIntrinsicID))) return nullptr; -#ifndef NDEBUG - Intrinsic::ID RdxIntrinsicId = - RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum - : Intrinsic::minnum; - assert(((isa(MinMaxR) && - cast(MinMaxR)->getVectorIntrinsicID() == - RdxIntrinsicId) || - (RepR && cast(RepR->getUnderlyingInstr()) - ->getIntrinsicID() == RdxIntrinsicId)) && - "Intrinsic did not match recurrence kind"); -#endif - if (MinMaxR->getOperand(0) == RedPhiR) return MinMaxR->getOperand(1); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 79275e7bffdd0..07dfe31eea46d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -856,6 +856,11 @@ template inline IntrinsicID_match m_Intrinsic() { return IntrinsicID_match(IntrID); } +/// Match intrinsic calls with a runtime intrinsic ID. 
+inline IntrinsicID_match m_Intrinsic(Intrinsic::ID IntrID) { + return IntrinsicID_match(IntrID); +} + template inline typename m_Intrinsic_Ty::Ty m_Intrinsic(const T0 &Op0) { return m_CombineAnd(m_Intrinsic(), m_Argument<0>(Op0)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4b7f90118374b..67aca48bcaf8f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -703,6 +703,23 @@ static SmallVector collectUsersRecursively(VPValue *V) { return Users.takeVector(); } +/// Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd +/// (IndStart, ScalarIVSteps (0, Step)). This is used when the recipe only +/// generates scalar values. +static VPValue * +scalarizeVPWidenPointerInduction(VPWidenPointerInductionRecipe *PtrIV, + VPlan &Plan, VPBuilder &Builder) { + const InductionDescriptor &ID = PtrIV->getInductionDescriptor(); + VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0); + VPValue *StepV = PtrIV->getOperand(1); + VPScalarIVStepsRecipe *Steps = createScalarIVSteps( + Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr, + nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder); + + return Builder.createPtrAdd(PtrIV->getStartValue(), Steps, + PtrIV->getDebugLoc(), "next.gep"); +} + /// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd /// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as /// VPWidenPointerInductionRecipe will generate vectors only. 
If some users @@ -755,16 +772,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF())) continue; - const InductionDescriptor &ID = PtrIV->getInductionDescriptor(); - VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0); - VPValue *StepV = PtrIV->getOperand(1); - VPScalarIVStepsRecipe *Steps = createScalarIVSteps( - Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr, - nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder); - - VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps, - PtrIV->getDebugLoc(), "next.gep"); - + VPValue *PtrAdd = scalarizeVPWidenPointerInduction(PtrIV, Plan, Builder); PtrIV->replaceAllUsesWith(PtrAdd); continue; } @@ -3575,6 +3583,16 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) { } if (auto *WidenIVR = dyn_cast(&R)) { + // If the recipe only generates scalars, scalarize it instead of + // expanding it. + if (WidenIVR->onlyScalarsGenerated(Plan.hasScalableVF())) { + VPBuilder Builder(WidenIVR); + VPValue *PtrAdd = + scalarizeVPWidenPointerInduction(WidenIVR, Plan, Builder); + WidenIVR->replaceAllUsesWith(PtrAdd); + ToRemove.push_back(WidenIVR); + continue; + } expandVPWidenPointerInduction(WidenIVR, TypeInfo); ToRemove.push_back(WidenIVR); continue; diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll index 97896e598de90..8fba0a4187e81 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll @@ -17,7 +17,8 @@ define void @f(ptr %dst) { entry: ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) - ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + ; CHECK: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + ; CHECK-COUNT-2: load 
float, ptr addrspace(2) [[GEP]] %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a1, ptr %dst, align 32 @@ -30,15 +31,17 @@ entry: ; CHECK: define void @g define void @g(ptr %dst) { entry: - ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) - ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + ; CHECK: [[PTR1:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR1]], i32 16 + ; CHECK: load float, ptr addrspace(2) [[GEP1]] %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a1, ptr %dst, align 32 br label %next next: - ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) - ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + ; CHECK: [[PTR2:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: [[GEP2:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR2]], i32 16 + ; CHECK: load float, ptr addrspace(2) [[GEP2]] %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a2, ptr %dst, align 32 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index b27a50d93f5b9..1024c6b546c4a 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -298,6 +298,12 @@ #CHECK: mtlpl 3, 4 0x7c,0x80,0x1a,0x26 +#CHECK: paddis 10, 12, 1000000000, 0 +0x06,0x00,0x3b,0x9a,0x3d,0x4c,0xca,0x00 + 
+#CHECK: paddis 10, 0, 1000000000, 1 +0x06,0x10,0x3b,0x9a,0x3d,0x40,0xca,0x00 + #CHECK: xxmulmul 8, 3, 4, 2 0xed,0x03,0x22,0x08 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index 72662d9736740..bda8d1e69442f 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -292,6 +292,12 @@ #CHECK: mtlpl 3, 4 0x26,0x1a,0x80,0x7c +#CHECK: paddis 10, 12, 1000000000, 0 +0x9a,0x3b,0x00,0x06,0x00,0xca,0x4c,0x3d + +#CHECK: paddis 10, 0, 1000000000, 1 +0x9a,0x3b,0x10,0x06,0x00,0xca,0x40,0x3d + #CHECK: xxmulmul 8, 3, 4, 2 0x08,0x22,0x03,0xed diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index ab72649fc3404..eb616a15500f1 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -419,6 +419,18 @@ #CHECK-BE: mtlpl 3, 4 # encoding: [0x7c,0x80,0x1a,0x26] #CHECK-LE: mtlpl 3, 4 # encoding: [0x26,0x1a,0x80,0x7c] + paddis 10, 12, 1000000000, 0 +#CHECK-BE: paddis 10, 12, 1000000000, 0 # encoding: [0x06,0x00,0x3b,0x9a, +#CHECK-BE-SAME: 0x3d,0x4c,0xca,0x00] +#CHECK-LE: paddis 10, 12, 1000000000, 0 # encoding: [0x9a,0x3b,0x00,0x06, +#CHECK-LE-SAME: 0x00,0xca,0x4c,0x3d] + + paddis 10, 0, 1000000000, 1 +#CHECK-BE: paddis 10, 0, 1000000000, 1 # encoding: [0x06,0x10,0x3b,0x9a, +#CHECK-BE-SAME: 0x3d,0x40,0xca,0x00] +#CHECK-LE: paddis 10, 0, 1000000000, 1 # encoding: [0x9a,0x3b,0x10,0x06, +#CHECK-LE-SAME: 0x00,0xca,0x40,0x3d] + xxmulmul 8, 3, 4, 2 #CHECK-BE: xxmulmul 8, 3, 4, 2 # encoding: [0xed,0x03,0x22,0x08] #CHECK-LE: xxmulmul 8, 3, 4, 2 # encoding: [0x08,0x22,0x03,0xed] diff --git a/llvm/test/MC/PowerPC/ppc64-errors.s b/llvm/test/MC/PowerPC/ppc64-errors.s index 8598174300e42..4d4da58f650fe 100644 --- a/llvm/test/MC/PowerPC/ppc64-errors.s +++ b/llvm/test/MC/PowerPC/ppc64-errors.s @@ -4,6 
+4,11 @@ # RUN: not llvm-mc -triple powerpc64le-unknown-unknown < %s 2> %t # RUN: FileCheck < %t %s +# From ISAFuture + +# CHECK: error: invalid operand for instruction +paddis 10, 5, 1000000000, 1 + # From ISA31 # CHECK: error: invalid operand for instruction diff --git a/llvm/test/Transforms/InstCombine/icmp-select.ll b/llvm/test/Transforms/InstCombine/icmp-select.ll index c6c0ba385a6fd..c29527a3c3c5e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-select.ll +++ b/llvm/test/Transforms/InstCombine/icmp-select.ll @@ -835,3 +835,120 @@ define i1 @discr_eq_constantexpr(ptr %p) { %cmp = icmp eq i64 %sub, -1 ret i1 %cmp } + +define i1 @shl_nsw_eq_simplify_zero_to_self(i8 %a, i1 %cond) { +; CHECK-LABEL: @shl_nsw_eq_simplify_zero_to_self( +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[COND:%.*]] to i8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[TMP1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nsw i8 %a, 3 + %sel = select i1 %cond, i8 8, i8 0 + %cmp = icmp eq i8 %shl, %sel + ret i1 %cmp +} + +define i1 @shl_nsw_eq(i8 %a, i1 %cond) { +; CHECK-LABEL: @shl_nsw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 1, i8 -15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[TMP1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nsw i8 %a, 3 + %sel = select i1 %cond, i8 8, i8 -120 + %cmp = icmp eq i8 %shl, %sel + ret i1 %cmp +} + +define i1 @shl_nuw_eq(i8 %a, i1 %cond) { +; CHECK-LABEL: @shl_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 1, i8 17 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[TMP1]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i8 %a, 3 + %sel = select i1 %cond, i8 8, i8 -120 + %cmp = icmp eq i8 %shl, %sel + ret i1 %cmp +} + +define i1 @shl_nsw_failed_to_simplify(i8 %a, i1 %cond) { +; CHECK-LABEL: @shl_nsw_failed_to_simplify( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 1 +; CHECK-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true +; CHECK-NEXT: [[CMP:%.*]] = select i1 [[NOT_COND]], i1 [[CMP1]], i1 false +; 
CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nsw i8 %a, 4 + %sel = select i1 %cond, i8 8, i8 16 + %cmp = icmp eq i8 %shl, %sel + ret i1 %cmp +} + +define i1 @shl_nuw_failed_to_simplify(i8 %a, i1 %cond) { +; CHECK-LABEL: @shl_nuw_failed_to_simplify( +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 4 +; CHECK-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true +; CHECK-NEXT: [[CMP:%.*]] = select i1 [[NOT_COND]], i1 [[CMP1]], i1 false +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl nuw i8 %a, 3 + %sel = select i1 %cond, i8 -1, i8 32 + %cmp = icmp eq i8 %shl, %sel + ret i1 %cmp +} + +define i1 @shl_failed_to_simplify(i8 %a, i1 %cond) { +; CHECK-LABEL: @shl_failed_to_simplify( +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[A:%.*]], 3 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], i8 8, i8 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHL]], [[SEL]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl = shl i8 %a, 3 + %sel = select i1 %cond, i8 8, i8 32 + %cmp = icmp eq i8 %shl, %sel + ret i1 %cmp +} + +define i1 @shl_nuw_ne(i8 %a, i8 %b, i8 %c, i1 %cond) { +; CHECK-LABEL: @shl_nuw_ne( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i8 [[B:%.*]], i8 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP1]], [[A:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %shl_a = shl nuw i8 %a, 3 + %shl_b = shl nuw i8 %b, 3 + %sel = select i1 %cond, i8 %shl_b, i8 32 + %cmp = icmp ne i8 %sel, %shl_a + ret i1 %cmp +} + +define i1 @shl_const_phi_failed_to_simplify(i64 %indvars, i32 %conv) { +; CHECK-LABEL: @shl_const_phi_failed_to_simplify( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i64 [[INDVARS:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP_SLT]], label [[END:%.*]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[CONST_PHI:%.*]] = phi i32 [ 0, [[THEN]] ], [ 65535, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SHL_NUW:%.*]] = shl nuw i32 [[CONV:%.*]], 31 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP_SLT]], i32 [[CONST_PHI]], i32 [[SHL_NUW]] +; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i32 [[SEL]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %cmp_slt = icmp slt i64 %indvars, 1 + br i1 %cmp_slt, label %end, label %then + +then: + br label %end + +end: + %const_phi = phi i32 [ 0, %then ], [ 65535, %entry ] + %shl_nuw = shl nuw i32 %conv, 31 + %sel = select i1 %cmp_slt, i32 %const_phi, i32 %shl_nuw + %cmp = icmp eq i32 %sel, 0 + ret i1 %cmp +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll index fa710cb8d65b1..164a5cd1ae3c0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll @@ -61,4 +61,104 @@ exit: ret void } +define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c) #1 { +; CHECK-LABEL: define i1 @scalarize_ptr_induction( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]], ptr noalias [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[START5:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -12 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START5]] +; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 12 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[END1]], -12 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[START2]] +; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 [[TMP7]], 12 +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 12 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] +; CHECK-NEXT: 
[[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, ptr [[END]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP3]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP13]], splat (i64 12) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, [[VECTOR_GEP]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.vp.gather.nxv2i32.nxv2p0( align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = zext [[TMP18]] to +; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP19]], splat (i64 -7070675565921424023) +; CHECK-NEXT: [[TMP21:%.*]] = add [[TMP20]], splat (i64 -4) +; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[TMP21]], align 1 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw i8, [[VECTOR_GEP]], i64 12 +; CHECK-NEXT: 
[[TMP17:%.*]] = icmp eq [[TMP16]], [[BROADCAST_SPLAT7]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = mul i64 12, [[TMP26]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP26]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2 +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement [[TMP17]], i64 [[TMP29]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[L]] to i64 +; CHECK-NEXT: [[UNUSED:%.*]] = load i32, ptr [[PTR_IV]], align 4 +; CHECK-NEXT: [[MUL1:%.*]] = mul i64 [[EXT]], -7070675565921424023 +; CHECK-NEXT: [[MUL2:%.*]] = add i64 [[MUL1]], -4 +; CHECK-NEXT: store i64 [[MUL2]], ptr [[DST]], align 1 +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 12 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[CMP_LCSSA:%.*]] = phi i1 [ [[CMP]], %[[LOOP]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i1 [[CMP_LCSSA]] +; +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ 
%ptr.iv.next, %loop ] + %gep = getelementptr i8, ptr %ptr.iv, i64 4 + %l = load i32, ptr %gep, align 4 + %ext = zext i32 %l to i64 + %unused = load i32, ptr %ptr.iv, align 4 + %mul1 = mul i64 %ext, -7070675565921424023 + %mul2 = add i64 %mul1, -4 + store i64 %mul2, ptr %dst, align 1 + %ptr.iv.next = getelementptr nusw i8, ptr %ptr.iv, i64 12 + %cmp = icmp eq ptr %ptr.iv.next, %end + %or.cond = select i1 %cmp, i1 true, i1 false + br i1 %or.cond, label %exit, label %loop + +exit: + ret i1 %cmp +} + attributes #0 = { "target-features"="+v" } +attributes #1 = { "target-cpu"="sifive-p670" } diff --git a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h index 47b8381eefda8..32dd8ba2bc391 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h @@ -60,6 +60,12 @@ LogicalResult vectorOneToOneRewrite(Operation *op, StringRef targetOp, Attribute propertiesAttr, const LLVMTypeConverter &typeConverter, ConversionPatternRewriter &rewriter); + +/// Return "true" if the given type is an unsupported floating point type. In +/// case of a vector type, return "true" if the element type is an unsupported +/// floating point type. +bool isUnsupportedFloatingPointType(const TypeConverter &typeConverter, + Type type); } // namespace detail } // namespace LLVM @@ -97,16 +103,6 @@ class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; using Super = VectorConvertToLLVMPattern; - /// Return the given type if it's a floating point type. If the given type is - /// a vector type, return its element type if it's a floating point type. 
- static FloatType getFloatingPointType(Type type) { - if (auto floatType = dyn_cast(type)) - return floatType; - if (auto vecType = dyn_cast(type)) - return dyn_cast(vecType.getElementType()); - return nullptr; - } - LogicalResult matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { @@ -114,26 +110,18 @@ class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern { std::is_base_of, SourceOp>::value, "expected single result op"); - // The pattern should not apply if a floating-point operand is converted to - // a non-floating-point type. This indicates that the floating point type - // is not supported by the LLVM lowering. (Such types are converted to - // integers.) - auto checkType = [&](Value v) -> LogicalResult { - FloatType floatType = getFloatingPointType(v.getType()); - if (!floatType) - return success(); - Type convertedType = this->getTypeConverter()->convertType(floatType); - if (!isa_and_nonnull(convertedType)) - return rewriter.notifyMatchFailure(op, - "unsupported floating point type"); - return success(); - }; + // Bail on unsupported floating point types. (These are type-converted to + // integer types.) 
if (FailOnUnsupportedFP) { for (Value operand : op->getOperands()) - if (failed(checkType(operand))) - return failure(); - if (failed(checkType(op->getResult(0)))) - return failure(); + if (LLVM::detail::isUnsupportedFloatingPointType( + *this->getTypeConverter(), operand.getType())) + return rewriter.notifyMatchFailure(op, + "unsupported floating point type"); + if (LLVM::detail::isUnsupportedFloatingPointType( + *this->getTypeConverter(), op->getResult(0).getType())) + return rewriter.notifyMatchFailure(op, + "unsupported floating point type"); } // Determine attributes for the target op diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index cc3e8468f298b..220826dc5f3ac 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -483,6 +483,10 @@ CmpIOpLowering::matchAndRewrite(arith::CmpIOp op, OpAdaptor adaptor, LogicalResult CmpFOpLowering::matchAndRewrite(arith::CmpFOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { + if (LLVM::detail::isUnsupportedFloatingPointType(*this->getTypeConverter(), + op.getLhs().getType())) + return rewriter.notifyMatchFailure(op, "unsupported floating point type"); + Type operandType = adaptor.getLhs().getType(); Type resultType = op.getResult().getType(); LLVM::FastmathFlags fmf = diff --git a/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp b/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp index 24b01259f0499..e5969c2539566 100644 --- a/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp @@ -130,3 +130,24 @@ LogicalResult LLVM::detail::vectorOneToOneRewrite( return handleMultidimensionalVectors(op, operands, typeConverter, callback, rewriter); } + +/// Return the given type if it's a floating point type. If the given type is +/// a vector type, return its element type if it's a floating point type. 
+static FloatType getFloatingPointType(Type type) { + if (auto floatType = dyn_cast(type)) + return floatType; + if (auto vecType = dyn_cast(type)) + return dyn_cast(vecType.getElementType()); + return nullptr; +} + +bool LLVM::detail::isUnsupportedFloatingPointType( + const TypeConverter &typeConverter, Type type) { + FloatType floatType = getFloatingPointType(type); + if (!floatType) + return false; + Type convertedType = typeConverter.convertType(floatType); + if (!convertedType) + return true; + return !isa(convertedType); +} diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index 6fdc1104d2609..b53c52d75c0aa 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -770,12 +770,14 @@ func.func @memref_bitcast(%1: memref) -> memref { // CHECK: arith.addf {{.*}} : f4E2M1FN // CHECK: arith.addf {{.*}} : vector<4xf4E2M1FN> // CHECK: arith.addf {{.*}} : vector<8x4xf4E2M1FN> +// CHECK: arith.cmpf {{.*}} : f4E2M1FN // CHECK: llvm.select {{.*}} : i1, i4 func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2: vector<8x4xf4E2M1FN>, %arg3: f4E2M1FN, %arg4: i1) { %0 = arith.addf %arg0, %arg0 : f4E2M1FN %1 = arith.addf %arg1, %arg1 : vector<4xf4E2M1FN> %2 = arith.addf %arg2, %arg2 : vector<8x4xf4E2M1FN> - %3 = arith.select %arg4, %arg0, %arg3 : f4E2M1FN + %3 = arith.cmpf oeq, %arg0, %arg3 : f4E2M1FN + %4 = arith.select %arg4, %arg0, %arg3 : f4E2M1FN return } @@ -785,9 +787,11 @@ func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2 // CHECK: llvm.fadd {{.*}} : f32 // CHECK: llvm.fadd {{.*}} : vector<4xf32> // CHECK-COUNT-4: llvm.fadd {{.*}} : vector<8xf32> -func.func @supported_fp_type(%arg0: f32, %arg1: vector<4xf32>, %arg2: vector<4x8xf32>) -> (f32, vector<4xf32>, vector<4x8xf32>) { +// CHECK: llvm.fcmp {{.*}} : f32 +func.func @supported_fp_type(%arg0: f32, %arg1: vector<4xf32>, 
%arg2: vector<4x8xf32>, %arg3: f32) { %0 = arith.addf %arg0, %arg0 : f32 %1 = arith.addf %arg1, %arg1 : vector<4xf32> %2 = arith.addf %arg2, %arg2 : vector<4x8xf32> - return %0, %1, %2 : f32, vector<4xf32>, vector<4x8xf32> + %3 = arith.cmpf oeq, %arg0, %arg3 : f32 + return }