diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index ea9b87952cd84..1e9d44fa37bea 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -540,43 +540,6 @@ void EhFrameSection::finalizeContents() { this->size = off; } -// Returns data for .eh_frame_hdr. .eh_frame_hdr is a binary search table -// to get an FDE from an address to which FDE is applied. This function -// returns a list of such pairs. -SmallVector EhFrameSection::getFdeData() const { - uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff; - SmallVector ret; - - uint64_t va = getPartition(ctx).ehFrameHdr->getVA(); - for (CieRecord *rec : cieRecords) { - uint8_t enc = getFdeEncoding(rec->cie); - for (EhSectionPiece *fde : rec->fdes) { - uint64_t pc = getFdePc(buf, fde->outputOff, enc); - uint64_t fdeVA = getParent()->addr + fde->outputOff; - if (!isInt<32>(pc - va)) { - Err(ctx) << fde->sec << ": PC offset is too large: 0x" - << Twine::utohexstr(pc - va); - continue; - } - ret.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)}); - } - } - - // Sort the FDE list by their PC and uniqueify. Usually there is only - // one FDE for a PC (i.e. function), but if ICF merges two functions - // into one, there can be more than one FDEs pointing to the address. - auto less = [](const FdeData &a, const FdeData &b) { - return a.pcRel < b.pcRel; - }; - llvm::stable_sort(ret, less); - auto eq = [](const FdeData &a, const FdeData &b) { - return a.pcRel == b.pcRel; - }; - ret.erase(llvm::unique(ret, eq), ret.end()); - - return ret; -} - static uint64_t readFdeAddr(Ctx &ctx, uint8_t *buf, int size) { switch (size) { case DW_EH_PE_udata2: @@ -630,14 +593,79 @@ void EhFrameSection::writeTo(uint8_t *buf) { } } - // Apply relocations. .eh_frame section contents are not contiguous - // in the output buffer, but relocateAlloc() still works because - // getOffset() takes care of discontiguous section pieces. + // Apply relocations to .eh_frame entries. 
This includes CIE personality + // pointers, FDE initial_location fields, and LSDA pointers. for (EhInputSection *s : sections) ctx.target->relocateEh(*s, buf); - if (getPartition(ctx).ehFrameHdr && getPartition(ctx).ehFrameHdr->getParent()) - getPartition(ctx).ehFrameHdr->write(); + EhFrameHeader *hdr = getPartition(ctx).ehFrameHdr.get(); + if (!hdr || !hdr->getParent()) + return; + + // Write the .eh_frame_hdr section, which contains a binary search table of + // pointers to FDEs. This must be written after .eh_frame relocation since + // the content depends on relocated initial_location fields in FDEs. + using FdeData = EhFrameSection::FdeData; + SmallVector fdes; + uint64_t va = hdr->getVA(); + for (CieRecord *rec : cieRecords) { + uint8_t enc = getFdeEncoding(rec->cie); + for (EhSectionPiece *fde : rec->fdes) { + uint64_t pc = getFdePc(buf, fde->outputOff, enc); + uint64_t fdeVA = getParent()->addr + fde->outputOff; + if (!isInt<32>(pc - va)) { + Err(ctx) << fde->sec << ": PC offset is too large: 0x" + << Twine::utohexstr(pc - va); + continue; + } + fdes.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)}); + } + } + + // Sort the FDE list by their PC and uniqueify. Usually there is only + // one FDE for a PC (i.e. function), but if ICF merges two functions + // into one, there can be more than one FDE pointing to the same address. + llvm::stable_sort(fdes, [](const FdeData &a, const FdeData &b) { + return a.pcRel < b.pcRel; + }); + fdes.erase( + llvm::unique(fdes, [](auto &a, auto &b) { return a.pcRel == b.pcRel; }), + fdes.end()); + + // Write header. 
+ uint8_t *hdrBuf = ctx.bufferStart + hdr->getParent()->offset + hdr->outSecOff; + hdrBuf[0] = 1; // version + hdrBuf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; // eh_frame_ptr_enc + hdrBuf[2] = DW_EH_PE_udata4; // fde_count_enc + hdrBuf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; // table_enc + write32(ctx, hdrBuf + 4, + getParent()->addr - hdr->getVA() - 4); // eh_frame_ptr + write32(ctx, hdrBuf + 8, fdes.size()); // fde_count + hdrBuf += 12; + + // Write binary search table. Each entry describes the starting PC and the FDE + // address. + for (FdeData &fde : fdes) { + write32(ctx, hdrBuf, fde.pcRel); + write32(ctx, hdrBuf + 4, fde.fdeVARel); + hdrBuf += 8; + } +} + +EhFrameHeader::EhFrameHeader(Ctx &ctx) + : SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {} + +void EhFrameHeader::writeTo(uint8_t *buf) { + // The section content is written during EhFrameSection::writeTo. +} + +size_t EhFrameHeader::getSize() const { + // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. + return 12 + getPartition(ctx).ehFrame->numFdes * 8; +} + +bool EhFrameHeader::isNeeded() const { + return isLive() && getPartition(ctx).ehFrame->isNeeded(); } GotSection::GotSection(Ctx &ctx) @@ -3658,51 +3686,6 @@ void GdbIndexSection::writeTo(uint8_t *buf) { bool GdbIndexSection::isNeeded() const { return !chunks.empty(); } -EhFrameHeader::EhFrameHeader(Ctx &ctx) - : SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {} - -void EhFrameHeader::writeTo(uint8_t *buf) { - // Unlike most sections, the EhFrameHeader section is written while writing - // another section, namely EhFrameSection, which calls the write() function - // below from its writeTo() function. This is necessary because the contents - // of EhFrameHeader depend on the relocated contents of EhFrameSection and we - // don't know which order the sections will be written in. -} - -// .eh_frame_hdr contains a binary search table of pointers to FDEs. 
-// Each entry of the search table consists of two values, -// the starting PC from where FDEs covers, and the FDE's address. -// It is sorted by PC. -void EhFrameHeader::write() { - uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff; - using FdeData = EhFrameSection::FdeData; - SmallVector fdes = getPartition(ctx).ehFrame->getFdeData(); - - buf[0] = 1; - buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; - buf[2] = DW_EH_PE_udata4; - buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; - write32(ctx, buf + 4, - getPartition(ctx).ehFrame->getParent()->addr - this->getVA() - 4); - write32(ctx, buf + 8, fdes.size()); - buf += 12; - - for (FdeData &fde : fdes) { - write32(ctx, buf, fde.pcRel); - write32(ctx, buf + 4, fde.fdeVARel); - buf += 8; - } -} - -size_t EhFrameHeader::getSize() const { - // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. - return 12 + getPartition(ctx).ehFrame->numFdes * 8; -} - -bool EhFrameHeader::isNeeded() const { - return isLive() && getPartition(ctx).ehFrame->isNeeded(); -} - VersionDefinitionSection::VersionDefinitionSection(Ctx &ctx) : SyntheticSection(ctx, ".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC, sizeof(uint32_t)) {} diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 66c866d7e8cde..e01a5ad8abc60 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -68,7 +68,6 @@ class EhFrameSection final : public SyntheticSection { uint32_t fdeVARel; }; - SmallVector getFdeData() const; ArrayRef getCieRecords() const { return cieRecords; } template void iterateFDEWithLSDA(llvm::function_ref fn); @@ -95,6 +94,17 @@ class EhFrameSection final : public SyntheticSection { llvm::DenseMap, Symbol *>, CieRecord *> cieMap; }; +// .eh_frame_hdr contains a binary search table for .eh_frame FDEs. The section +// is covered by a PT_GNU_EH_FRAME segment, which allows the runtime unwinder to +// locate it via functions like `dl_iterate_phdr`. 
+class EhFrameHeader final : public SyntheticSection { +public: + EhFrameHeader(Ctx &); + void writeTo(uint8_t *buf) override; + size_t getSize() const override; + bool isNeeded() const override; +}; + class GotSection final : public SyntheticSection { public: GotSection(Ctx &); @@ -967,24 +977,6 @@ class GdbIndexSection final : public SyntheticSection { size_t size; }; -// --eh-frame-hdr option tells linker to construct a header for all the -// .eh_frame sections. This header is placed to a section named .eh_frame_hdr -// and also to a PT_GNU_EH_FRAME segment. -// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by -// calling dl_iterate_phdr. -// This section contains a lookup table for quick binary search of FDEs. -// Detailed info about internals can be found in Ian Lance Taylor's blog: -// http://www.airs.com/blog/archives/460 (".eh_frame") -// http://www.airs.com/blog/archives/462 (".eh_frame_hdr") -class EhFrameHeader final : public SyntheticSection { -public: - EhFrameHeader(Ctx &); - void write(); - void writeTo(uint8_t *buf) override; - size_t getSize() const override; - bool isNeeded() const override; -}; - // For more information about .gnu.version and .gnu.version_r see: // https://www.akkadia.org/drepper/symbol-versioning diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index aedb6643cf581..88164e6fa53d8 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -30,8 +30,8 @@ Static Compiler Commands Description: This command compiles an LLVM IL file (`input.ll`) to a SPIR-V binary (`output.spvt`) for a 32-bit architecture. 2. **Compilation with Extensions and Optimization** - Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers input.ll -o output.spvt` - Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_INTEL_arbitrary_precision_integers extension. 
+ Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers input.ll -o output.spvt` + Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_ALTERA_arbitrary_precision_integers extension. 3. **Compilation with experimental NonSemantic.Shader.DebugInfo.100 support** Command: `llc --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info input.ll -o output.spvt` @@ -136,7 +136,7 @@ extensions to enable or disable, each prefixed with ``+`` or ``-``, respectively To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use: -``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_arbitrary_precision_integers`` +``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_ALTERA_arbitrary_precision_integers`` To enable all extensions, use the following option: ``-spirv-ext=all`` @@ -145,7 +145,7 @@ To enable all KHR extensions, use the following option: ``-spirv-ext=khr`` To enable all extensions except specified, specify ``all`` followed by a list of disallowed extensions. For example: -``-spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers`` +``-spirv-ext=all,-SPV_ALTERA_arbitrary_precision_integers`` Below is a list of supported SPIR-V extensions, sorted alphabetically by their extension names: @@ -171,7 +171,7 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e - Extends the SPV_EXT_shader_atomic_float_add and SPV_EXT_shader_atomic_float_min_max to support addition, minimum and maximum on 16-bit `bfloat16` floating-point numbers in memory. 
* - ``SPV_INTEL_2d_block_io`` - Adds additional subgroup block prefetch, load, load transposed, load transformed and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two dimensional region of memory. - * - ``SPV_INTEL_arbitrary_precision_integers`` + * - ``SPV_ALTERA_arbitrary_precision_integers`` - Allows generating arbitrary width integer types. * - ``SPV_INTEL_bindless_images`` - Adds instructions to convert convert unsigned integer handles to images, samplers and sampled images. @@ -245,6 +245,9 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e - Adds execution mode and capability to enable maximal reconvergence. * - ``SPV_ALTERA_blocking_pipes`` - Adds new pipe read and write functions that have blocking semantics instead of the non-blocking semantics of the existing pipe read/write functions. + * - ``SPV_ALTERA_arbitrary_precision_fixed_point`` + - Add instructions for fixed point arithmetic. The extension works without SPV_ALTERA_arbitrary_precision_integers, but together they allow greater flexibility in representing arbitrary precision data types. 
+ SPIR-V representation in LLVM IR ================================ diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4550e40166525..a6212f5cc84be 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -9584,6 +9584,50 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget)) return V; + // When there is no cost for GPR <-> FPR, we can use zicond select for + // floating value when CondV is int type + bool FPinGPR = Subtarget.hasStdExtZfinx(); + + // We can handle FGPR without splitting into hi/lo parts + bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(), + Subtarget.getXLenVT().getSizeInBits()); + + bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR && + VT.isFloatingPoint() && FitsInGPR; + + if (UseZicondForFPSel) { + + auto CastToInt = [&](SDValue V) -> SDValue { + // Treat +0.0 as int 0 to enable single 'czero' instruction generation. + if (isNullFPConstant(V)) + return DAG.getConstant(0, DL, XLenVT); + + if (VT == MVT::f16) + return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V); + + if (VT == MVT::f32 && Subtarget.is64Bit()) + return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V); + + return DAG.getBitcast(XLenVT, V); + }; + + SDValue TrueVInt = CastToInt(TrueV); + SDValue FalseVInt = CastToInt(FalseV); + + // Emit integer SELECT (lowers to Zicond) + SDValue ResultInt = + DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt); + + // Convert back to floating VT + if (VT == MVT::f32 && Subtarget.is64Bit()) + return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt); + + if (VT == MVT::f16) + return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt); + + return DAG.getBitcast(VT, ResultInt); + } + // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ // nodes to implement the SELECT. 
Performing the lowering here allows for // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless @@ -10699,7 +10743,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, VecVT != MVT::v4i8 && VecVT != MVT::v2i32) return SDValue(); SDValue Extracted = DAG.getBitcast(XLenVT, Vec); - unsigned ElemWidth = EltVT.getSizeInBits(); + unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits(); SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx, DAG.getConstant(ElemWidth, DL, XLenVT)); return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt); diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 709f49b0fecc1..87ebee6a14eac 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2399,6 +2399,77 @@ static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call, return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0)); } +static bool buildAPFixedPointInst(const SPIRV::IncomingCall *Call, + unsigned Opcode, MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + SmallVector ImmArgs; + Register InputReg = Call->Arguments[0]; + const Type *RetTy = GR->getTypeForSPIRVType(Call->ReturnType); + bool IsSRet = RetTy->isVoidTy(); + + if (IsSRet) { + const LLT ValTy = MRI->getType(InputReg); + Register ActualRetValReg = MRI->createGenericVirtualRegister(ValTy); + SPIRVType *InstructionType = + GR->getPointeeType(GR->getSPIRVTypeForVReg(InputReg)); + InputReg = Call->Arguments[1]; + auto InputType = GR->getTypeForSPIRVType(GR->getSPIRVTypeForVReg(InputReg)); + Register PtrInputReg; + if (InputType->getTypeID() == llvm::Type::TypeID::TypedPointerTyID) { + LLT InputLLT = MRI->getType(InputReg); + PtrInputReg = MRI->createGenericVirtualRegister(InputLLT); + SPIRVType *PtrType = + GR->getPointeeType(GR->getSPIRVTypeForVReg(InputReg)); + MachineMemOperand *MMO1 = 
MIRBuilder.getMF().getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, + InputLLT.getSizeInBytes(), Align(4)); + MIRBuilder.buildLoad(PtrInputReg, InputReg, *MMO1); + MRI->setRegClass(PtrInputReg, &SPIRV::iIDRegClass); + GR->assignSPIRVTypeToVReg(PtrType, PtrInputReg, MIRBuilder.getMF()); + } + + for (unsigned index = 2; index < 7; index++) { + ImmArgs.push_back(getConstFromIntrinsic(Call->Arguments[index], MRI)); + } + + // Emit the instruction + auto MIB = MIRBuilder.buildInstr(Opcode) + .addDef(ActualRetValReg) + .addUse(GR->getSPIRVTypeID(InstructionType)); + if (PtrInputReg) + MIB.addUse(PtrInputReg); + else + MIB.addUse(InputReg); + + for (uint32_t Imm : ImmArgs) + MIB.addImm(Imm); + unsigned Size = ValTy.getSizeInBytes(); + // Store result to the pointer passed in Arg[0] + MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOStore, Size, Align(4)); + MRI->setRegClass(ActualRetValReg, &SPIRV::pIDRegClass); + MIRBuilder.buildStore(ActualRetValReg, Call->Arguments[0], *MMO); + return true; + } else { + for (unsigned index = 1; index < 6; index++) + ImmArgs.push_back(getConstFromIntrinsic(Call->Arguments[index], MRI)); + + return buildOpFromWrapper(MIRBuilder, Opcode, Call, + GR->getSPIRVTypeID(Call->ReturnType), ImmArgs); + } +} + +static bool generateAPFixedPointInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + + return buildAPFixedPointInst(Call, Opcode, MIRBuilder, GR); +} + static bool generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, @@ -3061,6 +3132,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR); case SPIRV::BlockingPipes: return 
generateBlockingPipesInst(Call.get(), MIRBuilder, GR); + case SPIRV::ArbitraryPrecisionFixedPoint: + return generateAPFixedPointInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 492a98e1995fe..98440856387c9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup; def Block2DLoadStore : BuiltinGroup; def Pipe : BuiltinGroup; def PredicatedLoadStore : BuiltinGroup; +def ArbitraryPrecisionFixedPoint : BuiltinGroup; def BlockingPipes : BuiltinGroup; //===----------------------------------------------------------------------===// @@ -1181,6 +1182,19 @@ defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, Bloc defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>; defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>; +//SPV_ALTERA_arbitrary_precision_fixed_point +defm : DemangledNativeBuiltin<"__spirv_FixedSqrtINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSqrtALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedRecipINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedRecipALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedRsqrtINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedRsqrtALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedCosINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedCosALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinCosINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinCosALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinPiINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinPiALTERA>; +defm 
: DemangledNativeBuiltin<"__spirv_FixedCosPiINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedCosPiALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinCosPiINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinCosPiALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedLogINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedLogALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedExpINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedExpALTERA>; + //===----------------------------------------------------------------------===// // Class defining an atomic instruction on floating-point numbers. // diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index d394b3ac243a9..146384f4bf08c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -53,8 +53,8 @@ static const std::map> SPIRV::Extension::Extension::SPV_GOOGLE_hlsl_functionality1}, {"SPV_GOOGLE_user_type", SPIRV::Extension::Extension::SPV_GOOGLE_user_type}, - {"SPV_INTEL_arbitrary_precision_integers", - SPIRV::Extension::Extension::SPV_INTEL_arbitrary_precision_integers}, + {"SPV_ALTERA_arbitrary_precision_integers", + SPIRV::Extension::Extension::SPV_ALTERA_arbitrary_precision_integers}, {"SPV_INTEL_cache_controls", SPIRV::Extension::Extension::SPV_INTEL_cache_controls}, {"SPV_INTEL_float_controls2", @@ -163,7 +163,11 @@ static const std::map> {"SPV_INTEL_kernel_attributes", SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}, {"SPV_ALTERA_blocking_pipes", - SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}}; + SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}, + {"SPV_INTEL_int4", SPIRV::Extension::Extension::SPV_INTEL_int4}, + {"SPV_ALTERA_arbitrary_precision_fixed_point", + SPIRV::Extension::Extension:: + SPV_ALTERA_arbitrary_precision_fixed_point}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, 
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 8b1a09caf907d..ae81d38579c18 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -155,7 +155,7 @@ unsigned SPIRVGlobalRegistry::adjustOpTypeIntWidth(unsigned Width) const { report_fatal_error("Unsupported integer width!"); const SPIRVSubtarget &ST = cast(CurMF->getSubtarget()); if (ST.canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers) || + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers) || ST.canUseExtension(SPIRV::Extension::SPV_INTEL_int4)) return Width; if (Width <= 8) @@ -183,11 +183,11 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeInt(unsigned Width, .addImm(SPIRV::Capability::Int4TypeINTEL); } else if ((!isPowerOf2_32(Width) || Width < 8) && ST.canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers)) { + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers)) { MIRBuilder.buildInstr(SPIRV::OpExtension) - .addImm(SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers); + .addImm(SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers); MIRBuilder.buildInstr(SPIRV::OpCapability) - .addImm(SPIRV::Capability::ArbitraryPrecisionIntegersINTEL); + .addImm(SPIRV::Capability::ArbitraryPrecisionIntegersALTERA); } return MIRBuilder.buildInstr(SPIRV::OpTypeInt) .addDef(createTypeVReg(MIRBuilder)) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 03bd61bdf2cf6..815d2d7ed854b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -999,3 +999,27 @@ def OpReadPipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$p "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; def OpWritePipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment), "OpWritePipeBlockingALTERA $pipe 
$pointer $packetSize $packetAlignment">; + +//SPV_ALTERA_arbitrary_precision_fixed_point +def OpFixedSqrtALTERA: Op<5923, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSqrtALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedRecipALTERA: Op<5924, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedRecipALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedRsqrtALTERA: Op<5925, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedRsqrtALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinALTERA: Op<5926, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedCosALTERA: Op<5927, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedCosALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinCosALTERA: Op<5928, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinCosALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinPiALTERA: Op<5929, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinPiALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedCosPiALTERA: Op<5930, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedCosPiALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinCosPiALTERA: Op<5931, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinCosPiALTERA 
$result_type $input $sign $l $rl $q $o">; +def OpFixedLogALTERA: Op<5932, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedLogALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedExpALTERA: Op<5933, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedExpALTERA $result_type $input $sign $l $rl $q $o">; diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index 53074ea3b2597..189c03a0ca3f9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -128,7 +128,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { bool IsExtendedInts = ST.canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers) || + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers) || ST.canUseExtension(SPIRV::Extension::SPV_KHR_bit_instructions) || ST.canUseExtension(SPIRV::Extension::SPV_INTEL_int4); auto extendedScalarsAndVectors = diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 00f750b88a608..2feb73d8dedfa 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1692,6 +1692,27 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::GroupNonUniformRotateKHR); Reqs.addCapability(SPIRV::Capability::GroupNonUniform); break; + case SPIRV::OpFixedCosALTERA: + case SPIRV::OpFixedSinALTERA: + case SPIRV::OpFixedCosPiALTERA: + case SPIRV::OpFixedSinPiALTERA: + case SPIRV::OpFixedExpALTERA: + case SPIRV::OpFixedLogALTERA: + case SPIRV::OpFixedRecipALTERA: + case SPIRV::OpFixedSqrtALTERA: + case SPIRV::OpFixedSinCosALTERA: + case SPIRV::OpFixedSinCosPiALTERA: + case SPIRV::OpFixedRsqrtALTERA: + if (!ST.canUseExtension( + 
SPIRV::Extension::SPV_ALTERA_arbitrary_precision_fixed_point)) + report_fatal_error("This instruction requires the " + "following SPIR-V extension: " + "SPV_ALTERA_arbitrary_precision_fixed_point", + false); + Reqs.addExtension( + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_fixed_point); + Reqs.addCapability(SPIRV::Capability::ArbitraryPrecisionFixedPointALTERA); + break; case SPIRV::OpGroupIMulKHR: case SPIRV::OpGroupFMulKHR: case SPIRV::OpGroupBitwiseAndKHR: diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 0f4b3d59b904a..7ca463460ffad 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -509,7 +509,7 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, bool IsExtendedInts = ST->canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers) || + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers) || ST->canUseExtension(SPIRV::Extension::SPV_KHR_bit_instructions) || ST->canUseExtension(SPIRV::Extension::SPV_INTEL_int4); diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index f02a587013856..94e0138c66487 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -318,7 +318,7 @@ defm SPV_INTEL_io_pipes : ExtensionOperand<63, [EnvOpenCL]>; defm SPV_KHR_ray_tracing : ExtensionOperand<64, [EnvVulkan]>; defm SPV_KHR_ray_query : ExtensionOperand<65, [EnvVulkan]>; defm SPV_INTEL_fpga_memory_accesses : ExtensionOperand<66, [EnvOpenCL]>; -defm SPV_INTEL_arbitrary_precision_integers : ExtensionOperand<67, [EnvOpenCL]>; +defm SPV_ALTERA_arbitrary_precision_integers : ExtensionOperand<67, [EnvOpenCL]>; defm SPV_EXT_shader_atomic_float_add : ExtensionOperand<68, [EnvVulkan, EnvOpenCL]>; defm SPV_KHR_terminate_invocation : ExtensionOperand<69, [EnvVulkan]>; @@ -390,6 +390,7 @@ defm 
SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>; defm SPV_INTEL_bfloat16_arithmetic : ExtensionOperand<129, [EnvVulkan, EnvOpenCL]>; defm SPV_INTEL_16bit_atomics : ExtensionOperand<130, [EnvVulkan, EnvOpenCL]>; +defm SPV_ALTERA_arbitrary_precision_fixed_point : ExtensionOperand<131, [EnvOpenCL, EnvVulkan]>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -549,7 +550,7 @@ defm ComputeDerivativeGroupLinearNV : CapabilityOperand<5350, 0, 0, [], []>; defm FragmentDensityEXT : CapabilityOperand<5291, 0, 0, [], [Shader]>; defm PhysicalStorageBufferAddressesEXT : CapabilityOperand<5347, 0, 0, [], [Shader]>; defm CooperativeMatrixNV : CapabilityOperand<5357, 0, 0, [], [Shader]>; -defm ArbitraryPrecisionIntegersINTEL : CapabilityOperand<5844, 0, 0, [SPV_INTEL_arbitrary_precision_integers], [Int8, Int16]>; +defm ArbitraryPrecisionIntegersALTERA : CapabilityOperand<5844, 0, 0, [SPV_ALTERA_arbitrary_precision_integers], [Int8, Int16]>; defm OptNoneINTEL : CapabilityOperand<6094, 0, 0, [SPV_INTEL_optnone], []>; defm OptNoneEXT : CapabilityOperand<6094, 0, 0, [SPV_EXT_optnone], []>; defm BitInstructions : CapabilityOperand<6025, 0, 0, [SPV_KHR_bit_instructions], []>; @@ -615,6 +616,7 @@ defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>; defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>; defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>; defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>; +defm ArbitraryPrecisionFixedPointALTERA : CapabilityOperand<5922, 0, 0, [SPV_ALTERA_arbitrary_precision_fixed_point], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index c7a0fd7407a4e..d36975699c4a8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -197,7 +197,8 @@ bool vputils::isSingleScalar(const VPValue *VPV) { all_of(VPI->operands(), isSingleScalar)); if (auto *RR = dyn_cast(VPV)) return !RR->isPartialReduction(); - if (isa(VPV)) + if (isa(VPV)) return true; if (auto *Expr = dyn_cast(VPV)) return Expr->isSingleScalar(); diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll index d4ea9e6c3def0..f803f6aa09652 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll @@ -484,6 +484,25 @@ define void @test_extract_vector_16(ptr %ret_ptr, ptr %a_ptr) { ret void } +define void @test_extract_vector_16_elem1(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-RV32-LABEL: test_extract_vector_16_elem1: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: lhu a1, 2(a1) +; CHECK-RV32-NEXT: sh a1, 0(a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_extract_vector_16_elem1: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: srli a1, a1, 16 +; CHECK-RV64-NEXT: sh a1, 0(a0) +; CHECK-RV64-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %extracted = extractelement <2 x i16> %a, i32 1 + store i16 %extracted, ptr %ret_ptr + ret void +} + define void @test_extract_vector_8(ptr %ret_ptr, ptr %a_ptr) { ; CHECK-LABEL: test_extract_vector_8: ; CHECK: # %bb.0: @@ -496,6 +515,19 @@ define void @test_extract_vector_8(ptr %ret_ptr, ptr %a_ptr) { ret void } +define void @test_extract_vector_8_elem1(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_extract_vector_8_elem1: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: srli a1, a1, 8 +; CHECK-NEXT: sb a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %extracted = extractelement <4 x i8> %a, i32 1 + store i8 %extracted, ptr 
%ret_ptr + ret void +} + ; Test for splat define void @test_non_const_splat_i8(ptr %ret_ptr, ptr %a_ptr, i8 %elt) { ; CHECK-LABEL: test_non_const_splat_i8: diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll index b39b807d43154..9b021df8dd452 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll @@ -495,6 +495,18 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) { ret void } +define void @test_extract_vector_32_elem1(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_extract_vector_32_elem1: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 4(a1) +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %extracted = extractelement <2 x i32> %a, i32 1 + store i32 %extracted, ptr %ret_ptr + ret void +} + ; Test basic add/sub operations for v2i32 (RV64 only) define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { ; CHECK-LABEL: test_padd_w: diff --git a/llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll b/llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll new file mode 100644 index 0000000000000..b505c84166eb1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll @@ -0,0 +1,798 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; Zicond with zfinx (implied by zdinx) +; RUN: llc -mtriple=riscv64 -mattr=+zdinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64ZDINX_ZICOND +; RUN: llc -mtriple=riscv64 -mattr=+zdinx -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64ZDINX_NOZICOND + +; Zicond with zfinx (implied by zhinx) +; RUN: llc -mtriple=riscv64 -mattr=+zhinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64ZHINX_ZICOND + +; Baseline with classic FP registers (no *inx); zicond select should NOT trigger +; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64FD + +; Check that the same optimization 
works on a 32-bit machine +; RUN: llc -mtriple=riscv32 -mattr=+zfinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZFINX_ZICOND +; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZFINX_NOZICOND +; RUN: llc -mtriple=riscv32 -mattr=+zdinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZDINX_ZICOND +; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZDINX_NOZICOND + +; This test checks that floating-point SELECT is lowered through integer +; SELECT (and thus to Zicond czero.* sequence) when FP values live in GPRs +; (Zfinx/Zdinx) and Zicond is enabled. When Zicond is disabled, we expect +; a branch-based lowering instead. + +; ----------------------------------------------------------------------------- +; float select with i1 condition (Zfinx) +; ----------------------------------------------------------------------------- + +define float @select_f32_i1(i1 %cond, float %t, float %f) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_f32_i1: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_f32_i1: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV64ZDINX_NOZICOND-NEXT: bnez a3, .LBB0_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV64ZDINX_NOZICOND-NEXT: .LBB0_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_f32_i1: +; RV64ZHINX_ZICOND: # %bb.0: # 
%entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_f32_i1: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB0_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.s fa0, fa1 +; RV64FD-NEXT: .LBB0_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_f32_i1: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_f32_i1: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZFINX_NOZICOND-NEXT: bnez a3, .LBB0_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a0, a2 +; RV32ZFINX_NOZICOND-NEXT: .LBB0_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_f32_i1: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed 
$x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_f32_i1: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZDINX_NOZICOND-NEXT: bnez a3, .LBB0_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV32ZDINX_NOZICOND-NEXT: .LBB0_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, float %t, float %f + ret float %sel +} + +; ----------------------------------------------------------------------------- +; double select with i1 condition (Zdinx) +; ----------------------------------------------------------------------------- + +define double @select_f64_i1(i1 %cond, double %t, double %f) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_f64_i1: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_f64_i1: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV64ZDINX_NOZICOND-NEXT: bnez a3, .LBB1_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV64ZDINX_NOZICOND-NEXT: .LBB1_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_f64_i1: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_f64_i1: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB1_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.d fa0, fa1 +; RV64FD-NEXT: .LBB1_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: 
select_f64_i1: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.nez a3, a3, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32ZFINX_ZICOND-NEXT: czero.nez a4, a4, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a2, a2, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a1, a3 +; RV32ZFINX_ZICOND-NEXT: or a1, a2, a4 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_f64_i1: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: andi a5, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZFINX_NOZICOND-NEXT: bnez a5, .LBB1_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a0, a3 +; RV32ZFINX_NOZICOND-NEXT: mv a2, a4 +; RV32ZFINX_NOZICOND-NEXT: .LBB1_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a1, a2 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_f64_i1: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: bnez a0, .LBB1_2 +; RV32ZDINX_ZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_ZICOND-NEXT: mv a7, a4 +; RV32ZDINX_ZICOND-NEXT: mv a6, a3 +; RV32ZDINX_ZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_ZICOND-NEXT: j .LBB1_3 +; RV32ZDINX_ZICOND-NEXT: .LBB1_2: +; RV32ZDINX_ZICOND-NEXT: mv a5, a2 +; RV32ZDINX_ZICOND-NEXT: mv a4, a1 +; RV32ZDINX_ZICOND-NEXT: .LBB1_3: # %entry +; RV32ZDINX_ZICOND-NEXT: mv a0, a4 +; RV32ZDINX_ZICOND-NEXT: mv a1, a5 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_f64_i1: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: bnez a0, .LBB1_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a7, a4 +; RV32ZDINX_NOZICOND-NEXT: mv a6, a3 +; RV32ZDINX_NOZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_NOZICOND-NEXT: j .LBB1_3 +; RV32ZDINX_NOZICOND-NEXT: .LBB1_2: +; RV32ZDINX_NOZICOND-NEXT: mv a5, a2 +; RV32ZDINX_NOZICOND-NEXT: mv a4, a1 +; RV32ZDINX_NOZICOND-NEXT: .LBB1_3: # %entry +; 
RV32ZDINX_NOZICOND-NEXT: mv a0, a4 +; RV32ZDINX_NOZICOND-NEXT: mv a1, a5 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, double %t, double %f + ret double %sel +} + +; ----------------------------------------------------------------------------- +; double select with floating-point compare condition (a > b ? c : d), Zdinx +; ----------------------------------------------------------------------------- + +define double @select_f64_fcmp(double %a, double %b, double %c, double %d) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_f64_fcmp: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: flt.d a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: czero.nez a1, a3, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a2, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_f64_fcmp: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: flt.d a1, a1, a0 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV64ZDINX_NOZICOND-NEXT: bnez a1, .LBB2_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a0, a3 +; RV64ZDINX_NOZICOND-NEXT: .LBB2_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_f64_fcmp: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: addi sp, sp, -32 +; RV64ZHINX_ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64ZHINX_ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64ZHINX_ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64ZHINX_ZICOND-NEXT: mv s0, a3 +; RV64ZHINX_ZICOND-NEXT: mv s1, a2 +; RV64ZHINX_ZICOND-NEXT: call __gtdf2 +; RV64ZHINX_ZICOND-NEXT: sgtz a0, a0 +; RV64ZHINX_ZICOND-NEXT: czero.nez a1, s0, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, s1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZHINX_ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64ZHINX_ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64ZHINX_ZICOND-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64ZHINX_ZICOND-NEXT: addi 
sp, sp, 32 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_f64_fcmp: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: flt.d a0, fa1, fa0 +; RV64FD-NEXT: fmv.d fa0, fa2 +; RV64FD-NEXT: bnez a0, .LBB2_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.d fa0, fa3 +; RV64FD-NEXT: .LBB2_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_f64_fcmp: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, -32 +; RV32ZFINX_ZICOND-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: mv s0, a7 +; RV32ZFINX_ZICOND-NEXT: mv s1, a6 +; RV32ZFINX_ZICOND-NEXT: mv s2, a5 +; RV32ZFINX_ZICOND-NEXT: mv s3, a4 +; RV32ZFINX_ZICOND-NEXT: call __gtdf2 +; RV32ZFINX_ZICOND-NEXT: sgtz a0, a0 +; RV32ZFINX_ZICOND-NEXT: czero.nez a1, s1, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a2, s3, a0 +; RV32ZFINX_ZICOND-NEXT: czero.nez a3, s0, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a4, s2, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a2, a1 +; RV32ZFINX_ZICOND-NEXT: or a1, a4, a3 +; RV32ZFINX_ZICOND-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, 32 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_f64_fcmp: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, -32 +; RV32ZFINX_NOZICOND-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw 
s2, 16(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: mv s1, a7 +; RV32ZFINX_NOZICOND-NEXT: mv s3, a6 +; RV32ZFINX_NOZICOND-NEXT: mv s0, a5 +; RV32ZFINX_NOZICOND-NEXT: mv s2, a4 +; RV32ZFINX_NOZICOND-NEXT: call __gtdf2 +; RV32ZFINX_NOZICOND-NEXT: bgtz a0, .LBB2_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv s2, s3 +; RV32ZFINX_NOZICOND-NEXT: mv s0, s1 +; RV32ZFINX_NOZICOND-NEXT: .LBB2_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a0, s2 +; RV32ZFINX_NOZICOND-NEXT: mv a1, s0 +; RV32ZFINX_NOZICOND-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, 32 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_f64_fcmp: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: flt.d a0, a2, a0 +; RV32ZDINX_ZICOND-NEXT: bnez a0, .LBB2_2 +; RV32ZDINX_ZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_ZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_ZICOND-NEXT: .LBB2_2: # %entry +; RV32ZDINX_ZICOND-NEXT: mv a0, a4 +; RV32ZDINX_ZICOND-NEXT: mv a1, a5 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_f64_fcmp: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: flt.d a0, a2, a0 +; RV32ZDINX_NOZICOND-NEXT: bnez a0, .LBB2_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_NOZICOND-NEXT: .LBB2_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a0, a4 +; RV32ZDINX_NOZICOND-NEXT: mv a1, a5 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %cmp = fcmp ogt double %a, %b + %sel = select i1 %cmp, double %c, double %d + ret double %sel +} + +; ----------------------------------------------------------------------------- +; half 
select with i1 condition (cond ? a : b), Zfinx +; ----------------------------------------------------------------------------- + +define dso_local noundef half @select_half_i1(i1 %cond, half %a, half %b) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_half_i1: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_half_i1: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZDINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_NOZICOND-NEXT: bnez a0, .LBB3_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a1, a2 +; RV64ZDINX_NOZICOND-NEXT: .LBB3_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: lui a0, 1048560 +; RV64ZDINX_NOZICOND-NEXT: or a0, a1, a0 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_half_i1: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x12_h killed $x12_h def $x12 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_h killed $x10_h killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_half_i1: +; RV64FD: # %bb.0: # %entry +; 
RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB3_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.x.w a0, fa1 +; RV64FD-NEXT: j .LBB3_3 +; RV64FD-NEXT: .LBB3_2: +; RV64FD-NEXT: fmv.x.w a0, fa0 +; RV64FD-NEXT: .LBB3_3: # %entry +; RV64FD-NEXT: lui a1, 1048560 +; RV64FD-NEXT: or a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_half_i1: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZFINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_half_i1: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZFINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_NOZICOND-NEXT: bnez a0, .LBB3_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a1, a2 +; RV32ZFINX_NOZICOND-NEXT: .LBB3_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: lui a0, 1048560 +; RV32ZFINX_NOZICOND-NEXT: or a0, a1, a0 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_half_i1: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZDINX_ZICOND-NEXT: 
lui a1, 1048560 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_half_i1: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZDINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_NOZICOND-NEXT: bnez a0, .LBB3_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a1, a2 +; RV32ZDINX_NOZICOND-NEXT: .LBB3_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: lui a0, 1048560 +; RV32ZDINX_NOZICOND-NEXT: or a0, a1, a0 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, half %a, half %b + ret half %sel +} + +; ----------------------------------------------------------------------------- +; Test select with i1 condition and zero ret val (cond ? 
a : 0), Zfinx +; ----------------------------------------------------------------------------- +define dso_local noundef float @select_i1_f32_0(i1 %cond, float %t) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_i1_f32_0: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_i1_f32_0: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: andi a2, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV64ZDINX_NOZICOND-NEXT: bnez a2, .LBB4_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: li a0, 0 +; RV64ZDINX_NOZICOND-NEXT: .LBB4_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_i1_f32_0: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_i1_f32_0: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB4_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.w.x fa0, zero +; RV64FD-NEXT: .LBB4_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_i1_f32_0: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_i1_f32_0: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: andi a2, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: mv a0, 
a1 +; RV32ZFINX_NOZICOND-NEXT: bnez a2, .LBB4_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: li a0, 0 +; RV32ZFINX_NOZICOND-NEXT: .LBB4_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_i1_f32_0: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_i1_f32_0: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: andi a2, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZDINX_NOZICOND-NEXT: bnez a2, .LBB4_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: li a0, 0 +; RV32ZDINX_NOZICOND-NEXT: .LBB4_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, float %t, float 0.000000e+00 + ret float %sel +} + +; ----------------------------------------------------------------------------- +; Test select with i1 condition and zero ret val for half fp (cond ? 
a : 0) +; ----------------------------------------------------------------------------- +define dso_local noundef half @select_i1_half_0(i1 %cond, half %val) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_i1_half_0: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_i1_half_0: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_NOZICOND-NEXT: slli a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: srai a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_i1_half_0: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_h killed $x10_h killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_i1_half_0: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: fmv.x.w a1, fa0 +; RV64FD-NEXT: slli a0, a0, 63 +; RV64FD-NEXT: srai a0, a0, 63 +; RV64FD-NEXT: and a0, a0, a1 +; RV64FD-NEXT: lui a1, 1048560 +; RV64FD-NEXT: or a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_i1_half_0: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: lui 
a1, 1048560 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_i1_half_0: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_i1_half_0: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_i1_half_0: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, half %val, half 0xH0000 + ret half %sel +} + +; ----------------------------------------------------------------------------- +; Test select with i1 condition and zero value for half fp, feeding into fadd ((cond ? 
a : 0) + 1.0) +; ----------------------------------------------------------------------------- +define half @select_i1_half_0_add(i1 %cond, half %val) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_i1_half_0_add: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: addi sp, sp, -16 +; RV64ZDINX_ZICOND-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: call __extendhfsf2 +; RV64ZDINX_ZICOND-NEXT: lui a1, 260096 +; RV64ZDINX_ZICOND-NEXT: fadd.s a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: call __truncsfhf2 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV64ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ZDINX_ZICOND-NEXT: addi sp, sp, 16 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_i1_half_0_add: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: addi sp, sp, -16 +; RV64ZDINX_NOZICOND-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_NOZICOND-NEXT: slli a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: srai a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_NOZICOND-NEXT: call __extendhfsf2 +; RV64ZDINX_NOZICOND-NEXT: lui a1, 260096 +; RV64ZDINX_NOZICOND-NEXT: fadd.s a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: call __truncsfhf2 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV64ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 
+; RV64ZDINX_NOZICOND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ZDINX_NOZICOND-NEXT: addi sp, sp, 16 +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_i1_half_0_add: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: li a1, 15 +; RV64ZHINX_ZICOND-NEXT: slli a1, a1, 10 +; RV64ZHINX_ZICOND-NEXT: fadd.h a0, a0, a1 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_i1_half_0_add: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: addi sp, sp, -16 +; RV64FD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64FD-NEXT: fmv.x.w a1, fa0 +; RV64FD-NEXT: slli a0, a0, 63 +; RV64FD-NEXT: srai a0, a0, 63 +; RV64FD-NEXT: and a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: call __extendhfsf2 +; RV64FD-NEXT: lui a0, 260096 +; RV64FD-NEXT: fmv.w.x fa5, a0 +; RV64FD-NEXT: fadd.s fa0, fa0, fa5 +; RV64FD-NEXT: call __truncsfhf2 +; RV64FD-NEXT: fmv.x.w a0, fa0 +; RV64FD-NEXT: lui a1, 1048560 +; RV64FD-NEXT: or a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64FD-NEXT: addi sp, sp, 16 +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_i1_half_0_add: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, -16 +; RV32ZFINX_ZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: call __extendhfsf2 +; RV32ZFINX_ZICOND-NEXT: lui a1, 260096 +; RV32ZFINX_ZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZFINX_ZICOND-NEXT: call __truncsfhf2 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZFINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a1 
+; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, 16 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_i1_half_0_add: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, -16 +; RV32ZFINX_NOZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: call __extendhfsf2 +; RV32ZFINX_NOZICOND-NEXT: lui a1, 260096 +; RV32ZFINX_NOZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: call __truncsfhf2 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZFINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, 16 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_i1_half_0_add: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: addi sp, sp, -16 +; RV32ZDINX_ZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: call __extendhfsf2 +; RV32ZDINX_ZICOND-NEXT: lui a1, 260096 +; RV32ZDINX_ZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZDINX_ZICOND-NEXT: call __truncsfhf2 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a1 +; 
RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZDINX_ZICOND-NEXT: addi sp, sp, 16 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_i1_half_0_add: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: addi sp, sp, -16 +; RV32ZDINX_NOZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: call __extendhfsf2 +; RV32ZDINX_NOZICOND-NEXT: lui a1, 260096 +; RV32ZDINX_NOZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: call __truncsfhf2 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZDINX_NOZICOND-NEXT: addi sp, sp, 16 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, half %val, half 0xH0000 + %add = fadd half %sel, 1.0 + ret half %add +} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll new file mode 100644 index 0000000000000..e8bc48ec100b1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll @@ -0,0 +1,254 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown 
--spirv-ext=+SPV_ALTERA_arbitrary_precision_fixed_point,+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_fixed_point,+SPV_ALTERA_arbitrary_precision_integers %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Kernel +; CHECK-DAG: OpCapability ArbitraryPrecisionIntegersALTERA +; CHECK-DAG: OpCapability ArbitraryPrecisionFixedPointALTERA +; CHECK-DAG: OpExtension "SPV_ALTERA_arbitrary_precision_fixed_point" +; CHECK-DAG: OpExtension "SPV_ALTERA_arbitrary_precision_integers" + +; CHECK-DAG: %[[Ty_8:[0-9]+]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Ty_13:[0-9]+]] = OpTypeInt 13 0 +; CHECK-DAG: %[[Ty_5:[0-9]+]] = OpTypeInt 5 0 +; CHECK-DAG: %[[Ty_3:[0-9]+]] = OpTypeInt 3 0 +; CHECK-DAG: %[[Ty_11:[0-9]+]] = OpTypeInt 11 0 +; CHECK-DAG: %[[Ty_10:[0-9]+]] = OpTypeInt 10 0 +; CHECK-DAG: %[[Ty_17:[0-9]+]] = OpTypeInt 17 0 +; CHECK-DAG: %[[Ty_35:[0-9]+]] = OpTypeInt 35 0 +; CHECK-DAG: %[[Ty_28:[0-9]+]] = OpTypeInt 28 0 +; CHECK-DAG: %[[Ty_31:[0-9]+]] = OpTypeInt 31 0 +; CHECK-DAG: %[[Ty_40:[0-9]+]] = OpTypeInt 40 0 +; CHECK-DAG: %[[Ty_60:[0-9]+]] = OpTypeInt 60 0 +; CHECK-DAG: %[[Ty_16:[0-9]+]] = OpTypeInt 16 0 +; CHECK-DAG: %[[Ty_64:[0-9]+]] = OpTypeInt 64 0 +; CHECK-DAG: %[[Ty_44:[0-9]+]] = OpTypeInt 44 0 +; CHECK-DAG: %[[Ty_34:[0-9]+]] = OpTypeInt 34 0 +; CHECK-DAG: %[[Ty_51:[0-9]+]] = OpTypeInt 51 0 + +; CHECK: %[[Sqrt_InId:[0-9]+]] = OpLoad %[[Ty_13]] +; CHECK-NEXT: %[[#]] = OpFixedSqrtALTERA %[[Ty_5]] %[[Sqrt_InId]] 0 2 2 0 0 + +; CHECK: %[[Recip_InId:[0-9]+]] = OpLoad %[[Ty_3]] +; CHECK-NEXT: %[[#]] = OpFixedRecipALTERA %[[Ty_8]] %[[Recip_InId]] 1 4 4 0 0 + +; CHECK: %[[Rsqrt_InId:[0-9]+]] = OpLoad %[[Ty_11]] +; CHECK-NEXT: %[[#]] = OpFixedRsqrtALTERA %[[Ty_10]] %[[Rsqrt_InId]] 0 8 6 0 0 + +; CHECK: %[[Sin_InId:[0-9]+]] = OpLoad %[[Ty_17]] +; CHECK-NEXT: %[[#]] = OpFixedSinALTERA %[[Ty_11]] %[[Sin_InId]] 1 7 5 0 0 + +; CHECK: 
%[[Cos_InId:[0-9]+]] = OpLoad %[[Ty_35]] +; CHECK-NEXT: %[[#]] = OpFixedCosALTERA %[[Ty_28]] %[[Cos_InId]] 0 9 3 0 0 + +; CHECK: %[[SinCos_InId:[0-9]+]] = OpLoad %[[Ty_31]] +; CHECK-NEXT: %[[#]] = OpFixedSinCosALTERA %[[Ty_40]] %[[SinCos_InId]] 1 10 12 0 0 + +; CHECK: %[[SinPi_InId:[0-9]+]] = OpLoad %[[Ty_60]] +; CHECK-NEXT: %[[#]] = OpFixedSinPiALTERA %[[Ty_5]] %[[SinPi_InId]] 0 2 2 0 0 + +; CHECK: %[[CosPi_InId:[0-9]+]] = OpLoad %[[Ty_28]] +; CHECK-NEXT: %[[#]] = OpFixedCosPiALTERA %[[Ty_16]] %[[CosPi_InId]] 0 8 5 0 0 + +; CHECK: %[[SinCosPi_InId:[0-9]+]] = OpLoad %[[Ty_13]] +; CHECK-NEXT: %[[#]] = OpFixedSinCosPiALTERA %[[Ty_10]] %[[SinCosPi_InId]] 0 2 2 0 0 + +; CHECK: %[[Log_InId:[0-9]+]] = OpLoad %[[Ty_64]] +; CHECK-NEXT: %[[#]] = OpFixedLogALTERA %[[Ty_44]] %[[Log_InId]] 1 24 22 0 0 + +; CHECK: %[[Exp_InId:[0-9]+]] = OpLoad %[[Ty_44]] +; CHECK-NEXT: %[[#]] = OpFixedExpALTERA %[[Ty_34]] %[[Exp_InId]] 0 20 20 0 0 + +; CHECK: %[[SinCos_InId:[0-9]+]] = OpLoad %[[Ty_34]] +; CHECK-NEXT: %[[SinCos_ResultId:[0-9]+]] = OpFixedSinCosALTERA %[[Ty_51]] %[[SinCos_InId]] 1 3 2 0 0 +; CHECK-NEXT: OpStore %[[#]] %[[SinCos_ResultId]] + +; CHECK: %[[ResId:[0-9]+]] = OpLoad %[[Ty_51]] +; CHECK-NEXT: OpStore %[[PtrId:[0-9]+]] %[[ResId]] +; CHECK-NEXT: %[[ExpInId2:[0-9]+]] = OpLoad %[[Ty_51]] %[[PtrId]] +; CHECK-NEXT: %[[#]] = OpFixedExpALTERA %[[Ty_51]] %[[ExpInId2]] 0 20 20 0 0 + +%"class._ZTSZ4mainE3$_0.anon" = type { i8 } + +define dso_local spir_kernel void @_ZTSZ4mainE15kernel_function() !kernel_arg_addr_space !{} !kernel_arg_access_qual !{} !kernel_arg_type !{} !kernel_arg_base_type !{} !kernel_arg_type_qual !{} { +entry: + %0 = alloca %"class._ZTSZ4mainE3$_0.anon", align 1 + %1 = addrspacecast ptr %0 to ptr addrspace(4) + call spir_func void @"_ZZ4mainENK3$_0clEv"(ptr addrspace(4) %1) + ret void +} + +define internal spir_func void @"_ZZ4mainENK3$_0clEv"(ptr addrspace(4) %this) align 2 { +entry: + %this.addr = alloca ptr addrspace(4), align 8 + store ptr addrspace(4) 
%this, ptr %this.addr, align 8 + call spir_func void @_Z4sqrtILi13ELi5ELb0ELi2ELi2EEvv() + call spir_func void @_Z5recipILi3ELi8ELb1ELi4ELi4EEvv() + call spir_func void @_Z5rsqrtILi11ELi10ELb0ELi8ELi6EEvv() + call spir_func void @_Z3sinILi17ELi11ELb1ELi7ELi5EEvv() + call spir_func void @_Z3cosILi35ELi28ELb0ELi9ELi3EEvv() + call spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv() + call spir_func void @_Z6sin_piILi60ELi5ELb0ELi2ELi2EEvv() + call spir_func void @_Z6cos_piILi28ELi16ELb0ELi8ELi5EEvv() + call spir_func void @_Z10sin_cos_piILi13ELi5ELb0ELi2ELi2EEvv() + call spir_func void @_Z3logILi64ELi44ELb1ELi24ELi22EEvv() + call spir_func void @_Z3expILi44ELi34ELb0ELi20ELi20EEvv() + call spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_() + call spir_func void @_Z3expILi51ELi51ELb0ELi20ELi20EEvv() + ret void +} + +define linkonce_odr dso_local spir_func void @_Z4sqrtILi13ELi5ELb0ELi2ELi2EEvv() { +entry: + %in_ptr = alloca i13, align 2 + %out_ptr = alloca i5, align 1 + %in_val = load i13, ptr %in_ptr, align 2 + %res = call spir_func signext i5 @_Z22__spirv_FixedSqrtINTELILi13ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i13 signext %in_val, i1 zeroext false, i32 2, i32 2, i32 0, i32 0) + store i5 %res, ptr %out_ptr, align 1 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z5recipILi3ELi8ELb1ELi4ELi4EEvv() { +entry: + %in_ptr = alloca i3, align 1 + %out_ptr = alloca i8, align 1 + %in_val = load i3, ptr %in_ptr, align 1 + %res = call spir_func signext i8 @_Z23__spirv_FixedRecipINTELILi3ELi8EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i3 signext %in_val, i1 zeroext true, i32 4, i32 4, i32 0, i32 0) + store i8 %res, ptr %out_ptr, align 1 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z5rsqrtILi11ELi10ELb0ELi8ELi6EEvv() { +entry: + %in_ptr = alloca i11, align 2 + %out_ptr = alloca i10, align 2 + %in_val = load i11, ptr %in_ptr, align 2 + %res = call spir_func signext i10 
@_Z23__spirv_FixedRsqrtINTELILi11ELi10EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i11 signext %in_val, i1 zeroext false, i32 8, i32 6, i32 0, i32 0) + store i10 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3sinILi17ELi11ELb1ELi7ELi5EEvv() { +entry: + %in_ptr = alloca i17, align 4 + %out_ptr = alloca i11, align 2 + %in_val = load i17, ptr %in_ptr, align 4 + %res = call spir_func signext i11 @_Z21__spirv_FixedSinINTELILi17ELi11EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i17 signext %in_val, i1 zeroext true, i32 7, i32 5, i32 0, i32 0) + store i11 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3cosILi35ELi28ELb0ELi9ELi3EEvv() { +entry: + %in_ptr = alloca i35, align 8 + %out_ptr = alloca i28, align 4 + %in_val = load i35, ptr %in_ptr, align 8 + %res = call spir_func signext i28 @_Z21__spirv_FixedCosINTELILi35ELi28EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i35 signext %in_val, i1 zeroext false, i32 9, i32 3, i32 0, i32 0) + store i28 %res, ptr %out_ptr, align 4 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv() { +entry: + %in_ptr = alloca i31, align 4 + %out_ptr = alloca i40, align 8 + %in_val = load i31, ptr %in_ptr, align 4 + %res = call spir_func i40 @_Z24__spirv_FixedSinCosINTELILi31ELi20EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i31 signext %in_val, i1 zeroext true, i32 10, i32 12, i32 0, i32 0) + store i40 %res, ptr %out_ptr, align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z6sin_piILi60ELi5ELb0ELi2ELi2EEvv() { +entry: + %in_ptr = alloca i60, align 8 + %out_ptr = alloca i5, align 1 + %in_val = load i60, ptr %in_ptr, align 8 + %res = call spir_func signext i5 @_Z23__spirv_FixedSinPiINTELILi60ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i60 signext %in_val, i1 zeroext false, i32 2, i32 2, i32 0, i32 0) + store i5 %res, ptr %out_ptr, align 1 + ret void +} + +define linkonce_odr dso_local spir_func void 
@_Z6cos_piILi28ELi16ELb0ELi8ELi5EEvv() { +entry: + %in_ptr = alloca i28, align 4 + %out_ptr = alloca i16, align 2 + %in_val = load i28, ptr %in_ptr, align 4 + %res = call spir_func signext i16 @_Z23__spirv_FixedCosPiINTELILi28ELi16EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i28 signext %in_val, i1 zeroext false, i32 8, i32 5, i32 0, i32 0) + store i16 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z10sin_cos_piILi13ELi5ELb0ELi2ELi2EEvv() { +entry: + %in_ptr = alloca i13, align 2 + %out_ptr = alloca i10, align 2 + %in_val = load i13, ptr %in_ptr, align 2 + %res = call spir_func signext i10 @_Z26__spirv_FixedSinCosPiINTELILi13ELi5EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i13 signext %in_val, i1 zeroext false, i32 2, i32 2, i32 0, i32 0) + store i10 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3logILi64ELi44ELb1ELi24ELi22EEvv() { +entry: + %in_ptr = alloca i64, align 8 + %out_ptr = alloca i44, align 8 + %in_val = load i64, ptr %in_ptr, align 8 + %res = call spir_func i44 @_Z21__spirv_FixedLogINTELILi64ELi44EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i64 %in_val, i1 zeroext true, i32 24, i32 22, i32 0, i32 0) + store i44 %res, ptr %out_ptr, align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3expILi44ELi34ELb0ELi20ELi20EEvv() { +entry: + %in_ptr = alloca i44, align 8 + %out_ptr = alloca i34, align 8 + %in_val = load i44, ptr %in_ptr, align 8 + %res = call spir_func i34 @_Z21__spirv_FixedExpINTELILi44ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i44 %in_val, i1 zeroext false, i32 20, i32 20, i32 0, i32 0) + store i34 %res, ptr %out_ptr, align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_() { +entry: + %tmp = alloca i34, align 8 + %out_ptr = alloca i51, align 8 + %in_ptr = addrspacecast ptr %tmp to ptr addrspace(4) + %out_s = addrspacecast ptr %out_ptr to ptr addrspace(4) + %in_val = load i34, ptr 
addrspace(4) %in_ptr, align 8 + call spir_func void @_Z24__spirv_FixedSinCosINTELILi34ELi51EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(ptr addrspace(4) sret(i51) align 8 %out_s, i34 %in_val, i1 zeroext true, i32 3, i32 2, i32 0, i32 0) + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3expILi51ELi51ELb0ELi20ELi20EEvv() { +entry: + %a = alloca i51, align 8 + %a.ascast = addrspacecast ptr %a to ptr addrspace(4) + %ap_fixed_Exp = alloca i51, align 8 + %ap_fixed_Exp.ascast = addrspacecast ptr %ap_fixed_Exp to ptr addrspace(4) + %tmp = alloca i51, align 8 + %tmp.ascast = addrspacecast ptr %tmp to ptr addrspace(4) + %indirect-arg-temp = alloca i51, align 8 + %0 = load i51, ptr addrspace(4) %a.ascast, align 8 + store i51 %0, ptr %indirect-arg-temp, align 8 + call spir_func void @_Z21__spirv_FixedExpINTELILi51ELi51EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii( + ptr addrspace(4) sret(i51) align 8 %tmp.ascast, + ptr byval(i64) align 8 %indirect-arg-temp, + i1 zeroext false, i32 20, i32 20, i32 0, i32 0) + %1 = load i51, ptr addrspace(4) %tmp.ascast, align 8 + store i51 %1, ptr addrspace(4) %ap_fixed_Exp.ascast, align 8 + ret void +} + +declare dso_local spir_func signext i5 @_Z22__spirv_FixedSqrtINTELILi13ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i13 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i13 @_Z22__spirv_FixedSqrtINTELILi5ELi13EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i5 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i8 @_Z23__spirv_FixedRecipINTELILi3ELi8EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i3 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i10 @_Z23__spirv_FixedRsqrtINTELILi11ELi10EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i11 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i11 @_Z21__spirv_FixedSinINTELILi17ELi11EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i17 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func 
signext i28 @_Z21__spirv_FixedCosINTELILi35ELi28EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i35, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func i40 @_Z24__spirv_FixedSinCosINTELILi31ELi20EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i31 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i5 @_Z23__spirv_FixedSinPiINTELILi60ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i60, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i16 @_Z23__spirv_FixedCosPiINTELILi28ELi16EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i28 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i10 @_Z26__spirv_FixedSinCosPiINTELILi13ELi5EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i13 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func i44 @_Z21__spirv_FixedLogINTELILi64ELi44EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i64, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func i34 @_Z21__spirv_FixedExpINTELILi44ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i44, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func void @_Z24__spirv_FixedSinCosINTELILi34ELi51EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(ptr addrspace(4) sret(i51) align 8, i34, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func void @_Z21__spirv_FixedExpINTELILi51ELi51EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(ptr addrspace(4) sret(i51) align 8, ptr byval(i51) align 8, i1 zeroext, i32, i32, i32, i32) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll index 41d4b58ed1157..9ea8a5709154c 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | 
FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s define i6 @getConstantI6() { ret i6 2 @@ -9,8 +9,8 @@ define i13 @getConstantI13() { } ;; Capabilities: -; CHECK-DAG: OpExtension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-DAG: OpCapability ArbitraryPrecisionIntegersINTEL +; CHECK-DAG: OpExtension "SPV_ALTERA_arbitrary_precision_integers" +; CHECK-DAG: OpCapability ArbitraryPrecisionIntegersALTERA ; CHECK-NOT: DAG-FENCE diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll index 4d5fa52a166f2..fdb2776a7e2ec 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll @@ -1,11 +1,11 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-INT-4 +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-INT-4 ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-INT-8 ; No error would be reported in comparison to Khronos llvm-spirv, because type adjustments to integer size are made ; in case no appropriate extension is enabled. Here we expect that the type is adjusted to 8 bits. 
-; CHECK-SPIRV: Capability ArbitraryPrecisionIntegersINTEL -; CHECK-SPIRV: Extension "SPV_INTEL_arbitrary_precision_integers" +; CHECK-SPIRV: Capability ArbitraryPrecisionIntegersALTERA +; CHECK-SPIRV: Extension "SPV_ALTERA_arbitrary_precision_integers" ; CHECK-INT-4: %[[#Int4:]] = OpTypeInt 4 0 ; CHECK-INT-8: %[[#Int4:]] = OpTypeInt 8 0 ; CHECK: OpTypeFunction %[[#]] %[[#Int4]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll b/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll index fc07cca4dd240..96dca53b8ba59 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll @@ -1,6 +1,6 @@ -; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,-SPV_INTEL_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s -; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=-SPV_INTEL_arbitrary_precision_integers,+SPV_INTEL_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s -; CHECK: Extension cannot be allowed and disallowed at the same time: SPV_INTEL_arbitrary_precision_integers +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers,-SPV_ALTERA_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=-SPV_ALTERA_arbitrary_precision_integers,+SPV_ALTERA_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s +; CHECK: Extension cannot be allowed and disallowed at the same time: SPV_ALTERA_arbitrary_precision_integers define i8 @foo() { ret i8 2 diff --git a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll index face4a9f5e615..5ddfc85702540 100644 --- 
a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=all,-SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=KHR %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=khr %s -o - | FileCheck %s @@ -10,7 +10,7 @@ define i6 @foo() { ret i6 2 } -; CHECK-NOT: OpExtension "SPV_INTEL_arbitrary_precision_integers" +; CHECK-NOT: OpExtension "SPV_ALTERA_arbitrary_precision_integers" ; CHECK-DAG: OpExtension "SPV_KHR_bit_instructions" declare i32 @llvm.bitreverse.i32(i32) diff --git a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll index 15905dd1894e2..80b094f462a70 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll @@ -5,4 +5,4 @@ define i6 @getConstantI6() { ret i6 2 } -; CHECK: OpExtension "SPV_INTEL_arbitrary_precision_integers" +; CHECK: OpExtension "SPV_ALTERA_arbitrary_precision_integers" diff --git a/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll b/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll index 2c1257471d159..cc3f1ae29a681 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown 
--spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s define i8 @getConstantI8() { ret i8 2 @@ -15,5 +15,5 @@ define i64 @getConstantI64() { } ;; Capabilities: -; CHECK-NOT: OpExtension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-NOT: OpCapability ArbitraryPrecisionIntegersINTEL +; CHECK-NOT: OpExtension "SPV_ALTERA_arbitrary_precision_integers" +; CHECK-NOT: OpCapability ArbitraryPrecisionIntegersALTERA diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll index 18856147896bb..d4b1592a044bc 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll @@ -1,11 +1,11 @@ ;; Check that llvm.bitreverse.* intrinsics are lowered for ;; 2/4-bit scalar and vector types. 
-; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - | FileCheck %s -; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - | FileCheck %s +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - -filetype=obj | spirv-val %} -; CHECK: OpCapability ArbitraryPrecisionIntegersINTEL -; CHECK: OpExtension "SPV_INTEL_arbitrary_precision_integers" +; CHECK: OpCapability ArbitraryPrecisionIntegersALTERA +; CHECK: OpExtension "SPV_ALTERA_arbitrary_precision_integers" ; CHECK-DAG: %[[#I4:]] = OpTypeInt 4 0 ; CHECK-DAG: %[[#I2:]] = OpTypeInt 2 0 diff --git a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll index 79c2824c3dde1..16cd00b7180a7 100644 --- a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll +++ b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll @@ -1,12 +1,12 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT ; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT ; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled ; XFAIL: expensive_checks diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll index 283dc075a9aee..0d732a80d7221 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll @@ -45,6 +45,48 @@ exit: ret i64 %res } +define i64 @test_vectorize_select_umin_idx_signed_sentinel_possible(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_signed_sentinel_possible( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[INDEX]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP4]], label %[[EXIT:.*]], label %[[LOOP]] 
+; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_SELECT:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %cmp = icmp ugt i64 %min.val, %l + %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 100 + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { @@ -553,5 +595,52 @@ exit: ret i64 %res } +define i64 @test_vectorize_select_umin_idx_wraps(ptr %src, i64 %n, i64 %start) { +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_wraps( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i64 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: 
[[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IDX]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx = phi i64 [ %start, %entry ], [ %idx.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %cmp = icmp ugt i64 %min.val, %l + %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %idx, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %idx.next = add i64 %idx, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + + declare i64 @llvm.umin.i64(i64, i64) declare i16 @llvm.umin.i16(i16, i16) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td index 7b363fac6e627..ecbbf39a534e1 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -792,7 +792,7 @@ def SPIRV_C_FPGABufferLocationINTEL : I32EnumAttrCase<"FPGAB Extension<[SPV_INTEL_fpga_buffer_location]> ]; } -def SPIRV_C_ArbitraryPrecisionFixedPointINTEL : I32EnumAttrCase<"ArbitraryPrecisionFixedPointINTEL", 5922> { +def SPIRV_C_ArbitraryPrecisionFixedPointINTEL : I32EnumAttrCase<"ArbitraryPrecisionFixedPointINTEL", 5922> { list availability = [ Extension<[SPV_INTEL_arbitrary_precision_fixed_point]> ]; diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp 
b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 4743941deff3f..8f1249e3afaf0 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -1711,6 +1711,12 @@ LogicalResult mlir::affine::coalesceLoops(MutableArrayRef loops) { outermost.getBody()->getOperations().splice( Block::iterator(secondOutermostLoop.getOperation()), innermost.getBody()->getOperations()); + for (auto [iter, init] : + llvm::zip_equal(secondOutermostLoop.getRegionIterArgs(), + secondOutermostLoop.getInits())) { + iter.replaceAllUsesWith(init); + iter.dropAllUses(); + } secondOutermostLoop.erase(); return success(); } diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index 3be14eaf5c326..6a825320ff20f 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -416,3 +416,31 @@ func.func @test_loops_do_not_get_coalesced() { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @inner_loop_has_iter_args +// CHECK-SAME: %[[ALLOC:.*]]: memref) +func.func @inner_loop_has_iter_args(%alloc : memref) { + %c17 = arith.constant 17 : index + affine.for %arg0 = 0 to 79 { + %0 = affine.for %arg1 = 0 to 64 iter_args(%arg2 = %alloc) -> (memref) { + %1 = arith.remui %arg1, %c17 : index + %2 = arith.index_cast %arg1 : index to i64 + memref.store %2, %arg2[%1] : memref + affine.yield %arg2 : memref + } + } + return +} + +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 17 : index +// CHECK: %[[APPLY_0:.*]] = affine.apply affine_map<() -> (79)>() +// CHECK: %[[APPLY_1:.*]] = affine.apply affine_map<() -> (64)>() +// CHECK: %[[APPLY_2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[APPLY_0]]){{\[}}%[[APPLY_1]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[APPLY_2]] { +// CHECK: %[[APPLY_3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]]){{\[}}%[[APPLY_1]]] +// CHECK: %[[REMUI_0:.*]] = arith.remui 
%[[APPLY_3]], %[[CONSTANT_0]] : index +// CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[APPLY_3]] : index to i64 +// CHECK: memref.store %[[INDEX_CAST_0]], %[[ALLOC]]{{\[}}%[[REMUI_0]]] : memref +// CHECK: }