From 113e0c95a89ab3ce9f1ac4e2ba6351d957a64da9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 30 Nov 2025 21:07:28 +0000 Subject: [PATCH 1/9] [LV] Add additional tests for argmin with find-first wrapping IV ranges. Add test cases for upcoming argmin vectorization changes that have wrapping IV ranges. --- .../LoopVectorize/select-umin-first-index.ll | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll index 283dc075a9aee..0d732a80d7221 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll @@ -45,6 +45,48 @@ exit: ret i64 %res } +define i64 @test_vectorize_select_umin_idx_signed_sentinel_possible(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_signed_sentinel_possible( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[INDEX]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP4]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_SELECT:%.*]] = phi i64 [ 
[[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %cmp = icmp ugt i64 %min.val, %l + %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 100 + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { @@ -553,5 +595,52 @@ exit: ret i64 %res } +define i64 @test_vectorize_select_umin_idx_wraps(ptr %src, i64 %n, i64 %start) { +; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_wraps( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i64 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IDX]], i64 [[MIN_IDX]] +; 
CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %idx = phi i64 [ %start, %entry ], [ %idx.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %cmp = icmp ugt i64 %min.val, %l + %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %idx, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %idx.next = add i64 %idx, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + + declare i64 @llvm.umin.i64(i64, i64) declare i16 @llvm.umin.i16(i16, i16) From c465a56e9d1f244a32ea00a426d449bc7f38a9b1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 30 Nov 2025 21:50:37 +0000 Subject: [PATCH 2/9] [VPlan] Handle canonical IVs in ::isSingleScalar. (NFCI) The canonical IV is always a single scalar. It is already treated as uniform-across-UF-and-VF. This should currently be NFC. 
--- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index c7a0fd7407a4e..d36975699c4a8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -197,7 +197,8 @@ bool vputils::isSingleScalar(const VPValue *VPV) { all_of(VPI->operands(), isSingleScalar)); if (auto *RR = dyn_cast(VPV)) return !RR->isPartialReduction(); - if (isa(VPV)) + if (isa(VPV)) return true; if (auto *Expr = dyn_cast(VPV)) return Expr->isSingleScalar(); From ef3785887c7c306d1ea933430befb78fb17e1650 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 30 Nov 2025 14:37:34 -0800 Subject: [PATCH 3/9] ELF: Move .eh_frame_hdr code closer to .eh_frame . NFC ... as they are closely related. Also improve the comments. --- lld/ELF/SyntheticSections.cpp | 157 +++++++++++++++------------------- lld/ELF/SyntheticSections.h | 30 +++---- 2 files changed, 81 insertions(+), 106 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index ea9b87952cd84..1e9d44fa37bea 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -540,43 +540,6 @@ void EhFrameSection::finalizeContents() { this->size = off; } -// Returns data for .eh_frame_hdr. .eh_frame_hdr is a binary search table -// to get an FDE from an address to which FDE is applied. This function -// returns a list of such pairs. 
-SmallVector EhFrameSection::getFdeData() const { - uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff; - SmallVector ret; - - uint64_t va = getPartition(ctx).ehFrameHdr->getVA(); - for (CieRecord *rec : cieRecords) { - uint8_t enc = getFdeEncoding(rec->cie); - for (EhSectionPiece *fde : rec->fdes) { - uint64_t pc = getFdePc(buf, fde->outputOff, enc); - uint64_t fdeVA = getParent()->addr + fde->outputOff; - if (!isInt<32>(pc - va)) { - Err(ctx) << fde->sec << ": PC offset is too large: 0x" - << Twine::utohexstr(pc - va); - continue; - } - ret.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)}); - } - } - - // Sort the FDE list by their PC and uniqueify. Usually there is only - // one FDE for a PC (i.e. function), but if ICF merges two functions - // into one, there can be more than one FDEs pointing to the address. - auto less = [](const FdeData &a, const FdeData &b) { - return a.pcRel < b.pcRel; - }; - llvm::stable_sort(ret, less); - auto eq = [](const FdeData &a, const FdeData &b) { - return a.pcRel == b.pcRel; - }; - ret.erase(llvm::unique(ret, eq), ret.end()); - - return ret; -} - static uint64_t readFdeAddr(Ctx &ctx, uint8_t *buf, int size) { switch (size) { case DW_EH_PE_udata2: @@ -630,14 +593,79 @@ void EhFrameSection::writeTo(uint8_t *buf) { } } - // Apply relocations. .eh_frame section contents are not contiguous - // in the output buffer, but relocateAlloc() still works because - // getOffset() takes care of discontiguous section pieces. + // Apply relocations to .eh_frame entries. This includes CIE personality + // pointers, FDE initial_location fields, and LSDA pointers. 
for (EhInputSection *s : sections) ctx.target->relocateEh(*s, buf); - if (getPartition(ctx).ehFrameHdr && getPartition(ctx).ehFrameHdr->getParent()) - getPartition(ctx).ehFrameHdr->write(); + EhFrameHeader *hdr = getPartition(ctx).ehFrameHdr.get(); + if (!hdr || !hdr->getParent()) + return; + + // Write the .eh_frame_hdr section, which contains a binary search table of + // pointers to FDEs. This must be written after .eh_frame relocation since + // the content depends on relocated initial_location fields in FDEs. + using FdeData = EhFrameSection::FdeData; + SmallVector fdes; + uint64_t va = hdr->getVA(); + for (CieRecord *rec : cieRecords) { + uint8_t enc = getFdeEncoding(rec->cie); + for (EhSectionPiece *fde : rec->fdes) { + uint64_t pc = getFdePc(buf, fde->outputOff, enc); + uint64_t fdeVA = getParent()->addr + fde->outputOff; + if (!isInt<32>(pc - va)) { + Err(ctx) << fde->sec << ": PC offset is too large: 0x" + << Twine::utohexstr(pc - va); + continue; + } + fdes.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)}); + } + } + + // Sort the FDE list by their PC and uniqueify. Usually there is only + // one FDE for a PC (i.e. function), but if ICF merges two functions + // into one, there can be more than one FDEs pointing to the address. + llvm::stable_sort(fdes, [](const FdeData &a, const FdeData &b) { + return a.pcRel < b.pcRel; + }); + fdes.erase( + llvm::unique(fdes, [](auto &a, auto &b) { return a.pcRel == b.pcRel; }), + fdes.end()); + + // Write header. + uint8_t *hdrBuf = ctx.bufferStart + hdr->getParent()->offset + hdr->outSecOff; + hdrBuf[0] = 1; // version + hdrBuf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; // eh_frame_ptr_enc + hdrBuf[2] = DW_EH_PE_udata4; // fde_count_enc + hdrBuf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; // table_enc + write32(ctx, hdrBuf + 4, + getParent()->addr - hdr->getVA() - 4); // eh_frame_ptr + write32(ctx, hdrBuf + 8, fdes.size()); // fde_count + hdrBuf += 12; + + // Write binary search table. 
Each entry describes the starting PC and the FDE + // address. + for (FdeData &fde : fdes) { + write32(ctx, hdrBuf, fde.pcRel); + write32(ctx, hdrBuf + 4, fde.fdeVARel); + hdrBuf += 8; + } +} + +EhFrameHeader::EhFrameHeader(Ctx &ctx) + : SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {} + +void EhFrameHeader::writeTo(uint8_t *buf) { + // The section content is written during EhFrameSection::writeTo. +} + +size_t EhFrameHeader::getSize() const { + // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. + return 12 + getPartition(ctx).ehFrame->numFdes * 8; +} + +bool EhFrameHeader::isNeeded() const { + return isLive() && getPartition(ctx).ehFrame->isNeeded(); } GotSection::GotSection(Ctx &ctx) @@ -3658,51 +3686,6 @@ void GdbIndexSection::writeTo(uint8_t *buf) { bool GdbIndexSection::isNeeded() const { return !chunks.empty(); } -EhFrameHeader::EhFrameHeader(Ctx &ctx) - : SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {} - -void EhFrameHeader::writeTo(uint8_t *buf) { - // Unlike most sections, the EhFrameHeader section is written while writing - // another section, namely EhFrameSection, which calls the write() function - // below from its writeTo() function. This is necessary because the contents - // of EhFrameHeader depend on the relocated contents of EhFrameSection and we - // don't know which order the sections will be written in. -} - -// .eh_frame_hdr contains a binary search table of pointers to FDEs. -// Each entry of the search table consists of two values, -// the starting PC from where FDEs covers, and the FDE's address. -// It is sorted by PC. 
-void EhFrameHeader::write() { - uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff; - using FdeData = EhFrameSection::FdeData; - SmallVector fdes = getPartition(ctx).ehFrame->getFdeData(); - - buf[0] = 1; - buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; - buf[2] = DW_EH_PE_udata4; - buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; - write32(ctx, buf + 4, - getPartition(ctx).ehFrame->getParent()->addr - this->getVA() - 4); - write32(ctx, buf + 8, fdes.size()); - buf += 12; - - for (FdeData &fde : fdes) { - write32(ctx, buf, fde.pcRel); - write32(ctx, buf + 4, fde.fdeVARel); - buf += 8; - } -} - -size_t EhFrameHeader::getSize() const { - // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. - return 12 + getPartition(ctx).ehFrame->numFdes * 8; -} - -bool EhFrameHeader::isNeeded() const { - return isLive() && getPartition(ctx).ehFrame->isNeeded(); -} - VersionDefinitionSection::VersionDefinitionSection(Ctx &ctx) : SyntheticSection(ctx, ".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC, sizeof(uint32_t)) {} diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 66c866d7e8cde..e01a5ad8abc60 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -68,7 +68,6 @@ class EhFrameSection final : public SyntheticSection { uint32_t fdeVARel; }; - SmallVector getFdeData() const; ArrayRef getCieRecords() const { return cieRecords; } template void iterateFDEWithLSDA(llvm::function_ref fn); @@ -95,6 +94,17 @@ class EhFrameSection final : public SyntheticSection { llvm::DenseMap, Symbol *>, CieRecord *> cieMap; }; +// .eh_frame_hdr contains a binary search table for .eh_frame FDEs. The section +// is covered by a PT_GNU_EH_FRAME segment, which allows the runtime unwinder to +// locate it via functions like `dl_iterate_phdr`. 
+class EhFrameHeader final : public SyntheticSection { +public: + EhFrameHeader(Ctx &); + void writeTo(uint8_t *buf) override; + size_t getSize() const override; + bool isNeeded() const override; +}; + class GotSection final : public SyntheticSection { public: GotSection(Ctx &); @@ -967,24 +977,6 @@ class GdbIndexSection final : public SyntheticSection { size_t size; }; -// --eh-frame-hdr option tells linker to construct a header for all the -// .eh_frame sections. This header is placed to a section named .eh_frame_hdr -// and also to a PT_GNU_EH_FRAME segment. -// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by -// calling dl_iterate_phdr. -// This section contains a lookup table for quick binary search of FDEs. -// Detailed info about internals can be found in Ian Lance Taylor's blog: -// http://www.airs.com/blog/archives/460 (".eh_frame") -// http://www.airs.com/blog/archives/462 (".eh_frame_hdr") -class EhFrameHeader final : public SyntheticSection { -public: - EhFrameHeader(Ctx &); - void write(); - void writeTo(uint8_t *buf) override; - size_t getSize() const override; - bool isNeeded() const override; -}; - // For more information about .gnu.version and .gnu.version_r see: // https://www.akkadia.org/drepper/symbol-versioning From 75aa01b89553bf4213a3b0e83829b6d0689941b9 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 1 Dec 2025 09:35:00 +0800 Subject: [PATCH 4/9] Revert "LangRef: Clarify llvm.minnum and llvm.maxnum about sNaN and signed zero (#112852)" (#168838) This reverts commit 363b05944f9212511ee6811d0eb1af841c177226. This is a follow up of #166912. Sorry for not noticing the change at the beginning, but I disagree with both sNaN and signed zero semantics change. I have 3 justifications: - llvm.minnum and llvm.maxnum are common intrinsics, we cannot change the definition just because "some architectures" support the changed semantic. For example, X86 min/max instructions neither distinguish sNaN nor signed zero. 
We have to add a couple of extra instructions to match the new definition, which makes the intrinsics less efficient. But efficiency is not the reason for the objection. I object because such cost is unnecessary; - As the example ``minnum(fadd(sNaN, -0.0), 1.0)`` shows, minnum/maxnum themselves cannot guarantee a consistent result if multiple FP arithmetic operations are involved. It makes the sacrifice of performance totally unnecessary. `Behavior of Floating-Point NaN values` notes all NaNs can be treated as quiet NaNs unless using Constrained Floating-Point Intrinsics. So the cost is only worthwhile for constrained minnum/maxnum ones if we want to define them; - Signed zero handling is not necessary either, because even the C functions don't require it. If any other front ends require it, they can use the existing fminnum_ieee/fmaxnum_ieee or define new intrinsics; Fixes: https://github.com/llvm/llvm-project/issues/138303 and https://github.com/llvm/llvm-project/issues/169122 --- llvm/docs/LangRef.rst | 110 ++++++++++++------------- llvm/include/llvm/CodeGen/ISDOpcodes.h | 20 ++--- 2 files changed, 59 insertions(+), 71 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 02865f8a29c67..a57351f9598e2 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17298,8 +17298,9 @@ LLVM Implementation: """""""""""""""""""" LLVM implements all ISO C flavors as listed in this table, except in the -default floating-point environment exceptions are ignored. The constrained -versions of the intrinsics respect the exception behavior. +default floating-point environment exceptions are ignored and return value +is non-deterministic if one or both inputs are sNaN. The constrained +versions of the intrinsics respect the exception behavior and sNaN. .. list-table:: :header-rows: 1 @@ -17331,7 +17332,7 @@ versions of the intrinsics respect the exception behavior. 
- qNaN, invalid exception * - ``+0.0 vs -0.0`` - - +0.0(max)/-0.0(min) + - either one - +0.0(max)/-0.0(min) - +0.0(max)/-0.0(min) @@ -17375,30 +17376,22 @@ type. Semantics: """""""""" -Follows the semantics of minNum in IEEE-754-2008, except that -0.0 < +0.0 for the purposes -of this intrinsic. As for signaling NaNs, per the minNum semantics, if either operand is sNaN, -the result is qNaN. This matches the recommended behavior for the libm -function ``fmin``, although not all implementations have implemented these recommended behaviors. - -If either operand is a qNaN, returns the other non-NaN operand. Returns NaN only if both operands are -NaN or if either operand is sNaN. Note that arithmetic on an sNaN doesn't consistently produce a qNaN, -so arithmetic feeding into a minnum can produce inconsistent results. For example, -``minnum(fadd(sNaN, -0.0), 1.0)`` can produce qNaN or 1.0 depending on whether ``fadd`` is folded. -IEEE-754-2008 defines minNum, and it was removed in IEEE-754-2019. As the replacement, IEEE-754-2019 -defines :ref:`minimumNumber `. +Follows the IEEE-754-2008 semantics for minNum, except for handling of +signaling NaNs. This matches the behavior of libm's fmin. -If the intrinsic is marked with the nsz attribute, then the effect is as in the definition in C -and IEEE-754-2008: the result of ``minnum(-0.0, +0.0)`` may be either -0.0 or +0.0. +If either operand is a NaN, returns the other non-NaN operand. Returns +NaN only if both operands are NaN. If the operands compare equal, +returns either one of the operands. For example, this means that +fmin(+0.0, -0.0) non-deterministically returns either operand (-0.0 +or 0.0). -Some architectures, such as ARMv8 (FMINNM), LoongArch (fmin), MIPSr6 (min.fmt), PowerPC/VSX (xsmindp), -have instructions that match these semantics exactly; thus it is quite simple for these architectures. -Some architectures have similar ones while they are not exact equivalent. 
Such as x86 implements ``MINPS``, -which implements the semantics of C code ``a`. +If either operand is a NaN, returns the other non-NaN operand. Returns +NaN only if both operands are NaN. If the operands compare equal, +returns either one of the operands. For example, this means that +fmax(+0.0, -0.0) non-deterministically returns either operand (-0.0 +or 0.0). -If the intrinsic is marked with the nsz attribute, then the effect is as in the definition in C -and IEEE-754-2008: the result of maxnum(-0.0, +0.0) may be either -0.0 or +0.0. - -Some architectures, such as ARMv8 (FMAXNM), LoongArch (fmax), MIPSr6 (max.fmt), PowerPC/VSX (xsmaxdp), -have instructions that match these semantics exactly; thus it is quite simple for these architectures. -Some architectures have similar ones while they are not exact equivalent. Such as x86 implements ``MAXPS``, -which implements the semantics of C code ``a>b?a:b``: NUM vs qNaN always return qNaN. ``MAXPS`` can be used -if ``nsz`` and ``nnan`` are given. - -For existing libc implementations, the behaviors of fmin may be quite different on sNaN and signed zero behaviors, -even in the same release of a single libm implementation. +Unlike the IEEE-754-2008 behavior, this does not distinguish between +signaling and quiet NaN inputs. If a target's implementation follows +the standard and returns a quiet NaN if either input is a signaling +NaN, the intrinsic lowering is responsible for quieting the inputs to +correctly return the non-NaN input (e.g. by using the equivalent of +``llvm.canonicalize``). .. _i_minimum: @@ -20342,8 +20326,12 @@ The '``llvm.vector.reduce.fmax.*``' intrinsics do a floating-point matches the element-type of the vector input. This instruction has the same comparison semantics as the '``llvm.maxnum.*``' -intrinsic. If the intrinsic call has the ``nnan`` fast-math flag, then the -operation can assume that NaNs are not present in the input vector. +intrinsic. 
That is, the result will always be a number unless all elements of +the vector are NaN. For a vector with maximum element magnitude 0.0 and +containing both +0.0 and -0.0 elements, the sign of the result is unspecified. + +If the intrinsic call has the ``nnan`` fast-math flag, then the operation can +assume that NaNs are not present in the input vector. Arguments: """""""""" @@ -20371,8 +20359,12 @@ The '``llvm.vector.reduce.fmin.*``' intrinsics do a floating-point matches the element-type of the vector input. This instruction has the same comparison semantics as the '``llvm.minnum.*``' -intrinsic. If the intrinsic call has the ``nnan`` fast-math flag, then the -operation can assume that NaNs are not present in the input vector. +intrinsic. That is, the result will always be a number unless all elements of +the vector are NaN. For a vector with minimum element magnitude 0.0 and +containing both +0.0 and -0.0 elements, the sign of the result is unspecified. + +If the intrinsic call has the ``nnan`` fast-math flag, then the operation can +assume that NaNs are not present in the input vector. Arguments: """""""""" @@ -22759,7 +22751,7 @@ This is an overloaded intrinsic. Overview: """"""""" -Predicated floating-point IEEE-754-2008 minNum of two vectors of floating-point values. +Predicated floating-point IEEE-754 minNum of two vectors of floating-point values. Arguments: @@ -22808,7 +22800,7 @@ This is an overloaded intrinsic. Overview: """"""""" -Predicated floating-point IEEE-754-2008 maxNum of two vectors of floating-point values. +Predicated floating-point IEEE-754 maxNum of two vectors of floating-point values. Arguments: @@ -24107,7 +24099,10 @@ result type. If only ``nnan`` is set then the neutral value is ``-Infinity``. This instruction has the same comparison semantics as the :ref:`llvm.vector.reduce.fmax ` intrinsic (and thus the -'``llvm.maxnum.*``' intrinsic). +'``llvm.maxnum.*``' intrinsic). 
That is, the result will always be a number +unless all elements of the vector and the starting value are ``NaN``. For a +vector with maximum element magnitude ``0.0`` and containing both ``+0.0`` and +``-0.0`` elements, the sign of the result is unspecified. To ignore the start value, the neutral value can be used. @@ -24174,7 +24169,10 @@ result type. If only ``nnan`` is set then the neutral value is ``+Infinity``. This instruction has the same comparison semantics as the :ref:`llvm.vector.reduce.fmin ` intrinsic (and thus the -'``llvm.minnum.*``' intrinsic). +'``llvm.minnum.*``' intrinsic). That is, the result will always be a number +unless all elements of the vector and the starting value are ``NaN``. For a +vector with maximum element magnitude ``0.0`` and containing both ``+0.0`` and +``-0.0`` elements, the sign of the result is unspecified. To ignore the start value, the neutral value can be used. @@ -29046,7 +29044,7 @@ The third argument specifies the exception behavior as described above. Semantics: """""""""" -This function follows the IEEE-754-2008 semantics for maxNum. +This function follows the IEEE-754 semantics for maxNum. '``llvm.experimental.constrained.minnum``' Intrinsic @@ -29078,7 +29076,7 @@ The third argument specifies the exception behavior as described above. Semantics: """""""""" -This function follows the IEEE-754-2008 semantics for minNum. +This function follows the IEEE-754 semantics for minNum. '``llvm.experimental.constrained.maximum``' Intrinsic diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index b32f3dacbb3a4..a9fdf803a5511 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1048,20 +1048,13 @@ enum NodeType { LRINT, LLRINT, - /// FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, - /// following IEEE-754 definitions except for signed zero behavior. 
+ /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two + /// values. /// - /// If one input is a signaling NaN, returns a quiet NaN. This matches - /// IEEE-754 2008's minNum/maxNum behavior for signaling NaNs (which differs - /// from 2019). + /// In the case where a single input is a NaN (either signaling or quiet), + /// the non-NaN input is returned. /// - /// These treat -0 as ordered less than +0, matching the behavior of IEEE-754 - /// 2019's minimumNumber/maximumNumber. - /// - /// Note that that arithmetic on an sNaN doesn't consistently produce a qNaN, - /// so arithmetic feeding into a minnum/maxnum can produce inconsistent - /// results. FMAXIMUN/FMINIMUM or FMAXIMUMNUM/FMINIMUMNUM may be better choice - /// for non-distinction of sNaN/qNaN handling. + /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0. FMINNUM, FMAXNUM, @@ -1075,9 +1068,6 @@ enum NodeType { /// /// These treat -0 as ordered less than +0, matching the behavior of IEEE-754 /// 2019's minimumNumber/maximumNumber. - /// - /// Deprecated, and will be removed soon, as FMINNUM/FMAXNUM have the same - /// semantics now. FMINNUM_IEEE, FMAXNUM_IEEE, From e110abc3c65bb33f738738a9fa6e0f5b602ed97f Mon Sep 17 00:00:00 2001 From: lonely eagle <2020382038@qq.com> Date: Mon, 1 Dec 2025 10:00:54 +0800 Subject: [PATCH 5/9] [mlir][affine] Use iter argument replace init when delete loop in the coalesceLoops function (#169514) Fix https://github.com/llvm/llvm-project/issues/169483 by using iter argument replace init when delete loop in the coalesceLoops function. 
--- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 6 ++++ mlir/test/Dialect/Affine/loop-coalescing.mlir | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 4743941deff3f..8f1249e3afaf0 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -1711,6 +1711,12 @@ LogicalResult mlir::affine::coalesceLoops(MutableArrayRef loops) { outermost.getBody()->getOperations().splice( Block::iterator(secondOutermostLoop.getOperation()), innermost.getBody()->getOperations()); + for (auto [iter, init] : + llvm::zip_equal(secondOutermostLoop.getRegionIterArgs(), + secondOutermostLoop.getInits())) { + iter.replaceAllUsesWith(init); + iter.dropAllUses(); + } secondOutermostLoop.erase(); return success(); } diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index 3be14eaf5c326..6a825320ff20f 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -416,3 +416,31 @@ func.func @test_loops_do_not_get_coalesced() { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return + +// ----- + +// CHECK-LABEL: func @inner_loop_has_iter_args +// CHECK-SAME: %[[ALLOC:.*]]: memref) +func.func @inner_loop_has_iter_args(%alloc : memref) { + %c17 = arith.constant 17 : index + affine.for %arg0 = 0 to 79 { + %0 = affine.for %arg1 = 0 to 64 iter_args(%arg2 = %alloc) -> (memref) { + %1 = arith.remui %arg1, %c17 : index + %2 = arith.index_cast %arg1 : index to i64 + memref.store %2, %arg2[%1] : memref + affine.yield %arg2 : memref + } + } + return +} + +// CHECK: %[[CONSTANT_0:.*]] = arith.constant 17 : index +// CHECK: %[[APPLY_0:.*]] = affine.apply affine_map<() -> (79)>() +// CHECK: %[[APPLY_1:.*]] = affine.apply affine_map<() -> (64)>() +// CHECK: %[[APPLY_2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * 
s0)>(%[[APPLY_0]]){{\[}}%[[APPLY_1]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[APPLY_2]] { +// CHECK: %[[APPLY_3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]]){{\[}}%[[APPLY_1]]] +// CHECK: %[[REMUI_0:.*]] = arith.remui %[[APPLY_3]], %[[CONSTANT_0]] : index +// CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[APPLY_3]] : index to i64 +// CHECK: memref.store %[[INDEX_CAST_0]], %[[ALLOC]]{{\[}}%[[REMUI_0]]] : memref +// CHECK: } From 2e21bb815d527ebbe4d53f0396d1e40aae9e2146 Mon Sep 17 00:00:00 2001 From: fennecJ Date: Mon, 1 Dec 2025 10:19:56 +0800 Subject: [PATCH 6/9] [RISCV][ISelLowering] Use Zicond for FP selects on Zfinx/Zdinx (#169299) ### Summary This patch lets RISCVTargetLowering::lowerSELECT lower some floating-point select operations through an integer zicond select when: * Zicond is available, and * FP values live in GPRs (Zfinx/Zdinx), and * Select condition is an integer type. In that scenario there is no extra cost for GPR <-> "FP GPR" moves, so we can implement FP selects with a CZERO-based sequence instead of a branch. For example, for ```c float foo(int cond, float x) { return (cond != 0) ? x : 0.0f; } ``` the current lowering produces: ```asm foo: mv a2, a0 li a0, 0 beqz a2, .LBB0_2 .LBB0_1: mv a0, a1 .LBB0_2: ret ``` With this patch, when targeting rv64ima_zicond_zfinx we instead get: ```asm foo: czero.nez a2, zero, a0 czero.eqz a0, a1, a0 or a0, a2, a0 ret ``` The existing branch-based lowering is preserved for: * targets without Zicond * targets where FP registers are separate (+f, +d without zfinx/zdinx) ### Testing Adds llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll to cover: * RV64 Zfinx/Zicond vs Zfinx without Zicond * RV64 Zdinx/Zicond vs Zdinx without Zicond * RV32 Zfinx/Zicond vs Zfinx without Zicond Also adds baseline RV32F/RV64F/RV64D cases to ensure we still use branches when FP registers are separate. 
The tests check that: * With Zicond + Zfinx/Zdinx, FP select lowers to a CZERO+OR sequence with no conditional branches. * Without Zicond (or without Zfinx/Zdinx), we still get branch-based code and no czero.* instructions. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 44 + .../CodeGen/RISCV/zicond-fp-select-zfinx.ll | 798 ++++++++++++++++++ 2 files changed, 842 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4550e40166525..d2e4bb4199a7a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -9584,6 +9584,50 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget)) return V; + // When there is no cost for GPR <-> FPR, we can use zicond select for + // floating value when CondV is int type + bool FPinGPR = Subtarget.hasStdExtZfinx(); + + // We can handle FGPR without spliting into hi/lo parts + bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(), + Subtarget.getXLenVT().getSizeInBits()); + + bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR && + VT.isFloatingPoint() && FitsInGPR; + + if (UseZicondForFPSel) { + + auto CastToInt = [&](SDValue V) -> SDValue { + // Treat +0.0 as int 0 to enable single 'czero' instruction generation. 
+ if (isNullFPConstant(V)) + return DAG.getConstant(0, DL, XLenVT); + + if (VT == MVT::f16) + return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V); + + if (VT == MVT::f32 && Subtarget.is64Bit()) + return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V); + + return DAG.getBitcast(XLenVT, V); + }; + + SDValue TrueVInt = CastToInt(TrueV); + SDValue FalseVInt = CastToInt(FalseV); + + // Emit integer SELECT (lowers to Zicond) + SDValue ResultInt = + DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt); + + // Convert back to floating VT + if (VT == MVT::f32 && Subtarget.is64Bit()) + return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt); + + if (VT == MVT::f16) + return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt); + + return DAG.getBitcast(VT, ResultInt); + } + // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ // nodes to implement the SELECT. Performing the lowering here allows for // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless diff --git a/llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll b/llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll new file mode 100644 index 0000000000000..b505c84166eb1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zicond-fp-select-zfinx.ll @@ -0,0 +1,798 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; Zicond with zfinx(implies by zdinx) +; RUN: llc -mtriple=riscv64 -mattr=+zdinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64ZDINX_ZICOND +; RUN: llc -mtriple=riscv64 -mattr=+zdinx -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64ZDINX_NOZICOND + +; Zicond with zfinx(implies by zhinx) +; RUN: llc -mtriple=riscv64 -mattr=+zhinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64ZHINX_ZICOND + +; Baseline with classic FP registers (no *inx); zicond select should NOT trigger +; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s | 
FileCheck %s --check-prefix=RV64FD + +; Check same optimize work on 32bit machine +; RUN: llc -mtriple=riscv32 -mattr=+zfinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZFINX_ZICOND +; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZFINX_NOZICOND +; RUN: llc -mtriple=riscv32 -mattr=+zdinx,+zicond -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZDINX_ZICOND +; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32ZDINX_NOZICOND + +; This test checks that floating-point SELECT is lowered through integer +; SELECT (and thus to Zicond czero.* sequence) when FP values live in GPRs +; (Zfinx/Zdinx) and Zicond is enabled. When Zicond is disabled, we expect +; a branch-based lowering instead. + +; ----------------------------------------------------------------------------- +; float select with i1 condition (Zfinx) +; ----------------------------------------------------------------------------- + +define float @select_f32_i1(i1 %cond, float %t, float %f) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_f32_i1: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_f32_i1: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV64ZDINX_NOZICOND-NEXT: bnez a3, .LBB0_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV64ZDINX_NOZICOND-NEXT: .LBB0_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; 
RV64ZHINX_ZICOND-LABEL: select_f32_i1: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_f32_i1: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB0_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.s fa0, fa1 +; RV64FD-NEXT: .LBB0_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_f32_i1: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_f32_i1: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZFINX_NOZICOND-NEXT: bnez a3, .LBB0_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a0, a2 +; RV32ZFINX_NOZICOND-NEXT: .LBB0_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_f32_i1: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: or a0, 
a0, a2 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_f32_i1: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZDINX_NOZICOND-NEXT: bnez a3, .LBB0_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV32ZDINX_NOZICOND-NEXT: .LBB0_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, float %t, float %f + ret float %sel +} + +; ----------------------------------------------------------------------------- +; double select with i1 condition (Zdinx) +; ----------------------------------------------------------------------------- + +define double @select_f64_i1(i1 %cond, double %t, double %f) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_f64_i1: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_f64_i1: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: andi a3, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV64ZDINX_NOZICOND-NEXT: bnez a3, .LBB1_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV64ZDINX_NOZICOND-NEXT: .LBB1_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_f64_i1: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_f64_i1: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB1_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.d fa0, fa1 +; RV64FD-NEXT: 
.LBB1_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_f64_i1: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.nez a3, a3, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32ZFINX_ZICOND-NEXT: czero.nez a4, a4, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a2, a2, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a1, a3 +; RV32ZFINX_ZICOND-NEXT: or a1, a2, a4 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_f64_i1: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: andi a5, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZFINX_NOZICOND-NEXT: bnez a5, .LBB1_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a0, a3 +; RV32ZFINX_NOZICOND-NEXT: mv a2, a4 +; RV32ZFINX_NOZICOND-NEXT: .LBB1_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a1, a2 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_f64_i1: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: bnez a0, .LBB1_2 +; RV32ZDINX_ZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_ZICOND-NEXT: mv a7, a4 +; RV32ZDINX_ZICOND-NEXT: mv a6, a3 +; RV32ZDINX_ZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_ZICOND-NEXT: j .LBB1_3 +; RV32ZDINX_ZICOND-NEXT: .LBB1_2: +; RV32ZDINX_ZICOND-NEXT: mv a5, a2 +; RV32ZDINX_ZICOND-NEXT: mv a4, a1 +; RV32ZDINX_ZICOND-NEXT: .LBB1_3: # %entry +; RV32ZDINX_ZICOND-NEXT: mv a0, a4 +; RV32ZDINX_ZICOND-NEXT: mv a1, a5 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_f64_i1: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: bnez a0, .LBB1_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a7, a4 +; RV32ZDINX_NOZICOND-NEXT: mv a6, a3 +; RV32ZDINX_NOZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_NOZICOND-NEXT: j .LBB1_3 +; RV32ZDINX_NOZICOND-NEXT: .LBB1_2: +; RV32ZDINX_NOZICOND-NEXT: mv a5, a2 +; 
RV32ZDINX_NOZICOND-NEXT: mv a4, a1 +; RV32ZDINX_NOZICOND-NEXT: .LBB1_3: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a0, a4 +; RV32ZDINX_NOZICOND-NEXT: mv a1, a5 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, double %t, double %f + ret double %sel +} + +; ----------------------------------------------------------------------------- +; double select with floating-point compare condition (a > b ? c : d), Zdinx +; ----------------------------------------------------------------------------- + +define double @select_f64_fcmp(double %a, double %b, double %c, double %d) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_f64_fcmp: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: flt.d a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: czero.nez a1, a3, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a2, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_f64_fcmp: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: flt.d a1, a1, a0 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a2 +; RV64ZDINX_NOZICOND-NEXT: bnez a1, .LBB2_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a0, a3 +; RV64ZDINX_NOZICOND-NEXT: .LBB2_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_f64_fcmp: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: addi sp, sp, -32 +; RV64ZHINX_ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64ZHINX_ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64ZHINX_ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64ZHINX_ZICOND-NEXT: mv s0, a3 +; RV64ZHINX_ZICOND-NEXT: mv s1, a2 +; RV64ZHINX_ZICOND-NEXT: call __gtdf2 +; RV64ZHINX_ZICOND-NEXT: sgtz a0, a0 +; RV64ZHINX_ZICOND-NEXT: czero.nez a1, s0, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, s1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZHINX_ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64ZHINX_ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; 
RV64ZHINX_ZICOND-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64ZHINX_ZICOND-NEXT: addi sp, sp, 32 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_f64_fcmp: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: flt.d a0, fa1, fa0 +; RV64FD-NEXT: fmv.d fa0, fa2 +; RV64FD-NEXT: bnez a0, .LBB2_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.d fa0, fa3 +; RV64FD-NEXT: .LBB2_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_f64_fcmp: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, -32 +; RV32ZFINX_ZICOND-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: mv s0, a7 +; RV32ZFINX_ZICOND-NEXT: mv s1, a6 +; RV32ZFINX_ZICOND-NEXT: mv s2, a5 +; RV32ZFINX_ZICOND-NEXT: mv s3, a4 +; RV32ZFINX_ZICOND-NEXT: call __gtdf2 +; RV32ZFINX_ZICOND-NEXT: sgtz a0, a0 +; RV32ZFINX_ZICOND-NEXT: czero.nez a1, s1, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a2, s3, a0 +; RV32ZFINX_ZICOND-NEXT: czero.nez a3, s0, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a4, s2, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a2, a1 +; RV32ZFINX_ZICOND-NEXT: or a1, a4, a3 +; RV32ZFINX_ZICOND-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, 32 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_f64_fcmp: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, -32 +; RV32ZFINX_NOZICOND-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; 
RV32ZFINX_NOZICOND-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: mv s1, a7 +; RV32ZFINX_NOZICOND-NEXT: mv s3, a6 +; RV32ZFINX_NOZICOND-NEXT: mv s0, a5 +; RV32ZFINX_NOZICOND-NEXT: mv s2, a4 +; RV32ZFINX_NOZICOND-NEXT: call __gtdf2 +; RV32ZFINX_NOZICOND-NEXT: bgtz a0, .LBB2_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv s2, s3 +; RV32ZFINX_NOZICOND-NEXT: mv s0, s1 +; RV32ZFINX_NOZICOND-NEXT: .LBB2_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a0, s2 +; RV32ZFINX_NOZICOND-NEXT: mv a1, s0 +; RV32ZFINX_NOZICOND-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, 32 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_f64_fcmp: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: flt.d a0, a2, a0 +; RV32ZDINX_ZICOND-NEXT: bnez a0, .LBB2_2 +; RV32ZDINX_ZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_ZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_ZICOND-NEXT: .LBB2_2: # %entry +; RV32ZDINX_ZICOND-NEXT: mv a0, a4 +; RV32ZDINX_ZICOND-NEXT: mv a1, a5 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_f64_fcmp: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: flt.d a0, a2, a0 +; RV32ZDINX_NOZICOND-NEXT: bnez a0, .LBB2_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: fmv.d a4, a6 +; RV32ZDINX_NOZICOND-NEXT: .LBB2_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a0, a4 +; RV32ZDINX_NOZICOND-NEXT: mv a1, a5 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %cmp = fcmp ogt double %a, %b + %sel = select i1 %cmp, double %c, double %d + ret double %sel 
+} + +; ----------------------------------------------------------------------------- +; half select with i1 condition (cond ? a : b), Zfinx +; ----------------------------------------------------------------------------- + +define dso_local noundef half @select_half_i1(i1 %cond, half %a, half %b) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_half_i1: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_half_i1: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV64ZDINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_NOZICOND-NEXT: bnez a0, .LBB3_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: mv a1, a2 +; RV64ZDINX_NOZICOND-NEXT: .LBB3_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: lui a0, 1048560 +; RV64ZDINX_NOZICOND-NEXT: or a0, a1, a0 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_half_i1: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x12_h killed $x12_h def $x12 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: or a0, a0, a2 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_h killed $x10_h killed $x10 +; 
RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_half_i1: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB3_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.x.w a0, fa1 +; RV64FD-NEXT: j .LBB3_3 +; RV64FD-NEXT: .LBB3_2: +; RV64FD-NEXT: fmv.x.w a0, fa0 +; RV64FD-NEXT: .LBB3_3: # %entry +; RV64FD-NEXT: lui a1, 1048560 +; RV64FD-NEXT: or a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_half_i1: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZFINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_half_i1: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZFINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_NOZICOND-NEXT: bnez a0, .LBB3_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: mv a1, a2 +; RV32ZFINX_NOZICOND-NEXT: .LBB3_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: lui a0, 1048560 +; RV32ZFINX_NOZICOND-NEXT: or a0, a1, a0 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_half_i1: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.nez a2, a2, a0 +; 
RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a2 +; RV32ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_half_i1: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x12_w killed $x12_w def $x12 +; RV32ZDINX_NOZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_NOZICOND-NEXT: bnez a0, .LBB3_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: mv a1, a2 +; RV32ZDINX_NOZICOND-NEXT: .LBB3_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: lui a0, 1048560 +; RV32ZDINX_NOZICOND-NEXT: or a0, a1, a0 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, half %a, half %b + ret half %sel +} + +; ----------------------------------------------------------------------------- +; Test select with i1 condition and zero ret val (cond ? 
a : 0), Zfinx +; ----------------------------------------------------------------------------- +define dso_local noundef float @select_i1_f32_0(i1 %cond, float %t) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_i1_f32_0: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_i1_f32_0: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: andi a2, a0, 1 +; RV64ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV64ZDINX_NOZICOND-NEXT: bnez a2, .LBB4_2 +; RV64ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV64ZDINX_NOZICOND-NEXT: li a0, 0 +; RV64ZDINX_NOZICOND-NEXT: .LBB4_2: # %entry +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_i1_f32_0: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_i1_f32_0: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: andi a0, a0, 1 +; RV64FD-NEXT: bnez a0, .LBB4_2 +; RV64FD-NEXT: # %bb.1: # %entry +; RV64FD-NEXT: fmv.w.x fa0, zero +; RV64FD-NEXT: .LBB4_2: # %entry +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_i1_f32_0: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_i1_f32_0: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: andi a2, a0, 1 +; RV32ZFINX_NOZICOND-NEXT: mv a0, 
a1 +; RV32ZFINX_NOZICOND-NEXT: bnez a2, .LBB4_2 +; RV32ZFINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZFINX_NOZICOND-NEXT: li a0, 0 +; RV32ZFINX_NOZICOND-NEXT: .LBB4_2: # %entry +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_i1_f32_0: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_i1_f32_0: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: andi a2, a0, 1 +; RV32ZDINX_NOZICOND-NEXT: mv a0, a1 +; RV32ZDINX_NOZICOND-NEXT: bnez a2, .LBB4_2 +; RV32ZDINX_NOZICOND-NEXT: # %bb.1: # %entry +; RV32ZDINX_NOZICOND-NEXT: li a0, 0 +; RV32ZDINX_NOZICOND-NEXT: .LBB4_2: # %entry +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, float %t, float 0.000000e+00 + ret float %sel +} + +; ----------------------------------------------------------------------------- +; Test select with i1 condition and zero ret val for half fp (cond ? 
a : 0) +; ----------------------------------------------------------------------------- +define dso_local noundef half @select_i1_half_0(i1 %cond, half %val) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_i1_half_0: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_i1_half_0: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_NOZICOND-NEXT: slli a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: srai a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_i1_half_0: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: # kill: def $x10_h killed $x10_h killed $x10 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_i1_half_0: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: fmv.x.w a1, fa0 +; RV64FD-NEXT: slli a0, a0, 63 +; RV64FD-NEXT: srai a0, a0, 63 +; RV64FD-NEXT: and a0, a0, a1 +; RV64FD-NEXT: lui a1, 1048560 +; RV64FD-NEXT: or a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_i1_half_0: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: lui 
a1, 1048560 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_i1_half_0: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_i1_half_0: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_i1_half_0: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, half %val, half 0xH0000 + ret half %sel +} + +; ----------------------------------------------------------------------------- +; Test select with i1 condition and zero value for half fp, feeding into fadd ((cond ? 
a : 0) + 1.0) +; ----------------------------------------------------------------------------- +define half @select_i1_half_0_add(i1 %cond, half %val) nounwind { +; RV64ZDINX_ZICOND-LABEL: select_i1_half_0_add: +; RV64ZDINX_ZICOND: # %bb.0: # %entry +; RV64ZDINX_ZICOND-NEXT: addi sp, sp, -16 +; RV64ZDINX_ZICOND-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: call __extendhfsf2 +; RV64ZDINX_ZICOND-NEXT: lui a1, 260096 +; RV64ZDINX_ZICOND-NEXT: fadd.s a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: call __truncsfhf2 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV64ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_ZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_ZICOND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ZDINX_ZICOND-NEXT: addi sp, sp, 16 +; RV64ZDINX_ZICOND-NEXT: ret +; +; RV64ZDINX_NOZICOND-LABEL: select_i1_half_0_add: +; RV64ZDINX_NOZICOND: # %bb.0: # %entry +; RV64ZDINX_NOZICOND-NEXT: addi sp, sp, -16 +; RV64ZDINX_NOZICOND-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV64ZDINX_NOZICOND-NEXT: slli a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: srai a0, a0, 63 +; RV64ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV64ZDINX_NOZICOND-NEXT: call __extendhfsf2 +; RV64ZDINX_NOZICOND-NEXT: lui a1, 260096 +; RV64ZDINX_NOZICOND-NEXT: fadd.s a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: call __truncsfhf2 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV64ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV64ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV64ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 
+; RV64ZDINX_NOZICOND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ZDINX_NOZICOND-NEXT: addi sp, sp, 16 +; RV64ZDINX_NOZICOND-NEXT: ret +; +; RV64ZHINX_ZICOND-LABEL: select_i1_half_0_add: +; RV64ZHINX_ZICOND: # %bb.0: # %entry +; RV64ZHINX_ZICOND-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64ZHINX_ZICOND-NEXT: andi a0, a0, 1 +; RV64ZHINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64ZHINX_ZICOND-NEXT: li a1, 15 +; RV64ZHINX_ZICOND-NEXT: slli a1, a1, 10 +; RV64ZHINX_ZICOND-NEXT: fadd.h a0, a0, a1 +; RV64ZHINX_ZICOND-NEXT: ret +; +; RV64FD-LABEL: select_i1_half_0_add: +; RV64FD: # %bb.0: # %entry +; RV64FD-NEXT: addi sp, sp, -16 +; RV64FD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64FD-NEXT: fmv.x.w a1, fa0 +; RV64FD-NEXT: slli a0, a0, 63 +; RV64FD-NEXT: srai a0, a0, 63 +; RV64FD-NEXT: and a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: call __extendhfsf2 +; RV64FD-NEXT: lui a0, 260096 +; RV64FD-NEXT: fmv.w.x fa5, a0 +; RV64FD-NEXT: fadd.s fa0, fa0, fa5 +; RV64FD-NEXT: call __truncsfhf2 +; RV64FD-NEXT: fmv.x.w a0, fa0 +; RV64FD-NEXT: lui a1, 1048560 +; RV64FD-NEXT: or a0, a0, a1 +; RV64FD-NEXT: fmv.w.x fa0, a0 +; RV64FD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64FD-NEXT: addi sp, sp, 16 +; RV64FD-NEXT: ret +; +; RV32ZFINX_ZICOND-LABEL: select_i1_half_0_add: +; RV32ZFINX_ZICOND: # %bb.0: # %entry +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, -16 +; RV32ZFINX_ZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZFINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: call __extendhfsf2 +; RV32ZFINX_ZICOND-NEXT: lui a1, 260096 +; RV32ZFINX_ZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZFINX_ZICOND-NEXT: call __truncsfhf2 +; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZFINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_ZICOND-NEXT: or a0, a0, a1 
+; RV32ZFINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_ZICOND-NEXT: addi sp, sp, 16 +; RV32ZFINX_ZICOND-NEXT: ret +; +; RV32ZFINX_NOZICOND-LABEL: select_i1_half_0_add: +; RV32ZFINX_NOZICOND: # %bb.0: # %entry +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, -16 +; RV32ZFINX_NOZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZFINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZFINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: call __extendhfsf2 +; RV32ZFINX_NOZICOND-NEXT: lui a1, 260096 +; RV32ZFINX_NOZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: call __truncsfhf2 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZFINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZFINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZFINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZFINX_NOZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZFINX_NOZICOND-NEXT: addi sp, sp, 16 +; RV32ZFINX_NOZICOND-NEXT: ret +; +; RV32ZDINX_ZICOND-LABEL: select_i1_half_0_add: +; RV32ZDINX_ZICOND: # %bb.0: # %entry +; RV32ZDINX_ZICOND-NEXT: addi sp, sp, -16 +; RV32ZDINX_ZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZDINX_ZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_ZICOND-NEXT: andi a0, a0, 1 +; RV32ZDINX_ZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: call __extendhfsf2 +; RV32ZDINX_ZICOND-NEXT: lui a1, 260096 +; RV32ZDINX_ZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZDINX_ZICOND-NEXT: call __truncsfhf2 +; RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZDINX_ZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_ZICOND-NEXT: or a0, a0, a1 +; 
RV32ZDINX_ZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZDINX_ZICOND-NEXT: addi sp, sp, 16 +; RV32ZDINX_ZICOND-NEXT: ret +; +; RV32ZDINX_NOZICOND-LABEL: select_i1_half_0_add: +; RV32ZDINX_NOZICOND: # %bb.0: # %entry +; RV32ZDINX_NOZICOND-NEXT: addi sp, sp, -16 +; RV32ZDINX_NOZICOND-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x11_w killed $x11_w def $x11 +; RV32ZDINX_NOZICOND-NEXT: slli a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: srai a0, a0, 31 +; RV32ZDINX_NOZICOND-NEXT: and a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: call __extendhfsf2 +; RV32ZDINX_NOZICOND-NEXT: lui a1, 260096 +; RV32ZDINX_NOZICOND-NEXT: fadd.s a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: call __truncsfhf2 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w def $x10 +; RV32ZDINX_NOZICOND-NEXT: lui a1, 1048560 +; RV32ZDINX_NOZICOND-NEXT: or a0, a0, a1 +; RV32ZDINX_NOZICOND-NEXT: # kill: def $x10_w killed $x10_w killed $x10 +; RV32ZDINX_NOZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZDINX_NOZICOND-NEXT: addi sp, sp, 16 +; RV32ZDINX_NOZICOND-NEXT: ret +entry: + %sel = select i1 %cond, half %val, half 0xH0000 + %add = fadd half %sel, 1.0 + ret half %add +} From 7494f3df14e5d401b73f2f8ccbd811f3556c5be5 Mon Sep 17 00:00:00 2001 From: Aadesh Premkumar Date: Mon, 1 Dec 2025 08:14:51 +0530 Subject: [PATCH 7/9] [SPIRV] Added support for extension SPV_ALTERA_arbitrary_precision_fixed_point and name change of SPV_INTEL_arbitrary_precision_integers to SPV_ALTERA_arbitrary_precision_integers (#136085) --Added support for extension SPV_ALTERA_arbitrary_precision_fixed_point --Added test files for extension SPV_ALTERA_arbitrary_precision_fixed_point --- llvm/docs/SPIRVUsage.rst | 13 +- llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 73 +++++ llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 14 + 
llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 10 +- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 8 +- llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 24 ++ llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 2 +- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 21 ++ llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 2 +- .../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 6 +- ...arbitrary-precision-fixed-point-numbers.ll | 254 ++++++++++++++++++ .../SPV_INTEL_arbitrary_precision_integers.ll | 6 +- .../extensions/SPV_INTEL_int4/negative.ll | 6 +- ...both-allowed-disallowed-extension-error.ll | 6 +- .../enable-all-extensions-but-one.ll | 4 +- .../SPIRV/extensions/enable-all-extensions.ll | 2 +- ...-SPV_INTEL_arbitrary_precision_integers.ll | 6 +- .../llvm-intrinsics/bitreverse_small_type.ll | 8 +- .../CodeGen/SPIRV/trunc-nonstd-bitwidth.ll | 4 +- .../mlir/Dialect/SPIRV/IR/SPIRVBase.td | 2 +- 20 files changed, 433 insertions(+), 38 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index aedb6643cf581..88164e6fa53d8 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -30,8 +30,8 @@ Static Compiler Commands Description: This command compiles an LLVM IL file (`input.ll`) to a SPIR-V binary (`output.spvt`) for a 32-bit architecture. 2. **Compilation with Extensions and Optimization** - Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers input.ll -o output.spvt` - Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_INTEL_arbitrary_precision_integers extension. 
+ Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers input.ll -o output.spvt` + Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_ALTERA_arbitrary_precision_integers extension. 3. **Compilation with experimental NonSemantic.Shader.DebugInfo.100 support** Command: `llc --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info input.ll -o output.spvt` @@ -136,7 +136,7 @@ extensions to enable or disable, each prefixed with ``+`` or ``-``, respectively To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use: -``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_arbitrary_precision_integers`` +``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_ALTERA_arbitrary_precision_integers`` To enable all extensions, use the following option: ``-spirv-ext=all`` @@ -145,7 +145,7 @@ To enable all KHR extensions, use the following option: ``-spirv-ext=khr`` To enable all extensions except specified, specify ``all`` followed by a list of disallowed extensions. For example: -``-spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers`` +``-spirv-ext=all,-SPV_ALTERA_arbitrary_precision_integers`` Below is a list of supported SPIR-V extensions, sorted alphabetically by their extension names: @@ -171,7 +171,7 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e - Extends the SPV_EXT_shader_atomic_float_add and SPV_EXT_shader_atomic_float_min_max to support addition, minimum and maximum on 16-bit `bfloat16` floating-point numbers in memory. 
* - ``SPV_INTEL_2d_block_io`` - Adds additional subgroup block prefetch, load, load transposed, load transformed and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two dimensional region of memory. - * - ``SPV_INTEL_arbitrary_precision_integers`` + * - ``SPV_ALTERA_arbitrary_precision_integers`` - Allows generating arbitrary width integer types. * - ``SPV_INTEL_bindless_images`` - Adds instructions to convert convert unsigned integer handles to images, samplers and sampled images. @@ -245,6 +245,9 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e - Adds execution mode and capability to enable maximal reconvergence. * - ``SPV_ALTERA_blocking_pipes`` - Adds new pipe read and write functions that have blocking semantics instead of the non-blocking semantics of the existing pipe read/write functions. + * - ``SPV_ALTERA_arbitrary_precision_fixed_point`` + - Add instructions for fixed point arithmetic. The extension works without SPV_ALTERA_arbitrary_precision_integers, but together they allow greater flexibility in representing arbitrary precision data types. 
+ SPIR-V representation in LLVM IR ================================ diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 709f49b0fecc1..87ebee6a14eac 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2399,6 +2399,77 @@ static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call, return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0)); } +static bool buildAPFixedPointInst(const SPIRV::IncomingCall *Call, + unsigned Opcode, MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + SmallVector ImmArgs; + Register InputReg = Call->Arguments[0]; + const Type *RetTy = GR->getTypeForSPIRVType(Call->ReturnType); + bool IsSRet = RetTy->isVoidTy(); + + if (IsSRet) { + const LLT ValTy = MRI->getType(InputReg); + Register ActualRetValReg = MRI->createGenericVirtualRegister(ValTy); + SPIRVType *InstructionType = + GR->getPointeeType(GR->getSPIRVTypeForVReg(InputReg)); + InputReg = Call->Arguments[1]; + auto InputType = GR->getTypeForSPIRVType(GR->getSPIRVTypeForVReg(InputReg)); + Register PtrInputReg; + if (InputType->getTypeID() == llvm::Type::TypeID::TypedPointerTyID) { + LLT InputLLT = MRI->getType(InputReg); + PtrInputReg = MRI->createGenericVirtualRegister(InputLLT); + SPIRVType *PtrType = + GR->getPointeeType(GR->getSPIRVTypeForVReg(InputReg)); + MachineMemOperand *MMO1 = MIRBuilder.getMF().getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, + InputLLT.getSizeInBytes(), Align(4)); + MIRBuilder.buildLoad(PtrInputReg, InputReg, *MMO1); + MRI->setRegClass(PtrInputReg, &SPIRV::iIDRegClass); + GR->assignSPIRVTypeToVReg(PtrType, PtrInputReg, MIRBuilder.getMF()); + } + + for (unsigned index = 2; index < 7; index++) { + ImmArgs.push_back(getConstFromIntrinsic(Call->Arguments[index], MRI)); + } + + // Emit the instruction + auto MIB = MIRBuilder.buildInstr(Opcode) + .addDef(ActualRetValReg) 
+ .addUse(GR->getSPIRVTypeID(InstructionType)); + if (PtrInputReg) + MIB.addUse(PtrInputReg); + else + MIB.addUse(InputReg); + + for (uint32_t Imm : ImmArgs) + MIB.addImm(Imm); + unsigned Size = ValTy.getSizeInBytes(); + // Store result to the pointer passed in Arg[0] + MachineMemOperand *MMO = MIRBuilder.getMF().getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOStore, Size, Align(4)); + MRI->setRegClass(ActualRetValReg, &SPIRV::pIDRegClass); + MIRBuilder.buildStore(ActualRetValReg, Call->Arguments[0], *MMO); + return true; + } else { + for (unsigned index = 1; index < 6; index++) + ImmArgs.push_back(getConstFromIntrinsic(Call->Arguments[index], MRI)); + + return buildOpFromWrapper(MIRBuilder, Opcode, Call, + GR->getSPIRVTypeID(Call->ReturnType), ImmArgs); + } +} + +static bool generateAPFixedPointInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + + return buildAPFixedPointInst(Call, Opcode, MIRBuilder, GR); +} + static bool generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, @@ -3061,6 +3132,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR); case SPIRV::BlockingPipes: return generateBlockingPipesInst(Call.get(), MIRBuilder, GR); + case SPIRV::ArbitraryPrecisionFixedPoint: + return generateAPFixedPointInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 492a98e1995fe..98440856387c9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup; def Block2DLoadStore : BuiltinGroup; def Pipe : BuiltinGroup; def PredicatedLoadStore : 
BuiltinGroup; +def ArbitraryPrecisionFixedPoint : BuiltinGroup; def BlockingPipes : BuiltinGroup; //===----------------------------------------------------------------------===// @@ -1181,6 +1182,19 @@ defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, Bloc defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>; defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>; +//SPV_ALTERA_arbitrary_precision_fixed_point +defm : DemangledNativeBuiltin<"__spirv_FixedSqrtINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSqrtALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedRecipINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedRecipALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedRsqrtINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedRsqrtALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedCosINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedCosALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinCosINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinCosALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinPiINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinPiALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedCosPiINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedCosPiALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedSinCosPiINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedSinCosPiALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedLogINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedLogALTERA>; +defm : DemangledNativeBuiltin<"__spirv_FixedExpINTEL", OpenCL_std, ArbitraryPrecisionFixedPoint, 6 , 8, OpFixedExpALTERA>; + 
//===----------------------------------------------------------------------===// // Class defining an atomic instruction on floating-point numbers. // diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index d394b3ac243a9..146384f4bf08c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -53,8 +53,8 @@ static const std::map> SPIRV::Extension::Extension::SPV_GOOGLE_hlsl_functionality1}, {"SPV_GOOGLE_user_type", SPIRV::Extension::Extension::SPV_GOOGLE_user_type}, - {"SPV_INTEL_arbitrary_precision_integers", - SPIRV::Extension::Extension::SPV_INTEL_arbitrary_precision_integers}, + {"SPV_ALTERA_arbitrary_precision_integers", + SPIRV::Extension::Extension::SPV_ALTERA_arbitrary_precision_integers}, {"SPV_INTEL_cache_controls", SPIRV::Extension::Extension::SPV_INTEL_cache_controls}, {"SPV_INTEL_float_controls2", @@ -163,7 +163,11 @@ static const std::map> {"SPV_INTEL_kernel_attributes", SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}, {"SPV_ALTERA_blocking_pipes", - SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}}; + SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}, + {"SPV_INTEL_int4", SPIRV::Extension::Extension::SPV_INTEL_int4}, + {"SPV_ALTERA_arbitrary_precision_fixed_point", + SPIRV::Extension::Extension:: + SPV_ALTERA_arbitrary_precision_fixed_point}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 8b1a09caf907d..ae81d38579c18 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -155,7 +155,7 @@ unsigned SPIRVGlobalRegistry::adjustOpTypeIntWidth(unsigned Width) const { report_fatal_error("Unsupported integer width!"); const SPIRVSubtarget &ST = cast(CurMF->getSubtarget()); if (ST.canUseExtension( - 
SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers) || + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers) || ST.canUseExtension(SPIRV::Extension::SPV_INTEL_int4)) return Width; if (Width <= 8) @@ -183,11 +183,11 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeInt(unsigned Width, .addImm(SPIRV::Capability::Int4TypeINTEL); } else if ((!isPowerOf2_32(Width) || Width < 8) && ST.canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers)) { + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers)) { MIRBuilder.buildInstr(SPIRV::OpExtension) - .addImm(SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers); + .addImm(SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers); MIRBuilder.buildInstr(SPIRV::OpCapability) - .addImm(SPIRV::Capability::ArbitraryPrecisionIntegersINTEL); + .addImm(SPIRV::Capability::ArbitraryPrecisionIntegersALTERA); } return MIRBuilder.buildInstr(SPIRV::OpTypeInt) .addDef(createTypeVReg(MIRBuilder)) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 03bd61bdf2cf6..815d2d7ed854b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -999,3 +999,27 @@ def OpReadPipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$p "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; def OpWritePipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment), "OpWritePipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; + +//SPV_ALTERA_arbitrary_precision_fixed_point +def OpFixedSqrtALTERA: Op<5923, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSqrtALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedRecipALTERA: Op<5924, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedRecipALTERA 
$result_type $input $sign $l $rl $q $o">; +def OpFixedRsqrtALTERA: Op<5925, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedRsqrtALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinALTERA: Op<5926, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedCosALTERA: Op<5927, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedCosALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinCosALTERA: Op<5928, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinCosALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinPiALTERA: Op<5929, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinPiALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedCosPiALTERA: Op<5930, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedCosPiALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedSinCosPiALTERA: Op<5931, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedSinCosPiALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedLogALTERA: Op<5932, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedLogALTERA $result_type $input $sign $l $rl $q $o">; +def OpFixedExpALTERA: Op<5933, (outs ID:$res), (ins TYPE:$result_type, ID:$input, i32imm:$sign, i32imm:$l, i32imm:$rl, i32imm:$q, i32imm:$o), + "$res = OpFixedExpALTERA $result_type $input $sign $l $rl $q $o">; diff --git 
a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index 53074ea3b2597..189c03a0ca3f9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -128,7 +128,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { bool IsExtendedInts = ST.canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers) || + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers) || ST.canUseExtension(SPIRV::Extension::SPV_KHR_bit_instructions) || ST.canUseExtension(SPIRV::Extension::SPV_INTEL_int4); auto extendedScalarsAndVectors = diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 00f750b88a608..2feb73d8dedfa 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1692,6 +1692,27 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::GroupNonUniformRotateKHR); Reqs.addCapability(SPIRV::Capability::GroupNonUniform); break; + case SPIRV::OpFixedCosALTERA: + case SPIRV::OpFixedSinALTERA: + case SPIRV::OpFixedCosPiALTERA: + case SPIRV::OpFixedSinPiALTERA: + case SPIRV::OpFixedExpALTERA: + case SPIRV::OpFixedLogALTERA: + case SPIRV::OpFixedRecipALTERA: + case SPIRV::OpFixedSqrtALTERA: + case SPIRV::OpFixedSinCosALTERA: + case SPIRV::OpFixedSinCosPiALTERA: + case SPIRV::OpFixedRsqrtALTERA: + if (!ST.canUseExtension( + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_fixed_point)) + report_fatal_error("This instruction requires the " + "following SPIR-V extension: " + "SPV_ALTERA_arbitrary_precision_fixed_point", + false); + Reqs.addExtension( + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_fixed_point); + Reqs.addCapability(SPIRV::Capability::ArbitraryPrecisionFixedPointALTERA); + break; case SPIRV::OpGroupIMulKHR: case SPIRV::OpGroupFMulKHR: case SPIRV::OpGroupBitwiseAndKHR: diff --git 
a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 0f4b3d59b904a..7ca463460ffad 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -509,7 +509,7 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, bool IsExtendedInts = ST->canUseExtension( - SPIRV::Extension::SPV_INTEL_arbitrary_precision_integers) || + SPIRV::Extension::SPV_ALTERA_arbitrary_precision_integers) || ST->canUseExtension(SPIRV::Extension::SPV_KHR_bit_instructions) || ST->canUseExtension(SPIRV::Extension::SPV_INTEL_int4); diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index f02a587013856..94e0138c66487 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -318,7 +318,7 @@ defm SPV_INTEL_io_pipes : ExtensionOperand<63, [EnvOpenCL]>; defm SPV_KHR_ray_tracing : ExtensionOperand<64, [EnvVulkan]>; defm SPV_KHR_ray_query : ExtensionOperand<65, [EnvVulkan]>; defm SPV_INTEL_fpga_memory_accesses : ExtensionOperand<66, [EnvOpenCL]>; -defm SPV_INTEL_arbitrary_precision_integers : ExtensionOperand<67, [EnvOpenCL]>; +defm SPV_ALTERA_arbitrary_precision_integers : ExtensionOperand<67, [EnvOpenCL]>; defm SPV_EXT_shader_atomic_float_add : ExtensionOperand<68, [EnvVulkan, EnvOpenCL]>; defm SPV_KHR_terminate_invocation : ExtensionOperand<69, [EnvVulkan]>; @@ -390,6 +390,7 @@ defm SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>; defm SPV_INTEL_bfloat16_arithmetic : ExtensionOperand<129, [EnvVulkan, EnvOpenCL]>; defm SPV_INTEL_16bit_atomics : ExtensionOperand<130, [EnvVulkan, EnvOpenCL]>; +defm SPV_ALTERA_arbitrary_precision_fixed_point : ExtensionOperand<131, [EnvOpenCL, EnvVulkan]>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -549,7 +550,7 @@ 
defm ComputeDerivativeGroupLinearNV : CapabilityOperand<5350, 0, 0, [], []>; defm FragmentDensityEXT : CapabilityOperand<5291, 0, 0, [], [Shader]>; defm PhysicalStorageBufferAddressesEXT : CapabilityOperand<5347, 0, 0, [], [Shader]>; defm CooperativeMatrixNV : CapabilityOperand<5357, 0, 0, [], [Shader]>; -defm ArbitraryPrecisionIntegersINTEL : CapabilityOperand<5844, 0, 0, [SPV_INTEL_arbitrary_precision_integers], [Int8, Int16]>; +defm ArbitraryPrecisionIntegersALTERA : CapabilityOperand<5844, 0, 0, [SPV_ALTERA_arbitrary_precision_integers], [Int8, Int16]>; defm OptNoneINTEL : CapabilityOperand<6094, 0, 0, [SPV_INTEL_optnone], []>; defm OptNoneEXT : CapabilityOperand<6094, 0, 0, [SPV_EXT_optnone], []>; defm BitInstructions : CapabilityOperand<6025, 0, 0, [SPV_KHR_bit_instructions], []>; @@ -615,6 +616,7 @@ defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>; defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>; defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>; defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>; +defm ArbitraryPrecisionFixedPointALTERA : CapabilityOperand<5922, 0, 0, [SPV_ALTERA_arbitrary_precision_fixed_point], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll new file mode 100644 index 0000000000000..e8bc48ec100b1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_arbitrary_precision_fixed_point/capability-arbitrary-precision-fixed-point-numbers.ll @@ -0,0 +1,254 
@@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_fixed_point,+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_fixed_point,+SPV_ALTERA_arbitrary_precision_integers %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Kernel +; CHECK-DAG: OpCapability ArbitraryPrecisionIntegersALTERA +; CHECK-DAG: OpCapability ArbitraryPrecisionFixedPointALTERA +; CHECK-DAG: OpExtension "SPV_ALTERA_arbitrary_precision_fixed_point" +; CHECK-DAG: OpExtension "SPV_ALTERA_arbitrary_precision_integers" + +; CHECK-DAG: %[[Ty_8:[0-9]+]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Ty_13:[0-9]+]] = OpTypeInt 13 0 +; CHECK-DAG: %[[Ty_5:[0-9]+]] = OpTypeInt 5 0 +; CHECK-DAG: %[[Ty_3:[0-9]+]] = OpTypeInt 3 0 +; CHECK-DAG: %[[Ty_11:[0-9]+]] = OpTypeInt 11 0 +; CHECK-DAG: %[[Ty_10:[0-9]+]] = OpTypeInt 10 0 +; CHECK-DAG: %[[Ty_17:[0-9]+]] = OpTypeInt 17 0 +; CHECK-DAG: %[[Ty_35:[0-9]+]] = OpTypeInt 35 0 +; CHECK-DAG: %[[Ty_28:[0-9]+]] = OpTypeInt 28 0 +; CHECK-DAG: %[[Ty_31:[0-9]+]] = OpTypeInt 31 0 +; CHECK-DAG: %[[Ty_40:[0-9]+]] = OpTypeInt 40 0 +; CHECK-DAG: %[[Ty_60:[0-9]+]] = OpTypeInt 60 0 +; CHECK-DAG: %[[Ty_16:[0-9]+]] = OpTypeInt 16 0 +; CHECK-DAG: %[[Ty_64:[0-9]+]] = OpTypeInt 64 0 +; CHECK-DAG: %[[Ty_44:[0-9]+]] = OpTypeInt 44 0 +; CHECK-DAG: %[[Ty_34:[0-9]+]] = OpTypeInt 34 0 +; CHECK-DAG: %[[Ty_51:[0-9]+]] = OpTypeInt 51 0 + +; CHECK: %[[Sqrt_InId:[0-9]+]] = OpLoad %[[Ty_13]] +; CHECK-NEXT: %[[#]] = OpFixedSqrtALTERA %[[Ty_5]] %[[Sqrt_InId]] 0 2 2 0 0 + +; CHECK: %[[Recip_InId:[0-9]+]] = OpLoad %[[Ty_3]] +; CHECK-NEXT: %[[#]] = OpFixedRecipALTERA %[[Ty_8]] %[[Recip_InId]] 1 4 4 0 0 + +; CHECK: %[[Rsqrt_InId:[0-9]+]] = OpLoad %[[Ty_11]] +; CHECK-NEXT: %[[#]] = OpFixedRsqrtALTERA %[[Ty_10]] %[[Rsqrt_InId]] 0 8 6 0 0 + +; CHECK: %[[Sin_InId:[0-9]+]] = OpLoad %[[Ty_17]] +; CHECK-NEXT: %[[#]] = 
OpFixedSinALTERA %[[Ty_11]] %[[Sin_InId]] 1 7 5 0 0 + +; CHECK: %[[Cos_InId:[0-9]+]] = OpLoad %[[Ty_35]] +; CHECK-NEXT: %[[#]] = OpFixedCosALTERA %[[Ty_28]] %[[Cos_InId]] 0 9 3 0 0 + +; CHECK: %[[SinCos_InId:[0-9]+]] = OpLoad %[[Ty_31]] +; CHECK-NEXT: %[[#]] = OpFixedSinCosALTERA %[[Ty_40]] %[[SinCos_InId]] 1 10 12 0 0 + +; CHECK: %[[SinPi_InId:[0-9]+]] = OpLoad %[[Ty_60]] +; CHECK-NEXT: %[[#]] = OpFixedSinPiALTERA %[[Ty_5]] %[[SinPi_InId]] 0 2 2 0 0 + +; CHECK: %[[CosPi_InId:[0-9]+]] = OpLoad %[[Ty_28]] +; CHECK-NEXT: %[[#]] = OpFixedCosPiALTERA %[[Ty_16]] %[[CosPi_InId]] 0 8 5 0 0 + +; CHECK: %[[SinCosPi_InId:[0-9]+]] = OpLoad %[[Ty_13]] +; CHECK-NEXT: %[[#]] = OpFixedSinCosPiALTERA %[[Ty_10]] %[[SinCosPi_InId]] 0 2 2 0 0 + +; CHECK: %[[Log_InId:[0-9]+]] = OpLoad %[[Ty_64]] +; CHECK-NEXT: %[[#]] = OpFixedLogALTERA %[[Ty_44]] %[[Log_InId]] 1 24 22 0 0 + +; CHECK: %[[Exp_InId:[0-9]+]] = OpLoad %[[Ty_44]] +; CHECK-NEXT: %[[#]] = OpFixedExpALTERA %[[Ty_34]] %[[Exp_InId]] 0 20 20 0 0 + +; CHECK: %[[SinCos_InId:[0-9]+]] = OpLoad %[[Ty_34]] +; CHECK-NEXT: %[[SinCos_ResultId:[0-9]+]] = OpFixedSinCosALTERA %[[Ty_51]] %[[SinCos_InId]] 1 3 2 0 0 +; CHECK-NEXT: OpStore %[[#]] %[[SinCos_ResultId]] + +; CHECK: %[[ResId:[0-9]+]] = OpLoad %[[Ty_51]] +; CHECK-NEXT: OpStore %[[PtrId:[0-9]+]] %[[ResId]] +; CHECK-NEXT: %[[ExpInId2:[0-9]+]] = OpLoad %[[Ty_51]] %[[PtrId]] +; CHECK-NEXT: %[[#]] = OpFixedExpALTERA %[[Ty_51]] %[[ExpInId2]] 0 20 20 0 0 + +%"class._ZTSZ4mainE3$_0.anon" = type { i8 } + +define dso_local spir_kernel void @_ZTSZ4mainE15kernel_function() !kernel_arg_addr_space !{} !kernel_arg_access_qual !{} !kernel_arg_type !{} !kernel_arg_base_type !{} !kernel_arg_type_qual !{} { +entry: + %0 = alloca %"class._ZTSZ4mainE3$_0.anon", align 1 + %1 = addrspacecast ptr %0 to ptr addrspace(4) + call spir_func void @"_ZZ4mainENK3$_0clEv"(ptr addrspace(4) %1) + ret void +} + +define internal spir_func void @"_ZZ4mainENK3$_0clEv"(ptr addrspace(4) %this) align 2 { +entry: + %this.addr 
= alloca ptr addrspace(4), align 8 + store ptr addrspace(4) %this, ptr %this.addr, align 8 + call spir_func void @_Z4sqrtILi13ELi5ELb0ELi2ELi2EEvv() + call spir_func void @_Z5recipILi3ELi8ELb1ELi4ELi4EEvv() + call spir_func void @_Z5rsqrtILi11ELi10ELb0ELi8ELi6EEvv() + call spir_func void @_Z3sinILi17ELi11ELb1ELi7ELi5EEvv() + call spir_func void @_Z3cosILi35ELi28ELb0ELi9ELi3EEvv() + call spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv() + call spir_func void @_Z6sin_piILi60ELi5ELb0ELi2ELi2EEvv() + call spir_func void @_Z6cos_piILi28ELi16ELb0ELi8ELi5EEvv() + call spir_func void @_Z10sin_cos_piILi13ELi5ELb0ELi2ELi2EEvv() + call spir_func void @_Z3logILi64ELi44ELb1ELi24ELi22EEvv() + call spir_func void @_Z3expILi44ELi34ELb0ELi20ELi20EEvv() + call spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_() + call spir_func void @_Z3expILi51ELi51ELb0ELi20ELi20EEvv() + ret void +} + +define linkonce_odr dso_local spir_func void @_Z4sqrtILi13ELi5ELb0ELi2ELi2EEvv() { +entry: + %in_ptr = alloca i13, align 2 + %out_ptr = alloca i5, align 1 + %in_val = load i13, ptr %in_ptr, align 2 + %res = call spir_func signext i5 @_Z22__spirv_FixedSqrtINTELILi13ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i13 signext %in_val, i1 zeroext false, i32 2, i32 2, i32 0, i32 0) + store i5 %res, ptr %out_ptr, align 1 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z5recipILi3ELi8ELb1ELi4ELi4EEvv() { +entry: + %in_ptr = alloca i3, align 1 + %out_ptr = alloca i8, align 1 + %in_val = load i3, ptr %in_ptr, align 1 + %res = call spir_func signext i8 @_Z23__spirv_FixedRecipINTELILi3ELi8EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i3 signext %in_val, i1 zeroext true, i32 4, i32 4, i32 0, i32 0) + store i8 %res, ptr %out_ptr, align 1 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z5rsqrtILi11ELi10ELb0ELi8ELi6EEvv() { +entry: + %in_ptr = alloca i11, align 2 + %out_ptr = alloca i10, align 2 + %in_val = load i11, ptr %in_ptr, align 2 + %res = call spir_func signext i10 
@_Z23__spirv_FixedRsqrtINTELILi11ELi10EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i11 signext %in_val, i1 zeroext false, i32 8, i32 6, i32 0, i32 0) + store i10 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3sinILi17ELi11ELb1ELi7ELi5EEvv() { +entry: + %in_ptr = alloca i17, align 4 + %out_ptr = alloca i11, align 2 + %in_val = load i17, ptr %in_ptr, align 4 + %res = call spir_func signext i11 @_Z21__spirv_FixedSinINTELILi17ELi11EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i17 signext %in_val, i1 zeroext true, i32 7, i32 5, i32 0, i32 0) + store i11 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3cosILi35ELi28ELb0ELi9ELi3EEvv() { +entry: + %in_ptr = alloca i35, align 8 + %out_ptr = alloca i28, align 4 + %in_val = load i35, ptr %in_ptr, align 8 + %res = call spir_func signext i28 @_Z21__spirv_FixedCosINTELILi35ELi28EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i35 signext %in_val, i1 zeroext false, i32 9, i32 3, i32 0, i32 0) + store i28 %res, ptr %out_ptr, align 4 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv() { +entry: + %in_ptr = alloca i31, align 4 + %out_ptr = alloca i40, align 8 + %in_val = load i31, ptr %in_ptr, align 4 + %res = call spir_func i40 @_Z24__spirv_FixedSinCosINTELILi31ELi20EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i31 signext %in_val, i1 zeroext true, i32 10, i32 12, i32 0, i32 0) + store i40 %res, ptr %out_ptr, align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z6sin_piILi60ELi5ELb0ELi2ELi2EEvv() { +entry: + %in_ptr = alloca i60, align 8 + %out_ptr = alloca i5, align 1 + %in_val = load i60, ptr %in_ptr, align 8 + %res = call spir_func signext i5 @_Z23__spirv_FixedSinPiINTELILi60ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i60 signext %in_val, i1 zeroext false, i32 2, i32 2, i32 0, i32 0) + store i5 %res, ptr %out_ptr, align 1 + ret void +} + +define linkonce_odr dso_local spir_func void 
@_Z6cos_piILi28ELi16ELb0ELi8ELi5EEvv() { +entry: + %in_ptr = alloca i28, align 4 + %out_ptr = alloca i16, align 2 + %in_val = load i28, ptr %in_ptr, align 4 + %res = call spir_func signext i16 @_Z23__spirv_FixedCosPiINTELILi28ELi16EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i28 signext %in_val, i1 zeroext false, i32 8, i32 5, i32 0, i32 0) + store i16 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z10sin_cos_piILi13ELi5ELb0ELi2ELi2EEvv() { +entry: + %in_ptr = alloca i13, align 2 + %out_ptr = alloca i10, align 2 + %in_val = load i13, ptr %in_ptr, align 2 + %res = call spir_func signext i10 @_Z26__spirv_FixedSinCosPiINTELILi13ELi5EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i13 signext %in_val, i1 zeroext false, i32 2, i32 2, i32 0, i32 0) + store i10 %res, ptr %out_ptr, align 2 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3logILi64ELi44ELb1ELi24ELi22EEvv() { +entry: + %in_ptr = alloca i64, align 8 + %out_ptr = alloca i44, align 8 + %in_val = load i64, ptr %in_ptr, align 8 + %res = call spir_func i44 @_Z21__spirv_FixedLogINTELILi64ELi44EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i64 %in_val, i1 zeroext true, i32 24, i32 22, i32 0, i32 0) + store i44 %res, ptr %out_ptr, align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3expILi44ELi34ELb0ELi20ELi20EEvv() { +entry: + %in_ptr = alloca i44, align 8 + %out_ptr = alloca i34, align 8 + %in_val = load i44, ptr %in_ptr, align 8 + %res = call spir_func i34 @_Z21__spirv_FixedExpINTELILi44ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i44 %in_val, i1 zeroext false, i32 20, i32 20, i32 0, i32 0) + store i34 %res, ptr %out_ptr, align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @_Z7sin_cosILi31ELi20ELb1ELi10ELi12EEvv_() { +entry: + %tmp = alloca i34, align 8 + %out_ptr = alloca i51, align 8 + %in_ptr = addrspacecast ptr %tmp to ptr addrspace(4) + %out_s = addrspacecast ptr %out_ptr to ptr addrspace(4) + %in_val = load i34, ptr 
addrspace(4) %in_ptr, align 8 + call spir_func void @_Z24__spirv_FixedSinCosINTELILi34ELi51EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(ptr addrspace(4) sret(i51) align 8 %out_s, i34 %in_val, i1 zeroext true, i32 3, i32 2, i32 0, i32 0) + ret void +} + +define linkonce_odr dso_local spir_func void @_Z3expILi51ELi51ELb0ELi20ELi20EEvv() { +entry: + %a = alloca i51, align 8 + %a.ascast = addrspacecast ptr %a to ptr addrspace(4) + %ap_fixed_Exp = alloca i51, align 8 + %ap_fixed_Exp.ascast = addrspacecast ptr %ap_fixed_Exp to ptr addrspace(4) + %tmp = alloca i51, align 8 + %tmp.ascast = addrspacecast ptr %tmp to ptr addrspace(4) + %indirect-arg-temp = alloca i51, align 8 + %0 = load i51, ptr addrspace(4) %a.ascast, align 8 + store i51 %0, ptr %indirect-arg-temp, align 8 + call spir_func void @_Z21__spirv_FixedExpINTELILi51ELi51EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii( + ptr addrspace(4) sret(i51) align 8 %tmp.ascast, + ptr byval(i64) align 8 %indirect-arg-temp, + i1 zeroext false, i32 20, i32 20, i32 0, i32 0) + %1 = load i51, ptr addrspace(4) %tmp.ascast, align 8 + store i51 %1, ptr addrspace(4) %ap_fixed_Exp.ascast, align 8 + ret void +} + +declare dso_local spir_func signext i5 @_Z22__spirv_FixedSqrtINTELILi13ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i13 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i13 @_Z22__spirv_FixedSqrtINTELILi5ELi13EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i5 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i8 @_Z23__spirv_FixedRecipINTELILi3ELi8EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i3 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i10 @_Z23__spirv_FixedRsqrtINTELILi11ELi10EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i11 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i11 @_Z21__spirv_FixedSinINTELILi17ELi11EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i17 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func 
signext i28 @_Z21__spirv_FixedCosINTELILi35ELi28EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i35, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func i40 @_Z24__spirv_FixedSinCosINTELILi31ELi20EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i31 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i5 @_Z23__spirv_FixedSinPiINTELILi60ELi5EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i60, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i16 @_Z23__spirv_FixedCosPiINTELILi28ELi16EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i28 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func signext i10 @_Z26__spirv_FixedSinCosPiINTELILi13ELi5EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(i13 signext, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func i44 @_Z21__spirv_FixedLogINTELILi64ELi44EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i64, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func i34 @_Z21__spirv_FixedExpINTELILi44ELi34EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(i44, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func void @_Z24__spirv_FixedSinCosINTELILi34ELi51EEU7_ExtIntIXmlLi2ET0_EEiU7_ExtIntIXT_EEibiiii(ptr addrspace(4) sret(i51) align 8, i34, i1 zeroext, i32, i32, i32, i32) +declare dso_local spir_func void @_Z21__spirv_FixedExpINTELILi51ELi51EEU7_ExtIntIXT0_EEiU7_ExtIntIXT_EEibiiii(ptr addrspace(4) sret(i51) align 8, ptr byval(i51) align 8, i1 zeroext, i32, i32, i32, i32) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll index 41d4b58ed1157..9ea8a5709154c 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_arbitrary_precision_integers.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | 
FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s define i6 @getConstantI6() { ret i6 2 @@ -9,8 +9,8 @@ define i13 @getConstantI13() { } ;; Capabilities: -; CHECK-DAG: OpExtension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-DAG: OpCapability ArbitraryPrecisionIntegersINTEL +; CHECK-DAG: OpExtension "SPV_ALTERA_arbitrary_precision_integers" +; CHECK-DAG: OpCapability ArbitraryPrecisionIntegersALTERA ; CHECK-NOT: DAG-FENCE diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll index 4d5fa52a166f2..fdb2776a7e2ec 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_int4/negative.ll @@ -1,11 +1,11 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-INT-4 +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-INT-4 ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-INT-8 ; No error would be reported in comparison to Khronos llvm-spirv, because type adjustments to integer size are made ; in case no appropriate extension is enabled. Here we expect that the type is adjusted to 8 bits. 
-; CHECK-SPIRV: Capability ArbitraryPrecisionIntegersINTEL -; CHECK-SPIRV: Extension "SPV_INTEL_arbitrary_precision_integers" +; CHECK-SPIRV: Capability ArbitraryPrecisionIntegersALTERA +; CHECK-SPIRV: Extension "SPV_ALTERA_arbitrary_precision_integers" ; CHECK-INT-4: %[[#Int4:]] = OpTypeInt 4 0 ; CHECK-INT-8: %[[#Int4:]] = OpTypeInt 8 0 ; CHECK: OpTypeFunction %[[#]] %[[#Int4]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll b/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll index fc07cca4dd240..96dca53b8ba59 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/both-allowed-disallowed-extension-error.ll @@ -1,6 +1,6 @@ -; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,-SPV_INTEL_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s -; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=-SPV_INTEL_arbitrary_precision_integers,+SPV_INTEL_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s -; CHECK: Extension cannot be allowed and disallowed at the same time: SPV_INTEL_arbitrary_precision_integers +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers,-SPV_ALTERA_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=-SPV_ALTERA_arbitrary_precision_integers,+SPV_ALTERA_arbitrary_precision_integers %s -o %t.spvt 2>&1 | FileCheck %s +; CHECK: Extension cannot be allowed and disallowed at the same time: SPV_ALTERA_arbitrary_precision_integers define i8 @foo() { ret i8 2 diff --git a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll index face4a9f5e615..5ddfc85702540 100644 --- 
a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions-but-one.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=all,-SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=KHR %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=khr %s -o - | FileCheck %s @@ -10,7 +10,7 @@ define i6 @foo() { ret i6 2 } -; CHECK-NOT: OpExtension "SPV_INTEL_arbitrary_precision_integers" +; CHECK-NOT: OpExtension "SPV_ALTERA_arbitrary_precision_integers" ; CHECK-DAG: OpExtension "SPV_KHR_bit_instructions" declare i32 @llvm.bitreverse.i32(i32) diff --git a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll index 15905dd1894e2..80b094f462a70 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/enable-all-extensions.ll @@ -5,4 +5,4 @@ define i6 @getConstantI6() { ret i6 2 } -; CHECK: OpExtension "SPV_INTEL_arbitrary_precision_integers" +; CHECK: OpExtension "SPV_ALTERA_arbitrary_precision_integers" diff --git a/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll b/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll index 2c1257471d159..cc3f1ae29a681 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/unused-but-allowed-SPV_INTEL_arbitrary_precision_integers.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown 
--spirv-ext=+SPV_INTEL_arbitrary_precision_integers %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers %s -o - | FileCheck %s define i8 @getConstantI8() { ret i8 2 @@ -15,5 +15,5 @@ define i64 @getConstantI64() { } ;; Capabilities: -; CHECK-NOT: OpExtension "SPV_INTEL_arbitrary_precision_integers" -; CHECK-NOT: OpCapability ArbitraryPrecisionIntegersINTEL +; CHECK-NOT: OpExtension "SPV_ALTERA_arbitrary_precision_integers" +; CHECK-NOT: OpCapability ArbitraryPrecisionIntegersALTERA diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll index 18856147896bb..d4b1592a044bc 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/bitreverse_small_type.ll @@ -1,11 +1,11 @@ ;; Check that llvm.bitreverse.* intrinsics are lowered for ;; 2/4-bit scalar and vector types. 
-; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - | FileCheck %s -; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - | FileCheck %s +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers,+SPV_KHR_bit_instructions %s -o - -filetype=obj | spirv-val %} -; CHECK: OpCapability ArbitraryPrecisionIntegersINTEL -; CHECK: OpExtension "SPV_INTEL_arbitrary_precision_integers" +; CHECK: OpCapability ArbitraryPrecisionIntegersALTERA +; CHECK: OpExtension "SPV_ALTERA_arbitrary_precision_integers" ; CHECK-DAG: %[[#I4:]] = OpTypeInt 4 0 ; CHECK-DAG: %[[#I2:]] = OpTypeInt 2 0 diff --git a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll index 79c2824c3dde1..16cd00b7180a7 100644 --- a/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll +++ b/llvm/test/CodeGen/SPIRV/trunc-nonstd-bitwidth.ll @@ -1,12 +1,12 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOEXT ; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_INTEL_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT ; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled ; XFAIL: expensive_checks diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td index 7b363fac6e627..ecbbf39a534e1 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -792,7 +792,7 @@ def SPIRV_C_FPGABufferLocationINTEL : I32EnumAttrCase<"FPGABufferLocationINTEL", 5921> { list<Availability> availability = [ Extension<[SPV_INTEL_fpga_buffer_location]> ]; } -def SPIRV_C_ArbitraryPrecisionFixedPointINTEL : I32EnumAttrCase<"ArbitraryPrecisionFixedPointINTEL", 5922> { +def SPIRV_C_ArbitraryPrecisionFixedPointINTEL : I32EnumAttrCase<"ArbitraryPrecisionFixedPointINTEL", 5922> { list<Availability> availability = [ Extension<[SPV_INTEL_arbitrary_precision_fixed_point]> ]; } From 6369279a0c4ca1a008241f171657c1db83cfe026 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 30 Nov 2025 21:56:47 -0500 Subject: [PATCH 8/9] Revert "Revert "LangRef: Clarify llvm.minnum and llvm.maxnum about sNaN and signed zero (#112852)"" (#170067) Reverts llvm/llvm-project#168838 Justification is confused and this did not receive adequate discussion, particularly during a holiday week --- llvm/docs/LangRef.rst | 110 +++++++++++++------------ llvm/include/llvm/CodeGen/ISDOpcodes.h | 20 +++-- 2 files changed, 71 insertions(+), 59 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a57351f9598e2..02865f8a29c67 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17298,9 +17298,8 @@ LLVM Implementation: """""""""""""""""""" LLVM implements all ISO C flavors as
listed in this table, except in the -default floating-point environment exceptions are ignored and return value -is non-deterministic if one or both inputs are sNaN. The constrained -versions of the intrinsics respect the exception behavior and sNaN. +default floating-point environment exceptions are ignored. The constrained +versions of the intrinsics respect the exception behavior. .. list-table:: :header-rows: 1 @@ -17332,7 +17331,7 @@ versions of the intrinsics respect the exception behavior and sNaN. - qNaN, invalid exception * - ``+0.0 vs -0.0`` - - either one + - +0.0(max)/-0.0(min) - +0.0(max)/-0.0(min) - +0.0(max)/-0.0(min) @@ -17376,22 +17375,30 @@ type. Semantics: """""""""" +Follows the semantics of minNum in IEEE-754-2008, except that -0.0 < +0.0 for the purposes +of this intrinsic. As for signaling NaNs, per the minNum semantics, if either operand is sNaN, +the result is qNaN. This matches the recommended behavior for the libm +function ``fmin``, although not all implementations have implemented these recommended behaviors. + +If either operand is a qNaN, returns the other non-NaN operand. Returns NaN only if both operands are +NaN or if either operand is sNaN. Note that arithmetic on an sNaN doesn't consistently produce a qNaN, +so arithmetic feeding into a minnum can produce inconsistent results. For example, +``minnum(fadd(sNaN, -0.0), 1.0)`` can produce qNaN or 1.0 depending on whether ``fadd`` is folded. -Follows the IEEE-754-2008 semantics for minNum, except for handling of -signaling NaNs. This matches the behavior of libm's fmin. +IEEE-754-2008 defines minNum, and it was removed in IEEE-754-2019. As the replacement, IEEE-754-2019 +defines :ref:`minimumNumber `. -If either operand is a NaN, returns the other non-NaN operand. Returns -NaN only if both operands are NaN. If the operands compare equal, -returns either one of the operands. For example, this means that -fmin(+0.0, -0.0) non-deterministically returns either operand (-0.0 -or 0.0). 
+If the intrinsic is marked with the nsz attribute, then the effect is as in the definition in C +and IEEE-754-2008: the result of ``minnum(-0.0, +0.0)`` may be either -0.0 or +0.0. -Unlike the IEEE-754-2008 behavior, this does not distinguish between -signaling and quiet NaN inputs. If a target's implementation follows -the standard and returns a quiet NaN if either input is a signaling -NaN, the intrinsic lowering is responsible for quieting the inputs to -correctly return the non-NaN input (e.g. by using the equivalent of -``llvm.canonicalize``). +Some architectures, such as ARMv8 (FMINNM), LoongArch (fmin), MIPSr6 (min.fmt), PowerPC/VSX (xsmindp), +have instructions that match these semantics exactly; thus it is quite simple for these architectures. +Some architectures have similar ones while they are not exact equivalent. Such as x86 implements ``MINPS``, +which implements the semantics of C code ``a`. -Unlike the IEEE-754-2008 behavior, this does not distinguish between -signaling and quiet NaN inputs. If a target's implementation follows -the standard and returns a quiet NaN if either input is a signaling -NaN, the intrinsic lowering is responsible for quieting the inputs to -correctly return the non-NaN input (e.g. by using the equivalent of -``llvm.canonicalize``). +If the intrinsic is marked with the nsz attribute, then the effect is as in the definition in C +and IEEE-754-2008: the result of maxnum(-0.0, +0.0) may be either -0.0 or +0.0. + +Some architectures, such as ARMv8 (FMAXNM), LoongArch (fmax), MIPSr6 (max.fmt), PowerPC/VSX (xsmaxdp), +have instructions that match these semantics exactly; thus it is quite simple for these architectures. +Some architectures have similar ones while they are not exact equivalent. Such as x86 implements ``MAXPS``, +which implements the semantics of C code ``a>b?a:b``: NUM vs qNaN always return qNaN. ``MAXPS`` can be used +if ``nsz`` and ``nnan`` are given. 
+ +For existing libc implementations, the behaviors of fmin may be quite different on sNaN and signed zero behaviors, +even in the same release of a single libm implementation. .. _i_minimum: @@ -20326,12 +20342,8 @@ The '``llvm.vector.reduce.fmax.*``' intrinsics do a floating-point matches the element-type of the vector input. This instruction has the same comparison semantics as the '``llvm.maxnum.*``' -intrinsic. That is, the result will always be a number unless all elements of -the vector are NaN. For a vector with maximum element magnitude 0.0 and -containing both +0.0 and -0.0 elements, the sign of the result is unspecified. - -If the intrinsic call has the ``nnan`` fast-math flag, then the operation can -assume that NaNs are not present in the input vector. +intrinsic. If the intrinsic call has the ``nnan`` fast-math flag, then the +operation can assume that NaNs are not present in the input vector. Arguments: """""""""" @@ -20359,12 +20371,8 @@ The '``llvm.vector.reduce.fmin.*``' intrinsics do a floating-point matches the element-type of the vector input. This instruction has the same comparison semantics as the '``llvm.minnum.*``' -intrinsic. That is, the result will always be a number unless all elements of -the vector are NaN. For a vector with minimum element magnitude 0.0 and -containing both +0.0 and -0.0 elements, the sign of the result is unspecified. - -If the intrinsic call has the ``nnan`` fast-math flag, then the operation can -assume that NaNs are not present in the input vector. +intrinsic. If the intrinsic call has the ``nnan`` fast-math flag, then the +operation can assume that NaNs are not present in the input vector. Arguments: """""""""" @@ -22751,7 +22759,7 @@ This is an overloaded intrinsic. Overview: """"""""" -Predicated floating-point IEEE-754 minNum of two vectors of floating-point values. +Predicated floating-point IEEE-754-2008 minNum of two vectors of floating-point values. 
Arguments: @@ -22800,7 +22808,7 @@ This is an overloaded intrinsic. Overview: """"""""" -Predicated floating-point IEEE-754 maxNum of two vectors of floating-point values. +Predicated floating-point IEEE-754-2008 maxNum of two vectors of floating-point values. Arguments: @@ -24099,10 +24107,7 @@ result type. If only ``nnan`` is set then the neutral value is ``-Infinity``. This instruction has the same comparison semantics as the :ref:`llvm.vector.reduce.fmax ` intrinsic (and thus the -'``llvm.maxnum.*``' intrinsic). That is, the result will always be a number -unless all elements of the vector and the starting value are ``NaN``. For a -vector with maximum element magnitude ``0.0`` and containing both ``+0.0`` and -``-0.0`` elements, the sign of the result is unspecified. +'``llvm.maxnum.*``' intrinsic). To ignore the start value, the neutral value can be used. @@ -24169,10 +24174,7 @@ result type. If only ``nnan`` is set then the neutral value is ``+Infinity``. This instruction has the same comparison semantics as the :ref:`llvm.vector.reduce.fmin ` intrinsic (and thus the -'``llvm.minnum.*``' intrinsic). That is, the result will always be a number -unless all elements of the vector and the starting value are ``NaN``. For a -vector with maximum element magnitude ``0.0`` and containing both ``+0.0`` and -``-0.0`` elements, the sign of the result is unspecified. +'``llvm.minnum.*``' intrinsic). To ignore the start value, the neutral value can be used. @@ -29044,7 +29046,7 @@ The third argument specifies the exception behavior as described above. Semantics: """""""""" -This function follows the IEEE-754 semantics for maxNum. +This function follows the IEEE-754-2008 semantics for maxNum. '``llvm.experimental.constrained.minnum``' Intrinsic @@ -29076,7 +29078,7 @@ The third argument specifies the exception behavior as described above. Semantics: """""""""" -This function follows the IEEE-754 semantics for minNum. 
+This function follows the IEEE-754-2008 semantics for minNum. '``llvm.experimental.constrained.maximum``' Intrinsic diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index a9fdf803a5511..b32f3dacbb3a4 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1048,13 +1048,20 @@ enum NodeType { LRINT, LLRINT, - /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two - /// values. + /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, + /// following IEEE-754 definitions except for signed zero behavior. /// - /// In the case where a single input is a NaN (either signaling or quiet), - /// the non-NaN input is returned. + /// If one input is a signaling NaN, returns a quiet NaN. This matches + /// IEEE-754 2008's minNum/maxNum behavior for signaling NaNs (which differs + /// from 2019). /// - /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0. + /// These treat -0 as ordered less than +0, matching the behavior of IEEE-754 + /// 2019's minimumNumber/maximumNumber. + /// + /// Note that arithmetic on an sNaN doesn't consistently produce a qNaN, + /// so arithmetic feeding into a minnum/maxnum can produce inconsistent + /// results. FMAXIMUM/FMINIMUM or FMAXIMUMNUM/FMINIMUMNUM may be a better choice + /// for non-distinction of sNaN/qNaN handling. FMINNUM, FMAXNUM, @@ -1068,6 +1075,9 @@ enum NodeType { /// /// These treat -0 as ordered less than +0, matching the behavior of IEEE-754 /// 2019's minimumNumber/maximumNumber. + /// + /// Deprecated, and will be removed soon, as FMINNUM/FMAXNUM have the same + /// semantics now.
FMINNUM_IEEE, FMAXNUM_IEEE, From e2181400d70857bc5a212a4053d5d7940c84acaf Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Mon, 1 Dec 2025 11:03:50 +0800 Subject: [PATCH 9/9] [RISCV][llvm] Correct shamt in P extension EXTRACT_VECTOR_ELT lowering (#169823) During operation legalization, element type should have been turn into XLenVT which makes the SHL a no-op. We need to use exact vector element type instead. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 32 +++++++++++++++++++++ llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 12 ++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d2e4bb4199a7a..a6212f5cc84be 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10743,7 +10743,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, VecVT != MVT::v4i8 && VecVT != MVT::v2i32) return SDValue(); SDValue Extracted = DAG.getBitcast(XLenVT, Vec); - unsigned ElemWidth = EltVT.getSizeInBits(); + unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits(); SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx, DAG.getConstant(ElemWidth, DL, XLenVT)); return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt); diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll index d4ea9e6c3def0..f803f6aa09652 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll @@ -484,6 +484,25 @@ define void @test_extract_vector_16(ptr %ret_ptr, ptr %a_ptr) { ret void } +define void @test_extract_vector_16_elem1(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-RV32-LABEL: test_extract_vector_16_elem1: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: lhu a1, 2(a1) +; CHECK-RV32-NEXT: sh a1, 0(a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_extract_vector_16_elem1: +; CHECK-RV64: # %bb.0: +; 
CHECK-RV64-NEXT: lw a1, 0(a1) +; CHECK-RV64-NEXT: srli a1, a1, 16 +; CHECK-RV64-NEXT: sh a1, 0(a0) +; CHECK-RV64-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %extracted = extractelement <2 x i16> %a, i32 1 + store i16 %extracted, ptr %ret_ptr + ret void +} + define void @test_extract_vector_8(ptr %ret_ptr, ptr %a_ptr) { ; CHECK-LABEL: test_extract_vector_8: ; CHECK: # %bb.0: @@ -496,6 +515,19 @@ define void @test_extract_vector_8(ptr %ret_ptr, ptr %a_ptr) { ret void } +define void @test_extract_vector_8_elem1(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_extract_vector_8_elem1: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: srli a1, a1, 8 +; CHECK-NEXT: sb a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %extracted = extractelement <4 x i8> %a, i32 1 + store i8 %extracted, ptr %ret_ptr + ret void +} + ; Test for splat define void @test_non_const_splat_i8(ptr %ret_ptr, ptr %a_ptr, i8 %elt) { ; CHECK-LABEL: test_non_const_splat_i8: diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll index b39b807d43154..9b021df8dd452 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll @@ -495,6 +495,18 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) { ret void } +define void @test_extract_vector_32_elem1(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_extract_vector_32_elem1: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 4(a1) +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %extracted = extractelement <2 x i32> %a, i32 1 + store i32 %extracted, ptr %ret_ptr + ret void +} + ; Test basic add/sub operations for v2i32 (RV64 only) define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { ; CHECK-LABEL: test_padd_w: