diff --git a/.github/workflows/build-ci-container-windows.yml b/.github/workflows/build-ci-container-windows.yml
index b6c46b70030ab..3996948bb44e0 100644
--- a/.github/workflows/build-ci-container-windows.yml
+++ b/.github/workflows/build-ci-container-windows.yml
@@ -56,7 +56,7 @@ jobs:
       - build-ci-container-windows
     permissions:
      packages: write
-    runs-on: windows-2022
+    runs-on: ubuntu-24.04
     env:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
@@ -66,8 +66,12 @@
           name: container
       - name: Push Container
         run: |
-          docker load -i ${{ needs.build-ci-container-windows.outputs.container-filename }}
-          docker tag ${{ needs.build-ci-container-windows.outputs.container-name-tag }} ${{ needs.build-ci-container-windows.outputs.container-name }}:latest
-          docker login -u ${{ github.actor }} -p $env:GITHUB_TOKEN ghcr.io
-          docker push ${{ needs.build-ci-container-windows.outputs.container-name-tag }}
-          docker push ${{ needs.build-ci-container-windows.outputs.container-name }}:latest
+          sudo apt-get update
+          sudo apt-get install -y skopeo
+          skopeo login -u ${{ github.actor }} -p ${{ secrets.GITHUB_TOKEN }} ghcr.io
+          skopeo copy docker-archive:${{ needs.build-ci-container-windows.outputs.container-filename }} \
+            --dest-compress-format zstd \
+            docker://${{ needs.build-ci-container-windows.outputs.container-name-tag }}
+          skopeo copy docker-archive:${{ needs.build-ci-container-windows.outputs.container-filename }} \
+            --dest-compress-format zstd \
+            docker://${{ needs.build-ci-container-windows.outputs.container-name }}:latest
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index fece3843c6470..67f2db2d8bb8d 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -1232,7 +1232,10 @@ void WhitespaceManager::alignArrayInitializers() {
       bool FoundComplete = false;
       for (unsigned InsideIndex = ChangeIndex + 1; InsideIndex < ChangeEnd;
            ++InsideIndex) {
-        if (Changes[InsideIndex].Tok == C.Tok->MatchingParen) {
+        const auto *Tok = Changes[InsideIndex].Tok;
+        if (Tok->is(tok::pp_define))
+          break;
+        if (Tok == C.Tok->MatchingParen) {
           alignArrayInitializers(ChangeIndex, InsideIndex + 1);
           ChangeIndex = InsideIndex + 1;
           FoundComplete = true;
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 24235b966399d..c9446fa3ff317 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -22272,6 +22272,19 @@ TEST_F(FormatTest, CatchAlignArrayOfStructuresLeftAlignment) {
                "});",
                Style);
 
+  verifyNoCrash(
+      "PANEL_Ic PANEL_ic[PANEL_IC_NUMBER] =\n"
+      "    {\n"
+      "        {PIC(0), PIC(0), PIC(99), PIC(81), 0},    // Backbox\n"
+      "        {PIC(1), PIC(83), PIC(191), PIC(137), 0}, // AK47\n"
+      "\n"
+      "#define PICALL1(a, b, c, d) \\\n"
+      "  { PIC(a), PIC(b), PIC(c), PIC(d), 1 }\n"
+      "\n"
+      "    PICALL1(1, 1, 75, 50),\n"
+      "};",
+      Style);
+
   Style.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign;
   verifyFormat("#define FOO \\\n"
                "  int foo[][2] = { \\\n"
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8dd0532b..5107c8def3799 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
   }
 }
 
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+
+  if (!ST->hasUAL())
+    return Options;
+
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  Options.AllowOverlappingLoads = true;
+
+  // TODO: Support for vectors.
+  if (ST->is64Bit()) {
+    Options.LoadSizes = {8, 4, 2, 1};
+    Options.AllowedTailExpansions = {3, 5, 6};
+  } else {
+    Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
+
+  return Options;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7804994..9b479f9dc0dc5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
 
   bool shouldExpandReduction(const IntrinsicInst *II) const override;
 
-  // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+  TTI::MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d3372c5f6..a3ccbd8d4a8aa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,11 @@ static cl::opt<bool>
                        "be combined with a shift"),
              cl::init(true));
 
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
+                                       ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1213,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   };
 
+  // Sets common actions for zvfbfa; some instructions are supported
+  // natively, so we don't need to promote them.
+  const auto SetZvfbfaActions = [&](MVT VT) {
+    setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+    setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+                       Custom);
+    setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+    setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+    setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+    setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+                       Custom);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+    setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+    setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+                        ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                        ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+                        ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+                        ISD::VECTOR_COMPRESS},
+                       VT, Custom);
+    setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+    setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+    setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+    setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+    MVT EltVT = VT.getVectorElementType();
+    if (isTypeLegal(EltVT))
+      setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+                          ISD::EXTRACT_VECTOR_ELT},
+                         VT, Custom);
+    else
+      setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+                         EltVT, Custom);
+    setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+                        ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+                        ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+                        ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+                        ISD::VP_SCATTER},
+                       VT, Custom);
+    setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+    // Expand FP operations that need libcalls.
+    setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+    // Custom split nxv32[b]f16 since nxv32f32 is not legal.
+    if (getLMUL(VT) == RISCVVType::LMUL_8) {
+      setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+      setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+    } else {
+      MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+      setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+      setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+    }
+  };
+
   if (Subtarget.hasVInstructionsF16()) {
     for (MVT VT : F16VecVTs) {
       if (!isTypeLegal(VT))
         continue;
@@ -1222,7 +1282,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget.hasVInstructionsBF16Minimal()) {
+  if (Subtarget.hasVInstructionsBF16()) {
+    for (MVT VT : BF16VecVTs) {
+      if (!isTypeLegal(VT))
+        continue;
+      SetZvfbfaActions(VT);
+    }
+  } else if (Subtarget.hasVInstructionsBF16Minimal()) {
     for (MVT VT : BF16VecVTs) {
       if (!isTypeLegal(VT))
         continue;
@@ -1501,6 +1567,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // available.
         setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
       }
+      if (Subtarget.hasStdExtZvfbfa()) {
+        setOperationAction(ZvfbfaOps, VT, Custom);
+        setOperationAction(ZvfbfaVPOps, VT, Custom);
+      }
       setOperationAction(
           {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
           Custom);
@@ -7245,7 +7315,11 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
   return (Op.getValueType() == MVT::nxv32f16 &&
           (Subtarget.hasVInstructionsF16Minimal() &&
            !Subtarget.hasVInstructionsF16())) ||
-         Op.getValueType() == MVT::nxv32bf16;
+         (Op.getValueType() == MVT::nxv32bf16 &&
+          Subtarget.hasVInstructionsBF16Minimal() &&
+          (!Subtarget.hasVInstructionsBF16() ||
+           (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
+            !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
 }
 
 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75983b1f..ffb2ac0756da4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
                                                FRM_DYN,
                                                fvti.AVL, fvti.Log2SEW, TA_MA)>;
   }
-}
+
+  foreach vti = AllBF16Vectors in {
+    // 13.12. Vector Floating-Point Sign-Injection Instructions
+    def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+              (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+    // Handle fneg with VFSGNJN using the same input for both operands.
+    def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector vti.RegClass:$rs2))),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (fneg vti.RegClass:$rs2)))),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+    // 13.12. Vector Floating-Point Sign-Injection Instructions
+    def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+                             VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
+                  (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+                  vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                  TA_MA)>;
+    // Handle fneg with VFSGNJN using the same input for both operands.
+    def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+                             VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
+                  (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+                  vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                  TA_MA)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector vti.RegClass:$rs2),
+                                  vti.RegClass:$passthru,
+                                  (vti.Mask VMV0:$vm),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                  vti.RegClass:$passthru, vti.RegClass:$rs1,
+                  vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                  TAIL_AGNOSTIC)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (riscv_fneg_vl vti.RegClass:$rs2,
+                                                 (vti.Mask true_mask),
+                                                 VLOpFrag),
+                                  srcvalue,
+                                  (vti.Mask true_mask),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                  vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (SplatFPOp vti.ScalarRegClass:$rs2),
+                                  vti.RegClass:$passthru,
+                                  (vti.Mask VMV0:$vm),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                  vti.RegClass:$passthru, vti.RegClass:$rs1,
+                  vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                  TAIL_AGNOSTIC)>;
+  }
+}
 } // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
new file mode 100644
index 0000000000000..36670fa801b36
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
@@ -0,0 +1,2239 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL
+
+declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly
+declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly
+
+define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_0:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    move $a2, $zero
+; LA32-NEXT:    bl bcmp
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: bcmp_size_0:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    move $a2, $zero
+; LA64-NEXT:    pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT:    jirl $ra, $ra, 0
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0)
+  ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL:       # %bb.0: # %entry
+; LA32-UAL-NEXT:    ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT:    ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT:    xor $a0, $a0, $a1
+; LA32-UAL-NEXT:    sltu $a0,
$zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: 
bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: 
addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; 
LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; 
LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 
# 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori 
$a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; 
LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; 
LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: 
ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: 
ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB26_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB26_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 
24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB27_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; 
LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB28_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB28_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB28_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; 
LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and 
$a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, 
$sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, 
$a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB31_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.4: +; 
LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + 
%memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: 
# %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA32-NUAL-NEXT: ld.bu $a1, $a1, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a1, $a1, 24 +; LA32-NUAL-NEXT: or $a1, $a1, $a3 +; LA32-NUAL-NEXT: or $a1, $a1, $a2 +; LA32-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA32-NUAL-NEXT: ld.bu $a0, $a0, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a0, $a0, 24 +; LA32-NUAL-NEXT: or $a0, $a0, $a3 +; LA32-NUAL-NEXT: or $a0, $a0, $a2 +; LA32-NUAL-NEXT: xor $a0, $a0, $a1 +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA64-NUAL-NEXT: ld.b $a1, $a1, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a1, $a1, 24 +; LA64-NUAL-NEXT: or $a1, $a1, $a3 +; LA64-NUAL-NEXT: or $a1, $a1, $a2 +; LA64-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA64-NUAL-NEXT: ld.b $a0, $a0, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a0, $a0, 24 +; LA64-NUAL-NEXT: or $a0, $a0, $a3 +; LA64-NUAL-NEXT: or $a0, $a0, $a2 +; LA64-NUAL-NEXT: xor $a0, $a0, $a1 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 
0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+  %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+  %ret = icmp slt i32 %memcmp, 0
+  ret i1 %ret
+}
+
+define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+  %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+  %ret = icmp sgt i32 %memcmp, 0
+  ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll
new file mode 100644
index 0000000000000..c1bf850baa8c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll
@@ -0,0 +1,3106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL
+
+declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly
+declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly
+
+define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_0:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $a2, $zero
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_0:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $a2, $zero
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0)
+  ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+  %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
+  ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+;
LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, 
$sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 
8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; 
LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; 
LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; 
LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; 
LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded 
Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, 
$a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: slti $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: slti $a0, $a0, 1 +; 
LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 1 + ret i1 %ret +} + +define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ori $a0, $zero, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ori $a0, $zero, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, -1 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte 
Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; 
LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: 
srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB28_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 
4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, 
$a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 
24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, 
$a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB32_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte 
Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; 
LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB33_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; 
LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; 
LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB34_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB34_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + 
%memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; 
LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB35_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; 
LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB35_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d 
$a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB36_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB37_9: # %res_block +; LA64-UAL-NEXT: 
sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 
+; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; 
LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; 
LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 1 + ret i1 %ret +} + +define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; 
LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, -1 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll index c4aaf9a75a852..c3811c0357793 100644 --- a/llvm/test/CodeGen/LoongArch/memcmp.ll +++ b/llvm/test/CodeGen/LoongArch/memcmp.ll @@ -7,15 +7,24 @@ define signext i32 @test1(ptr %buffer1, ptr %buffer2) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: ori $a2, $zero, 16 -; CHECK-NEXT: pcaddu18i $ra, %call36(memcmp) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ld.d $a2, $a0, 0 +; CHECK-NEXT: ld.d $a3, $a1, 0 +; CHECK-NEXT: revb.d $a2, $a2 +; CHECK-NEXT: revb.d $a3, $a3 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %loadbb1 +; CHECK-NEXT: ld.d $a0, $a0, 8 +; CHECK-NEXT: ld.d $a1, $a1, 8 +; CHECK-NEXT: revb.d $a2, $a0 +; CHECK-NEXT: revb.d $a3, $a1 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %res_block +; CHECK-NEXT: sltu $a0, $a2, $a3 +; CHECK-NEXT: sub.d $a0, $zero, $a0 +; CHECK-NEXT: ori $a0, $a0, 1 ; CHECK-NEXT: ret entry: %call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll new file mode 100644 index 0000000000000..9cfed6a659c64 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) { +; CHECK-LABEL: copysign_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) + ret <2 x bfloat> %r +} + +define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) { +; CHECK-LABEL: copysign_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) + ret <4 x bfloat> %r +} + +define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) { +; CHECK-LABEL: copysign_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) + ret <8 x bfloat> %r +} + +define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) { +; CHECK-LABEL: copysign_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret + %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) + ret <16 x bfloat> %r +} + +define <32 x bfloat> @copysign_v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs) { +; CHECK-LABEL: copysign_v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret + %r = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs) + ret <32 x bfloat> %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index a2178e1c571da..2455d872ae7f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -1,8 +1,172 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s + +define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +;
ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfsgnj_vv_v16bf16_unmasked(<16 x bfloat> %va, <16 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, 
ma +; ZVFH-NEXT: vand.vx v10, v10, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.copysign.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -311,10 +475,10 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 @@ -346,10 +510,10 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: vle64.v v0, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: bltu a2, a1, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v0 ; CHECK-NEXT: addi a0, a2, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll new file mode 100644 index 0000000000000..27c00de3c3487 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <1 x bfloat> @v1bf16(<1 x bfloat> %v) { +; CHECK-LABEL: v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v) + ret <1 x bfloat> %r +} + +define <2 x bfloat> @v2bf16(<2 x bfloat> %v) { +; CHECK-LABEL: v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v) + ret <2 x bfloat> %r +} + +define <4 x bfloat> @v4bf16(<4 x bfloat> %v) { +; CHECK-LABEL: v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v) + ret <4 x bfloat> %r +} + +define <8 x bfloat> @v8bf16(<8 x bfloat> %v) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v) + ret <8 x bfloat> %r +} + +define <16 x bfloat> @v16bf16(<16 x bfloat> %v) { +; CHECK-LABEL: v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v) + ret <16 x 
bfloat> %r +} + +define <32 x bfloat> @v32bf16(<32 x bfloat> %v) { +; CHECK-LABEL: v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v) + ret <32 x bfloat> %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index 08f486b601328..01bd706ed31f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -1,12 +1,224 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <2 x bfloat> @vfabs_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfabs_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, 
<2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfabs_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfabs_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfabs_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfabs_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfabs_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; 
ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfabs_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.fabs.v2f16(<2 x half>, <2 x i1>, i32) @@ -24,6 +236,14 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -42,6 +262,14 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -62,6 +290,14 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -80,6 +316,14 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: 
vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -100,6 +344,14 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -118,6 +370,14 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -138,6 +398,14 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -156,6 +424,14 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -367,10 +643,10 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 @@ -390,10 +666,10 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: addi a1, a0, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll new file mode 100644 index 0000000000000..b3b9a62600f46 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll @@ -0,0 
+1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <1 x bfloat> @v1bf16(<1 x bfloat> %va) { +; CHECK-LABEL: v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <1 x bfloat> %va + ret <1 x bfloat> %vb +} + +define <2 x bfloat> @v2bf16(<2 x bfloat> %va) { +; CHECK-LABEL: v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <2 x bfloat> %va + ret <2 x bfloat> %vb +} + +define <4 x bfloat> @v4bf16(<4 x bfloat> %va) { +; CHECK-LABEL: v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <4 x bfloat> %va + ret <4 x bfloat> %vb +} + +define <8 x bfloat> @v8bf16(<8 x bfloat> %va) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <8 x bfloat> %va + ret <8 x bfloat> %vb +} + +define <16 x bfloat> @v16bf16(<16 x bfloat> %va) { +; CHECK-LABEL: v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <16 x bfloat> %va + ret <16 x bfloat> %vb +} + +define <32 x bfloat> @v32bf16(<32 x bfloat> %va) { +; CHECK-LABEL: v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <32 x bfloat> %va + ret <32 x bfloat> %vb +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index 968fd9f9bab80..dede0e707d929 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -1,12 +1,208 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <2 x 
bfloat> @vfneg_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfneg_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfneg_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfneg_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfneg_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8bf16: +; ZVFBFA: # 
%bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfneg_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfneg_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfneg_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) @@ -23,6 +219,13 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -40,6 +243,13 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 
x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -59,6 +269,13 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -76,6 +293,13 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -95,6 +319,13 @@ define <8 x half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -112,6 +343,13 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -131,6 +369,13 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -148,6 +393,13 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -359,10 +611,10 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli 
zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 @@ -382,10 +634,10 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: addi a1, a0, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index ccf82b93d6b75..2f5fde3bb3b20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -1,12 +1,376 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define @vfsgnj_vv_nxv1bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv1bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv1bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv1bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv2bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv2bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv2bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv2bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv4bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv4bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv4bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, 
-1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv4bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv8bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv8bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv8bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv8bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv16bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv16bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv16bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: 
vfsgnj_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv16bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv32bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv32bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv32bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv32bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} declare @llvm.vp.copysign.nxv1f16(, , , i32) @@ -26,6 +390,16 @@ define @vfsgnj_vv_nxv1f16( %va, @llvm.vp.copysign.nxv1f16( %va, %vb, %m, i32 %evl) ret %v } @@ -46,6 +420,16 @@ define @vfsgnj_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -68,6 +452,16 @@ define @vfsgnj_vv_nxv2f16( %va, @llvm.vp.copysign.nxv2f16( %va, %vb, %m, i32 %evl) ret %v } @@ -88,6 +482,16 @@ 
define @vfsgnj_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -110,6 +514,16 @@ define @vfsgnj_vv_nxv4f16( %va, @llvm.vp.copysign.nxv4f16( %va, %vb, %m, i32 %evl) ret %v } @@ -130,6 +544,16 @@ define @vfsgnj_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -152,6 +576,16 @@ define @vfsgnj_vv_nxv8f16( %va, @llvm.vp.copysign.nxv8f16( %va, %vb, %m, i32 %evl) ret %v } @@ -172,6 +606,16 @@ define @vfsgnj_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -194,6 +638,16 @@ define @vfsgnj_vv_nxv16f16( %va, @llvm.vp.copysign.nxv16f16( %va, %vb, %m, i32 %evl) ret %v } @@ -214,6 +668,16 @@ define @vfsgnj_vv_nxv16f16_unmasked( %v ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -236,6 +700,16 @@ define @vfsgnj_vv_nxv32f16( %va, @llvm.vp.copysign.nxv32f16( %va, %vb, %m, i32 %evl) ret %v } @@ -256,6 +730,16 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll index 1d8638844af7f..28426ad018b83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll @@ -11,75 +11,165 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: 
--check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define @nxv1bf16( %v) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv1bf16( %v) ret %r } define @nxv2bf16( %v) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv2bf16( %v) ret %r } define @nxv4bf16( %v) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv4bf16( %v) ret %r } define @nxv8bf16( %v) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx 
v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv8bf16( %v) ret %r } define @nxv16bf16( %v) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv16bf16( %v) ret %r } define @nxv32bf16( %v) { -; CHECK-LABEL: nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv32bf16( %v) ret %r } @@ -100,6 +190,14 @@ define @vfabs_nxv1f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv1f16( %v) ret %r } @@ -120,6 +218,14 @@ define @vfabs_nxv2f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv2f16( %v) ret %r } @@ -140,6 +246,14 @@ define @vfabs_nxv4f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv4f16( %v) ret %r } @@ -160,6 +274,14 @@ define @vfabs_nxv8f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = 
call @llvm.fabs.nxv8f16( %v) ret %r } @@ -180,6 +302,14 @@ define @vfabs_nxv16f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv16f16( %v) ret %r } @@ -200,6 +330,14 @@ define @vfabs_nxv32f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv32f16( %v) ret %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 8f9f9c4256c8f..c6888c0bcae0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -1,12 +1,328 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define @vfabs_vv_nxv1bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv1bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv1bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: 
vfabs_vv_nxv1bf16_unmasked:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a1, a1, -1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfabs.v v8, v8
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a1, a1, -1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT:    vand.vx v8, v8, a1
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a1, a1, -1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfabs.v v8, v8
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a1, a1, -1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vand.vx v8, v8, a1
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a1, a1, -1
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfabs.v v8, v8
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
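+; The ZVFH/ZVFHMIN expansions in these tests operate on the raw 16-bit
+; pattern, where bit 15 is the sign bit for both f16 and bf16:
+;   lui a1, 8        ; a1 = 8 << 12 = 0x8000, the sign-bit mask
+;   addi a1, a1, -1  ; a1 = 0x7fff, the magnitude mask vand.vx applies for fabs
+; Under +experimental-zvfbfa, the bf16 cases instead select the native e16alt
+; vfabs.v/vfneg.v/vfsgnj.vv forms, so no mask needs to be materialized.
+; (Reviewer note for orientation; not part of the autogenerated checks.)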
+; ZVFH-LABEL: vfabs_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv8bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv8bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv8bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfabs_vv_nxv16bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv16bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv16bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv16bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfabs_vv_nxv32bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: 
vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv32bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv32bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv32bf16( %va, splat (i1 true), i32 %evl) + ret %v +} declare @llvm.vp.fabs.nxv1f16(, , i32) @@ -24,6 +340,14 @@ define @vfabs_vv_nxv1f16( %va, @llvm.vp.fabs.nxv1f16( %va, %m, i32 %evl) ret %v } @@ -42,6 +366,14 @@ define @vfabs_vv_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -62,6 +394,14 @@ define @vfabs_vv_nxv2f16( %va, @llvm.vp.fabs.nxv2f16( %va, %m, i32 %evl) ret %v } @@ -80,6 +420,14 @@ define @vfabs_vv_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -100,6 +448,14 @@ define @vfabs_vv_nxv4f16( %va, @llvm.vp.fabs.nxv4f16( %va, %m, i32 %evl) ret %v } @@ -118,6 +474,14 @@ define @vfabs_vv_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -138,6 +502,14 @@ define @vfabs_vv_nxv8f16( %va, @llvm.vp.fabs.nxv8f16( %va, %m, i32 %evl) ret %v } @@ -156,6 +528,14 @@ define @vfabs_vv_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -176,6 +556,14 @@ define @vfabs_vv_nxv16f16( %va, @llvm.vp.fabs.nxv16f16( %va, %m, i32 %evl) ret %v } @@ -194,6 +582,14 @@ define @vfabs_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; 
ZVFBFA-LABEL: vfabs_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -214,6 +610,14 @@ define @vfabs_vv_nxv32f16( %va, @llvm.vp.fabs.nxv32f16( %va, %m, i32 %evl) ret %v } @@ -232,6 +636,14 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -473,10 +885,10 @@ define @vfabs_vv_nxv16f64( %va, @vfabs_vv_nxv16f64_unmasked( ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll index 83f588ce5027d..bef2e8d3b57fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -11,87 +11,189 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define @nxv1bf16( %vm, %vs) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv1bf16( %vm, %vs) ret %r } define @nxv2bf16( %vm, %vs) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; 
ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv2bf16( %vm, %vs) ret %r } define @nxv4bf16( %vm, %vs) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv4bf16( %vm, %vs) ret %r } define @nxv8bf16( %vm, %vs) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv8bf16( %vm, %vs) ret %r } define @nxv16bf16( %vm, %vs) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vand.vx v12, v12, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui 
a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv16bf16( %vm, %vs) ret %r } define @nxv32bf32( %vm, %vs) { -; CHECK-LABEL: nxv32bf32: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vand.vx v16, v16, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv32bf32( %vm, %vs) ret %r } @@ -114,6 +216,16 @@ define @vfcopysign_vv_nxv1f16( %vm, @llvm.copysign.nxv1f16( %vm, %vs) ret %r } @@ -136,6 +248,18 @@ define @vfcopysign_vf_nxv1f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv1f16( %vm, %splat) @@ -159,6 +283,17 @@ define @vfcopynsign_vv_nxv1f16( %vm, %vs %r = call @llvm.copysign.nxv1f16( %vm, %n) ret %r @@ -183,6 +318,19 @@ define @vfcopynsign_vf_nxv1f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -208,6 +356,17 @@ define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32( %vs to %r = call @llvm.copysign.nxv1f16( %vm, %e) ret %r @@ -235,6 +394,19 @@ define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -261,6 +433,18 @@ define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv1f16( %vm, %eneg) @@ -290,6 +474,20 @@ define 
@vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -320,6 +518,19 @@ define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64( %vs to %r = call @llvm.copysign.nxv1f16( %vm, %e) ret %r @@ -351,6 +562,21 @@ define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -381,6 +607,20 @@ define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv1f16( %vm, %eneg) @@ -414,6 +654,22 @@ define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -440,6 +696,16 @@ define @vfcopysign_vv_nxv2f16( %vm, @llvm.copysign.nxv2f16( %vm, %vs) ret %r } @@ -462,6 +728,18 @@ define @vfcopysign_vf_nxv2f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv2f16( %vm, %splat) @@ -485,6 +763,17 @@ define @vfcopynsign_vv_nxv2f16( %vm, %vs %r = call @llvm.copysign.nxv2f16( %vm, %n) ret %r @@ -509,6 +798,19 @@ define @vfcopynsign_vf_nxv2f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -534,6 +836,16 @@ define @vfcopysign_vv_nxv4f16( %vm, @llvm.copysign.nxv4f16( %vm, %vs) ret %r } @@ -556,6 +868,18 @@ define @vfcopysign_vf_nxv4f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv4f16( %vm, %splat) @@ -579,6 +903,17 @@ define @vfcopynsign_vv_nxv4f16( %vm, %vs %r = call @llvm.copysign.nxv4f16( %vm, %n) ret %r @@ -603,6 +938,19 @@ define @vfcopynsign_vf_nxv4f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: 
vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -628,6 +976,16 @@ define @vfcopysign_vv_nxv8f16( %vm, @llvm.copysign.nxv8f16( %vm, %vs) ret %r } @@ -650,6 +1008,18 @@ define @vfcopysign_vf_nxv8f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v10, v10, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v10, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv8f16( %vm, %splat) @@ -673,6 +1043,17 @@ define @vfcopynsign_vv_nxv8f16( %vm, %vs %r = call @llvm.copysign.nxv8f16( %vm, %n) ret %r @@ -697,6 +1078,19 @@ define @vfcopynsign_vf_nxv8f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v10, v10, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v10, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v10, v10, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -722,6 +1116,17 @@ define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32( %vs to %r = call @llvm.copysign.nxv8f16( %vm, %e) ret %r @@ -749,6 +1154,19 @@ define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -775,6 +1193,18 @@ define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv8f16( %vm, %eneg) @@ -804,6 +1234,20 @@ define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -834,6 +1278,19 @@ define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64( %vs to %r = call @llvm.copysign.nxv8f16( %vm, %e) ret %r @@ -865,6 +1322,21 @@ define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -895,6 +1367,20 @@ define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv8f16( %vm, %eneg) @@ -928,6 +1414,22 @@ define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -954,6 +1456,16 @@ define @vfcopysign_vv_nxv16f16( %vm, @llvm.copysign.nxv16f16( %vm, %vs) ret %r } @@ -976,6 +1488,18 @@ define @vfcopysign_vf_nxv16f16( %vm, ha ; ZVFHMIN-NEXT: vand.vx v12, v12, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v12, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, 
v8, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv16f16( %vm, %splat) @@ -999,6 +1523,17 @@ define @vfcopynsign_vv_nxv16f16( %vm, < ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v12, v12, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv16f16( %vm, %n) ret %r @@ -1023,6 +1558,19 @@ define @vfcopynsign_vf_nxv16f16( %vm, h ; ZVFHMIN-NEXT: vand.vx v12, v12, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v12, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v12, v12, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -1048,6 +1596,16 @@ define @vfcopysign_vv_nxv32f16( %vm, @llvm.copysign.nxv32f16( %vm, %vs) ret %r } @@ -1070,6 +1628,18 @@ define @vfcopysign_vf_nxv32f16( %vm, ha ; ZVFHMIN-NEXT: vand.vx v16, v16, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v16, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv32f16( %vm, %splat) @@ -1093,6 +1663,17 @@ define @vfcopynsign_vv_nxv32f16( %vm, < ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v16, v16, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv32f16( %vm, %n) ret %r @@ -1117,6 +1698,19 @@ define @vfcopynsign_vf_nxv32f16( %vm, h ; ZVFHMIN-NEXT: vand.vx v16, v16, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v16, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v16, v16, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 9f456e97be11d..c0b4916a54e51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -11,69 +11,153 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
 ; RUN:    -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:    --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:    -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN:    --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN:    -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN:    --check-prefixes=CHECK,ZVFBFA
 
 define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %va) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vxor.vx v8, v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a0, 8
+; ZVFH-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 1 x bfloat> %va
   ret <vscale x 1 x bfloat> %vb
 }
 
 define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %va) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vxor.vx v8, v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a0, 8
+; ZVFH-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 2 x bfloat> %va
   ret <vscale x 2 x bfloat> %vb
 }
 
 define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %va) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vxor.vx v8, v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a0, 8
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 4 x bfloat> %va
   ret <vscale x 4 x bfloat> %vb
 }
 
 define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %va) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vxor.vx v8, v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a0, 8
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 8 x bfloat> %va
   ret <vscale x 8 x bfloat> %vb
 }
 
 define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %va) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vxor.vx v8, v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a0, 8
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 16 x bfloat> %va
   ret <vscale x 16 x bfloat> %vb
 }
 
 define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %va) {
-; CHECK-LABEL: nxv32bf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 8
-; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vxor.vx v8, v8, a0
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: nxv32bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a0, 8
+; ZVFH-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a0
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: nxv32bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a0, 8
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: nxv32bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 32 x bfloat> %va
   ret <vscale x 32 x bfloat> %vb
 }
@@ -91,6 +175,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    lui a0, 8
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT:    vxor.vx v8, v8, a0
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 1 x half> %va
   ret <vscale x 1 x half> %vb
 }
@@ -108,6 +199,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    lui a0, 8
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT:    vxor.vx v8, v8, a0
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 2 x half> %va
   ret <vscale x 2 x half> %vb
 }
@@ -125,6 +223,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    lui a0, 8
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT:    vxor.vx v8, v8, a0
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 4 x half> %va
   ret <vscale x 4 x half> %vb
 }
@@ -142,6 +247,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    lui a0, 8
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT:    vxor.vx v8, v8, a0
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 8 x half> %va
   ret <vscale x 8 x half> %vb
 }
@@ -159,6 +271,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    lui a0, 8
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT:    vxor.vx v8, v8, a0
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 16 x half> %va
   ret <vscale x 16 x half> %vb
 }
@@ -176,6 +295,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) {
 ; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
 ; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    lui a0, 8
+; ZVFBFA-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT:    vxor.vx v8, v8, a0
+; ZVFBFA-NEXT:    ret
   %vb = fneg <vscale x 32 x half> %va
   ret <vscale x 32 x half> %vb
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
index bbab056f0ff46..9bd24c44b1b90 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
@@ -1,12 +1,304 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv1bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a1
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfneg.v v8, v8
+; ZVFBFA-NEXT:    ret
+  %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT:    vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:
ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv2bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv2bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv2bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv4bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv4bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv4bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv4bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv8bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv8bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv8bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui 
a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv8bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv16bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv16bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv16bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv16bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv32bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv32bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv32bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv32bf16( %va, splat (i1 true), i32 %evl) + ret %v +} declare @llvm.vp.fneg.nxv1f16(, , i32) @@ -23,6 +315,13 @@ define @vfneg_vv_nxv1f16( %va, @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) ret %v } @@ -40,6 +339,13 @@ define @vfneg_vv_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -59,6 +365,13 @@ define @vfneg_vv_nxv2f16( %va, @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) ret %v } @@ -76,6 +389,13 @@ define @vfneg_vv_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -95,6 +415,13 @@ define @vfneg_vv_nxv4f16( %va, @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl) ret %v } @@ -112,6 +439,13 @@ define @vfneg_vv_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -131,6 +465,13 @@ define @vfneg_vv_nxv8f16( %va, @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl) ret %v } @@ -148,6 +489,13 @@ define @vfneg_vv_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -167,6 +515,13 @@ define @vfneg_vv_nxv16f16( %va, @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl) ret %v } @@ -184,6 +539,13 @@ define @vfneg_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -203,6 +565,13 @@ define @vfneg_vv_nxv32f16( %va, @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) ret %v } @@ -220,6 +589,13 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -461,10 +837,10 @@ define @vfneg_vv_nxv16f64( %va, @vfneg_vv_nxv16f64_unmasked( ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 5823914967e9c..65e0a59d39168 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ 
b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -1761,6 +1761,11 @@ LogicalResult tosa::ConcatOp::verify() { } } + const ShapeAdaptor outputShape(outType); + if (outputShape.hasRank() && outputShape.getRank() != firstInputRank) + return emitOpError("expect output rank to match inputs rank, got ") + << outputShape.getRank() << " vs " << firstInputRank; + // ERROR_IF(axis_sum != shape[axis]); int64_t axisSum = 0; for (const auto &input : inputList) { @@ -1772,7 +1777,7 @@ LogicalResult tosa::ConcatOp::verify() { } axisSum += inputShape.getDimSize(axis); } - const ShapeAdaptor outputShape(outType); + if (axisSum >= 0 && outputShape.hasRank() && !outputShape.isDynamicDim(axis) && axisSum != outputShape.getDimSize(axis)) diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 119991ca7b451..c9e03ca53a729 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -306,6 +306,14 @@ func.func @test_concat_input_rank_mismatch(%arg0: tensor<1x2x3xf32>, %arg1: tens // ----- +func.func @test_concat_input_output_rank_mismatch(%arg0: tensor<2x2xf32>, %arg1: tensor<2x1xf32>) -> tensor<2xf32> { + // expected-error@+1 {{'tosa.concat' op expect output rank to match inputs rank, got 1 vs 2}} + %0 = tosa.concat %arg0, %arg1 {axis = 1 : i32} : (tensor<2x2xf32>, tensor<2x1xf32>) -> tensor<2xf32> + return %0 : tensor<2xf32> +} + +// ----- + func.func @test_pad_invalid_padConst_rank(%arg0: tensor<13x21xf32>) { %0 = tosa.const_shape {values = dense<1> : tensor<4xindex>} : () -> !tosa.shape<4> %1 = "tosa.const"() {values = dense<3.14> : tensor<2xf32>} : () -> tensor<2xf32>