From 836919bb34493333767fc1734e402d3ebf989acb Mon Sep 17 00:00:00 2001 From: owenca Date: Sun, 9 Nov 2025 17:35:54 -0800 Subject: [PATCH 01/11] [clang-format] Fix a crash in AlignArrayOfStructures (#167099) Fixes #157405 --- clang/lib/Format/WhitespaceManager.cpp | 5 ++++- clang/unittests/Format/FormatTest.cpp | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index fece3843c6470..67f2db2d8bb8d 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -1232,7 +1232,10 @@ void WhitespaceManager::alignArrayInitializers() { bool FoundComplete = false; for (unsigned InsideIndex = ChangeIndex + 1; InsideIndex < ChangeEnd; ++InsideIndex) { - if (Changes[InsideIndex].Tok == C.Tok->MatchingParen) { + const auto *Tok = Changes[InsideIndex].Tok; + if (Tok->is(tok::pp_define)) + break; + if (Tok == C.Tok->MatchingParen) { alignArrayInitializers(ChangeIndex, InsideIndex + 1); ChangeIndex = InsideIndex + 1; FoundComplete = true; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 24235b966399d..c9446fa3ff317 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -22272,6 +22272,19 @@ TEST_F(FormatTest, CatchAlignArrayOfStructuresLeftAlignment) { "});", Style); + verifyNoCrash( + "PANEL_Ic PANEL_ic[PANEL_IC_NUMBER] =\n" + " {\n" + " {PIC(0), PIC(0), PIC(99), PIC(81), 0}, // Backbox\n" + " {PIC(1), PIC(83), PIC(191), PIC(137), 0}, // AK47\n" + "\n" + "#define PICALL1(a, b, c, d) \\\n" + " { PIC(a), PIC(b), PIC(c), PIC(d), 1 }\n" + "\n" + " PICALL1(1, 1, 75, 50),\n" + "};", + Style); + Style.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign; verifyFormat("#define FOO \\\n" " int foo[][2] = { \\\n" From 5ba0c7c12674303574731cec3ba5d168b41cfac3 Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Mon, 10 Nov 2025 09:59:36 +0800 Subject: [PATCH 02/11] 
[LoongArch][NFC] Pre-commit tests for memcmp expansion (#166718) Test cases are similar as riscv. --- .../CodeGen/LoongArch/expandmemcmp-optsize.ll | 1147 +++++++++++++++ llvm/test/CodeGen/LoongArch/expandmemcmp.ll | 1227 +++++++++++++++++ 2 files changed, 2374 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll create mode 100644 llvm/test/CodeGen/LoongArch/expandmemcmp.ll diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll new file mode 100644 index 0000000000000..4f7c8967c3049 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll @@ -0,0 +1,1147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL + +declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly +declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly + +define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a2, $zero +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $a2, $zero 
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 1 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 1 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 2 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 2 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; 
LA32-LABEL: bcmp_size_3: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 3 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_3: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 3 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_4: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_4: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_5: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 5 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_5: +; LA64: # %bb.0: # 
%entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 5 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_6: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 6 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_6: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 6 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_7: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 7 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_7: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 7 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr 
%s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 8 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 8 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_15: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 15 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_15: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 15 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 
# 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_31: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 31 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; 
LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: 
addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; 
LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_eq_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: sltui $a0, $a0, 1 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_eq_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_lt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: srli.w $a0, $a0, 31 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_lt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slti $a0, $a0, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, 
$sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_gt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: slt $a0, $zero, $a0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_gt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slt $a0, $zero, $a0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 1 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 1 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, 
$sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 2 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 2 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_3: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 3 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_3: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 3 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_4: +; LA32: # %bb.0: # %entry +; LA32-NEXT: 
addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_4: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_5: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 5 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_5: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 5 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_6: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 6 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_6: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d 
$sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 6 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_7: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 7 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_7: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 7 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 8 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 8 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 
@memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_15: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 15 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_15: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 15 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 
31 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_31: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 31 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, 
$zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define 
signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # 
%bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA32-NUAL-NEXT: ld.bu $a1, $a1, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a1, $a1, 24 +; LA32-NUAL-NEXT: or $a1, $a1, $a3 +; LA32-NUAL-NEXT: or $a1, $a1, $a2 +; LA32-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA32-NUAL-NEXT: ld.bu $a0, $a0, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a0, $a0, 24 +; LA32-NUAL-NEXT: or $a0, $a0, $a3 +; LA32-NUAL-NEXT: or $a0, $a0, $a2 +; LA32-NUAL-NEXT: xor $a0, $a0, $a1 +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA64-NUAL-NEXT: ld.b $a1, $a1, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a1, $a1, 24 +; LA64-NUAL-NEXT: or $a1, $a1, $a3 +; LA64-NUAL-NEXT: or $a1, $a1, $a2 +; LA64-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA64-NUAL-NEXT: ld.b $a0, $a0, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a0, $a0, 24 +; LA64-NUAL-NEXT: or $a0, $a0, $a3 +; LA64-NUAL-NEXT: or $a0, $a0, $a2 +; LA64-NUAL-NEXT: xor $a0, $a0, $a1 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: 
ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_lt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: srli.w $a0, $a0, 31 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_lt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slti $a0, $a0, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_gt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: slt $a0, $zero, $a0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_gt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slt $a0, $zero, $a0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} diff --git 
a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll new file mode 100644 index 0000000000000..8b8f32e6877cc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll @@ -0,0 +1,1227 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL + +declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly +declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly + +define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a2, $zero +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $a2, $zero +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 
12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 1 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 1 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 2 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 2 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_3: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 3 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_3: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 3 +; 
LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_4: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_4: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_5: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 5 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_5: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 5 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_6: +; LA32: # 
%bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 6 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_6: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 6 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_7: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 7 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_7: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 7 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 8 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; 
LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 8 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_15: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 15 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_15: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 15 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + 
+define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_31: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 31 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; 
LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, 
$sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_eq_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl bcmp +; LA32-NEXT: 
sltui $a0, $a0, 1 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_eq_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_lt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: srli.w $a0, $a0, 31 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_lt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slti $a0, $a0, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_gt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: slt $a0, $zero, $a0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_gt_zero: +; LA64: # %bb.0: # %entry +; 
LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slt $a0, $zero, $a0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_le_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: slti $a0, $a0, 1 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_le_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slti $a0, $a0, 1 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 1 + ret i1 %ret +} + +define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_ge_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl bcmp +; LA32-NEXT: addi.w $a1, $zero, -1 +; LA32-NEXT: slt $a0, $a1, $a0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_ge_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) 
+; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: addi.w $a1, $zero, -1 +; LA64-NEXT: slt $a0, $a1, $a0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, -1 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 1 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 1 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 2 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 2 +; LA64-NEXT: 
pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_3: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 3 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_3: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 3 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_4: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_4: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { +; 
LA32-LABEL: memcmp_size_5: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 5 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_5: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 5 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_6: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 6 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_6: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 6 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_7: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 7 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: 
memcmp_size_7: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 7 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 8 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 8 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_15: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 15 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_15: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 15 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; 
LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_31: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 31 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill 
+; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 32 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; 
LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + 
+define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_eq_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl memcmp +; LA32-NEXT: sltui $a0, $a0, 1 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_eq_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_lt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: 
srli.w $a0, $a0, 31 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_lt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slti $a0, $a0, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_gt_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: slt $a0, $zero, $a0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_gt_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slt $a0, $zero, $a0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_le_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: slti $a0, $a0, 1 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_le_zero: 
+; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: slti $a0, $a0, 1 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 1 + ret i1 %ret +} + +define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_ge_zero: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 4 +; LA32-NEXT: bl memcmp +; LA32-NEXT: addi.w $a1, $zero, -1 +; LA32-NEXT: slt $a0, $a1, $a0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_ge_zero: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 4 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: addi.w $a1, $zero, -1 +; LA64-NEXT: slt $a0, $a1, $a0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, -1 + ret i1 %ret +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; LA32-NUAL: {{.*}} +; LA32-UAL: {{.*}} +; LA64-NUAL: {{.*}} +; LA64-UAL: {{.*}} From fa0f2d2caf9be6a710e784696a839b4251387944 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Mon, 10 Nov 2025 10:09:43 +0800 Subject: [PATCH 03/11] [mlir][tosa] Fix crash in `tosa.concat` verifier (#165966) The `tosa.concat` verifier crashed when the output rank did not match the input rank. 
This PR adds a proper check and error emission to prevent the crash. Fixes #159742. --- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 7 ++++++- mlir/test/Dialect/Tosa/invalid.mlir | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 5823914967e9c..65e0a59d39168 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -1761,6 +1761,11 @@ LogicalResult tosa::ConcatOp::verify() { } } + const ShapeAdaptor outputShape(outType); + if (outputShape.hasRank() && outputShape.getRank() != firstInputRank) + return emitOpError("expect output rank to match inputs rank, got ") + << outputShape.getRank() << " vs " << firstInputRank; + // ERROR_IF(axis_sum != shape[axis]); int64_t axisSum = 0; for (const auto &input : inputList) { @@ -1772,7 +1777,7 @@ LogicalResult tosa::ConcatOp::verify() { } axisSum += inputShape.getDimSize(axis); } - const ShapeAdaptor outputShape(outType); + if (axisSum >= 0 && outputShape.hasRank() && !outputShape.isDynamicDim(axis) && axisSum != outputShape.getDimSize(axis)) diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 119991ca7b451..c9e03ca53a729 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -306,6 +306,14 @@ func.func @test_concat_input_rank_mismatch(%arg0: tensor<1x2x3xf32>, %arg1: tens // ----- +func.func @test_concat_input_output_rank_mismatch(%arg0: tensor<2x2xf32>, %arg1: tensor<2x1xf32>) -> tensor<2xf32> { + // expected-error@+1 {{'tosa.concat' op expect output rank to match inputs rank, got 1 vs 2}} + %0 = tosa.concat %arg0, %arg1 {axis = 1 : i32} : (tensor<2x2xf32>, tensor<2x1xf32>) -> tensor<2xf32> + return %0 : tensor<2xf32> +} + +// ----- + func.func @test_pad_invalid_padConst_rank(%arg0: tensor<13x21xf32>) { %0 = tosa.const_shape {values = dense<1> : tensor<4xindex>} : () -> !tosa.shape<4> %1 = "tosa.const"() {values 
= dense<3.14> : tensor<2xf32>} : () -> tensor<2xf32> From 10da6ab5362158c1f63e0c8eaa893c55b49dc3f4 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 9 Nov 2025 18:17:44 -0800 Subject: [PATCH 04/11] [Github] Update PR labeller to v6.0.1 (#167246) This was reverted earlier due to me not realizing that the config format also changed. This patch updates the config to match the new format and bumps the version. --- .github/new-prs-labeler.yml | 1942 +++++++++++++++++++-------------- .github/workflows/new-prs.yml | 2 +- 2 files changed, 1131 insertions(+), 813 deletions(-) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index efdc42d349195..bb0eef5842b0f 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,1131 +1,1449 @@ BOLT: - - bolt/**/* + - changed-files: + - any-glob-to-any-file: + - bolt/**/* ClangIR: - - clang/include/clang/CIR/**/* - - clang/lib/CIR/**/* - - clang/tools/cir-*/**/* - - clang/test/CIR/**/* + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/CIR/**/* + - clang/lib/CIR/**/* + - clang/tools/cir-*/**/* + - clang/test/CIR/**/* clang:bytecode: - - clang/docs/ConstantInterpreter.rst - - clang/lib/AST/ByteCode/**/* - - clang/test/AST/ByteCode/**/* - - clang/unittests/AST/ByteCode/**/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/ConstantInterpreter.rst + - clang/lib/AST/ByteCode/**/* + - clang/test/AST/ByteCode/**/* + - clang/unittests/AST/ByteCode/**/* clang:dataflow: - - clang/include/clang/Analysis/FlowSensitive/**/* - - clang/lib/Analysis/FlowSensitive/**/* - - clang/unittests/Analysis/FlowSensitive/**/* - - clang/docs/DataFlowAnalysisIntro.md - - clang/docs/DataFlowAnalysisIntroImages/**/* + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/FlowSensitive/**/* + - clang/lib/Analysis/FlowSensitive/**/* + - clang/unittests/Analysis/FlowSensitive/**/* + - clang/docs/DataFlowAnalysisIntro.md + - clang/docs/DataFlowAnalysisIntroImages/**/* 
clang:frontend: - - clang/lib/AST/**/* - - clang/include/clang/AST/**/* - - clang/lib/Basic/**/* - - clang/include/clang/Basic/**/* - - clang/lib/Interpreter/**/* - - clang/include/clang/Interpreter/**/* - - clang/lib/Lex/**/* - - clang/include/clang/Lex/**/* - - clang/lib/Parse/**/* - - clang/include/clang/Parse/**/* - - clang/lib/Sema/**/* - - clang/include/clang/Sema/**/* + - changed-files: + - any-glob-to-any-file: + - clang/lib/AST/**/* + - clang/include/clang/AST/**/* + - clang/lib/Basic/**/* + - clang/include/clang/Basic/**/* + - clang/lib/Interpreter/**/* + - clang/include/clang/Interpreter/**/* + - clang/lib/Lex/**/* + - clang/include/clang/Lex/**/* + - clang/lib/Parse/**/* + - clang/include/clang/Parse/**/* + - clang/lib/Sema/**/* + - clang/include/clang/Sema/**/* clang:headers: - - clang/lib/Headers/**/* + - changed-files: + - any-glob-to-any-file: + - clang/lib/Headers/**/* compiler-rt: - - compiler-rt/**/* + - changed-files: + - any-glob-to-any-file: + - compiler-rt/**/* flang: - - flang/**/* + - changed-files: + - any-glob-to-any-file: + - flang/**/* flang:frontend: - - flang/Parser/**/* - - flang/Evaluate/**/* - - flang/Semantics/**/* + - changed-files: + - any-glob-to-any-file: + - flang/Parser/**/* + - flang/Evaluate/**/* + - flang/Semantics/**/* libclc: - - libclc/** + - changed-files: + - any-glob-to-any-file: + - libclc/** HLSL: - - clang/*HLSL*/**/* - - clang/**/*HLSL* - - llvm/**/Frontend/HLSL/**/* + - changed-files: + - any-glob-to-any-file: + - clang/*HLSL*/**/* + - clang/**/*HLSL* + - llvm/**/Frontend/HLSL/**/* lld: - - lld/**/* + - changed-files: + - any-glob-to-any-file: + - lld/**/* llvm-lit: - - llvm/utils/lit/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/utils/lit/**/* PGO: - - llvm/**/ProfileData/**/* - - llvm/**/SampleProfile* - - llvm/**/CodeGen/MIRSampleProfile* - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - 
llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/**/* - - llvm/test/Transforms/PGOProfile/**/* - - llvm/test/Transforms/SampleProfile/**/* - - llvm/**/llvm-profdata/**/* - - llvm/**/llvm-profgen/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/**/ProfileData/**/* + - llvm/**/SampleProfile* + - llvm/**/CodeGen/MIRSampleProfile* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/**/* + - llvm/test/Transforms/PGOProfile/**/* + - llvm/test/Transforms/SampleProfile/**/* + - llvm/**/llvm-profdata/**/* + - llvm/**/llvm-profgen/**/* vectorizers: - - llvm/lib/Transforms/Vectorize/**/* - - llvm/include/llvm/Transforms/Vectorize/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Vectorize/**/* + - llvm/include/llvm/Transforms/Vectorize/**/* # IMPORTED FROM CODEOWNERS LTO: - - llvm/*/LTO/** - - llvm/*/Linker/** - - llvm/*/ThinLTO/** - - llvm/lib/Transforms/*/FunctionImport* - - llvm/tools/gold/** + - changed-files: + - any-glob-to-any-file: + - llvm/*/LTO/** + - llvm/*/Linker/** + - llvm/*/ThinLTO/** + - llvm/lib/Transforms/*/FunctionImport* + - llvm/tools/gold/** clang:driver: - - clang/*/Driver/** + - changed-files: + - any-glob-to-any-file: + - clang/*/Driver/** compiler-rt:asan: - - compiler-rt/lib/asan/** - - compiler-rt/include/sanitizer/asan_interface.h - - compiler-rt/test/asan/** - - compiler-rt/lib/asan_abi/** - - compiler-rt/test/asan_abi/** + - changed-files: + - any-glob-to-any-file: + - 
compiler-rt/lib/asan/** + - compiler-rt/include/sanitizer/asan_interface.h + - compiler-rt/test/asan/** + - compiler-rt/lib/asan_abi/** + - compiler-rt/test/asan_abi/** compiler-rt:builtins: - - compiler-rt/lib/builtins/** - - compiler-rt/test/builtins/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/builtins/** + - compiler-rt/test/builtins/** compiler-rt:cfi: - - compiler-rt/lib/cfi/** - - compiler-rt/test/cfi/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/cfi/** + - compiler-rt/test/cfi/** compiler-rt:fuzzer: - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** compiler-rt:hwasan: - - compiler-rt/lib/hwasan/** - - compiler-rt/include/sanitizer/hwasan_interface.h - - compiler-rt/test/hwasan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/hwasan/** + - compiler-rt/include/sanitizer/hwasan_interface.h + - compiler-rt/test/hwasan/** compiler-rt:lsan: - - compiler-rt/lib/lsan/** - - compiler-rt/include/sanitizer/lsan_interface.h - - compiler-rt/test/lsan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/lsan/** + - compiler-rt/include/sanitizer/lsan_interface.h + - compiler-rt/test/lsan/** compiler-rt:msan: - - compiler-rt/lib/msan/** - - compiler-rt/include/sanitizer/msan_interface.h - - compiler-rt/test/msan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/msan/** + - compiler-rt/include/sanitizer/msan_interface.h + - compiler-rt/test/msan/** compiler-rt:sanitizer: - - llvm/lib/Transforms/Instrumentation/*Sanitizer* - - compiler-rt/lib/interception/** - - compiler-rt/lib/*san*/** - - compiler-rt/include/sanitizer/** - - compiler-rt/test/*san*/** - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** - - compiler-rt/lib/scudo/** - - 
compiler-rt/test/scudo/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Instrumentation/*Sanitizer* + - compiler-rt/lib/interception/** + - compiler-rt/lib/*san*/** + - compiler-rt/include/sanitizer/** + - compiler-rt/test/*san*/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:scudo: - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:tsan: - - compiler-rt/lib/tsan/** - - compiler-rt/include/sanitizer/tsan_interface.h - - compiler-rt/include/sanitizer/tsan_interface_atomic.h - - compiler-rt/test/tsan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/tsan/** + - compiler-rt/include/sanitizer/tsan_interface.h + - compiler-rt/include/sanitizer/tsan_interface_atomic.h + - compiler-rt/test/tsan/** compiler-rt:ubsan: - - compiler-rt/lib/ubsan/** - - compiler-rt/include/sanitizer/ubsan_interface.h - - compiler-rt/test/ubsan/** - - compiler-rt/lib/ubsan_minimal/** - - compiler-rt/test/ubsan_minimal/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/ubsan/** + - compiler-rt/include/sanitizer/ubsan_interface.h + - compiler-rt/test/ubsan/** + - compiler-rt/lib/ubsan_minimal/** + - compiler-rt/test/ubsan_minimal/** xray: - - llvm/tools/llvm-xray/** - - compiler-rt/*/xray/** - - clang/include/clang/Basic/XRay* - - clang/lib/Basic/XRay* - - compiler-rt/*/xray/** - - llvm/include/llvm/XRay/** - - llvm/lib/XRay/** - - llvm/tools/llvm-xray/** - - llvm/unittests/XRay/** - - compiler-rt/*/xray/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-xray/** + - compiler-rt/*/xray/** + - clang/include/clang/Basic/XRay* + - clang/lib/Basic/XRay* + - compiler-rt/*/xray/** + - llvm/include/llvm/XRay/** + - llvm/lib/XRay/** + - llvm/tools/llvm-xray/** + - llvm/unittests/XRay/** + - 
compiler-rt/*/xray/** clang:codegen: - - clang/lib/CodeGen/** - - clang/include/clang/CodeGen/** + - changed-files: + - any-glob-to-any-file: + - clang/lib/CodeGen/** + - clang/include/clang/CodeGen/** mlir: - - mlir/** + - changed-files: + - any-glob-to-any-file: + - mlir/** mlir:core: - - mlir/include/mlir/Support/** - - mlir/lib/Support/** - - mlir/include/mlir/Parser/** - - mlir/lib/Parser/** - - mlir/include/mlir/IR/** - - mlir/lib/IR/** - - mlir/include/mlir/Bytecode/** - - mlir/lib/Bytecode/** - - mlir/include/mlir/AsmParser/** - - mlir/lib/AsmParser/** - - mlir/include/mlir/Pass/** - - mlir/lib/Pass/** - - mlir/include/mlir/Tools/** - - mlir/lib/Tools/** - - mlir/include/mlir/Reducer/** - - mlir/lib/Reducer/** - - mlir/include/mlir/Transforms/** - - mlir/lib/Transforms/** - - mlir/include/mlir/Debug/** - - mlir/lib/Debug/** - - mlir/tools/** + - changed-files: + - any-glob-to-any-file: + - mlir/include/mlir/Support/** + - mlir/lib/Support/** + - mlir/include/mlir/Parser/** + - mlir/lib/Parser/** + - mlir/include/mlir/IR/** + - mlir/lib/IR/** + - mlir/include/mlir/Bytecode/** + - mlir/lib/Bytecode/** + - mlir/include/mlir/AsmParser/** + - mlir/lib/AsmParser/** + - mlir/include/mlir/Pass/** + - mlir/lib/Pass/** + - mlir/include/mlir/Tools/** + - mlir/lib/Tools/** + - mlir/include/mlir/Reducer/** + - mlir/lib/Reducer/** + - mlir/include/mlir/Transforms/** + - mlir/lib/Transforms/** + - mlir/include/mlir/Debug/** + - mlir/lib/Debug/** + - mlir/tools/** mlir:ods: - - mlir/TableGen/** - - mlir/tblgen/** - - mlir/include/mlir/IR/*.td + - changed-files: + - any-glob-to-any-file: + - mlir/TableGen/** + - mlir/tblgen/** + - mlir/include/mlir/IR/*.td mlir:bindings: - - mlir/Bindings/** + - changed-files: + - any-glob-to-any-file: + - mlir/Bindings/** mlir:gpu: - - mlir/**/*GPU*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*GPU*/** mlir:amdgpu: - - mlir/**/AMDGPU/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/AMDGPU/** mlir:amx: - - 
mlir/**/AMX/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/AMX/** mlir:affine: - - mlir/**/Affine/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Affine/** mlir:arith: - - mlir/**/Arith/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Arith/** mlir:neon: - - mlir/**/ArmNeon/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmNeon/** mlir:sme: - - mlir/**/ArmSME/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmSME/** mlir:sve: - - mlir/**/ArmSVE/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmSVE/** mlir:async: - - mlir/**/Async/** - - mlir/**/Async/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Async/** + - mlir/**/Async/** mlir:bufferization: - - mlir/**/Bufferization/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Bufferization/** mlir:complex: - - mlir/**/Complex/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Complex/** mlir:cf: - - mlir/**/ControlFlow/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ControlFlow/** mlir:dlti: - - mlir/**/DLTI/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/DLTI/** mlir:emitc: - - mlir/**/*EmitC*/** - - mlir/lib/Target/Cpp/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*EmitC*/** + - mlir/lib/Target/Cpp/** mlir:func: - - mlir/**/Func/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Func/** mlir:irdl: - - mlir/**/IRDL/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/IRDL/** mlir:index: - - mlir/**/Index/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Index/** mlir:llvm: - - mlir/**/LLVM* - - mlir/**/LLVM*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/LLVM* + - mlir/**/LLVM*/** mlir:linalg: - - mlir/**/*linalg/** - - mlir/**/*Linalg/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*linalg/** + - mlir/**/*Linalg/** mlir:mlprogram: - - mlir/**/MLProgram/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/MLProgram/** 
mlir:math: - - mlir/**/Math/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Math/** mlir:memref: - - mlir/**/MemRef/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/MemRef/** mlir:nvgpu: - - mlir/**/NVGPU/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/NVGPU/** mlir:openacc: - - mlir/**/*OpenACC* - - mlir/**/*OpenACC*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*OpenACC* + - mlir/**/*OpenACC*/** mlir:openmp: - - mlir/**/*OpenMP* - - mlir/**/*OpenMP*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*OpenMP* + - mlir/**/*OpenMP*/** mlir:pdl: - - mlir/**/PDL/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/PDL/** mlir:quant: - - mlir/**/Quant/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Quant/** mlir:scf: - - mlir/**/SCF/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/SCF/** mlir:spirv: - - mlir/**/SPIRV/** - - mlir/**/SPIRVTo*/** - - mlir/**/*ToSPIRV/** - - mlir/tools/mlir-spirv-cpu-runner/** - - mlir/tools/mlir-vulkan-runner/** - - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp + - changed-files: + - any-glob-to-any-file: + - mlir/**/SPIRV/** + - mlir/**/SPIRVTo*/** + - mlir/**/*ToSPIRV/** + - mlir/tools/mlir-spirv-cpu-runner/** + - mlir/tools/mlir-vulkan-runner/** + - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp mlir:shape: - - mlir/**/Shape/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Shape/** mlir:sparse: - - mlir/**/SparseTensor/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/SparseTensor/** mlir:tensor: - - mlir/**/Tensor/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Tensor/** mlir:tosa: - - mlir/**/*Tosa*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Tosa*/** mlir:ub: - - mlir/**/UB/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/UB/** mlir:vector: - - mlir/**/*Vector/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Vector/** mlir:execution-engine: - - mlir/**/ExecutionEngine/** + - 
changed-files: + - any-glob-to-any-file: + - mlir/**/ExecutionEngine/** mlir:presburger: - - mlir/**/*Presburger*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Presburger*/** mlir:python: - - mlir/python/**/* + - changed-files: + - any-glob-to-any-file: + - mlir/python/**/* mlir:vectorops: - - mlir/**/Vector/**/* + - changed-files: + - any-glob-to-any-file: + - mlir/**/Vector/**/* coroutines: - - clang/docs/DebuggingCoroutines.rst - - clang/lib/Sema/SemaCoroutine.cpp - - clang/lib/CodeGen/CGCoroutine.cpp - - clang/test/CodeGenCoroutines/** - - llvm/docs/Coroutines.rst - - llvm/include/llvm/Transforms/Coroutines/** - - llvm/lib/Transforms/Coroutines/** - - llvm/test/Transforms/Coroutines/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/DebuggingCoroutines.rst + - clang/lib/Sema/SemaCoroutine.cpp + - clang/lib/CodeGen/CGCoroutine.cpp + - clang/test/CodeGenCoroutines/** + - llvm/docs/Coroutines.rst + - llvm/include/llvm/Transforms/Coroutines/** + - llvm/lib/Transforms/Coroutines/** + - llvm/test/Transforms/Coroutines/* clang:modules: - - clang/docs/StandardCPlusPlusModules.rst - - clang/include/clang/AST/AbstractBasicReader.h - - clang/include/clang/AST/AbstractBasicWriter.h - - clang/include/clang/AST/AbstractTypeReader.h - - clang/include/clang/AST/AbstractTypeWriter.h - - clang/include/clang/AST/PropertiesBase.td - - clang/include/clang/AST/ODRHash.h - - clang/include/clang/AST/TypeProperties.td - - clang/include/clang/Basic/Module.h - - clang/include/clang/Frontend/PrecompiledPreamble.h - - clang/include/clang/Lex/ModuleLoader.h - - clang/include/clang/Lex/ModuleMap.h - - clang/include/clang/Serialization/** - - clang/lib/AST/ODRHash.cpp - - clang/lib/AST/StmtProfile.cpp - - clang/lib/Basic/Module.cpp - - clang/lib/Frontend/ModuleDependencyCollector.cpp - - clang/lib/Frontend/PrecompiledPreamble.cpp - - clang/lib/Lex/ModuleMap.cpp - - clang/lib/Sema/SemaModule.cpp - - clang/lib/Serialization/** - - clang/test/CXX/module/** - - 
clang/test/Modules/** - - clang/unittests/Serialization/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/StandardCPlusPlusModules.rst + - clang/include/clang/AST/AbstractBasicReader.h + - clang/include/clang/AST/AbstractBasicWriter.h + - clang/include/clang/AST/AbstractTypeReader.h + - clang/include/clang/AST/AbstractTypeWriter.h + - clang/include/clang/AST/PropertiesBase.td + - clang/include/clang/AST/ODRHash.h + - clang/include/clang/AST/TypeProperties.td + - clang/include/clang/Basic/Module.h + - clang/include/clang/Frontend/PrecompiledPreamble.h + - clang/include/clang/Lex/ModuleLoader.h + - clang/include/clang/Lex/ModuleMap.h + - clang/include/clang/Serialization/** + - clang/lib/AST/ODRHash.cpp + - clang/lib/AST/StmtProfile.cpp + - clang/lib/Basic/Module.cpp + - clang/lib/Frontend/ModuleDependencyCollector.cpp + - clang/lib/Frontend/PrecompiledPreamble.cpp + - clang/lib/Lex/ModuleMap.cpp + - clang/lib/Sema/SemaModule.cpp + - clang/lib/Serialization/** + - clang/test/CXX/module/** + - clang/test/Modules/** + - clang/unittests/Serialization/* clang-tidy: - - clang-tools-extra/clang-tidy/** - - clang-tools-extra/docs/clang-tidy/** - - clang-tools-extra/test/clang-tidy/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/clang-tidy/** + - clang-tools-extra/docs/clang-tidy/** + - clang-tools-extra/test/clang-tidy/** clang-tools-extra: - - clang-tools-extra/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/** tools:llvm-mca: - - llvm/tools/llvm-mca/** - - llvm/include/llvm/MCA/** - - llvm/lib/MCA/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-mca/** + - llvm/include/llvm/MCA/** + - llvm/lib/MCA/** clang: - - any: - - clang/** - - '!clang/**/Format/**' - - '!clang/tools/clang-format/**' + - changed-files: + - all-globs-to-all-files: + - clang/** + - '!clang/**/Format/**' + - '!clang/tools/clang-format/**' testing-tools: - - llvm/include/llvm/FileCheck/** - - llvm/lib/FileCheck/** - -
llvm/test/FileCheck/** - - llvm/unittests/FileCheck/** - - llvm/utils/lit/** - - llvm/utils/split-file/** - - llvm/utils/not/** - - llvm/utils/count/** - - llvm/utils/FileCheck/** - - llvm/docs/CommandGuide/FileCheck.rst - - llvm/docs/CommandGuide/lit.rst - - llvm/docs/TestingGuide.rst - - llvm/test/Other/FileCheck-space.txt - - llvm/utils/UpdateTestChecks/** - - llvm/utils/update*_test_checks.py + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/FileCheck/** + - llvm/lib/FileCheck/** + - llvm/test/FileCheck/** + - llvm/unittests/FileCheck/** + - llvm/utils/lit/** + - llvm/utils/split-file/** + - llvm/utils/not/** + - llvm/utils/count/** + - llvm/utils/FileCheck/** + - llvm/docs/CommandGuide/FileCheck.rst + - llvm/docs/CommandGuide/lit.rst + - llvm/docs/TestingGuide.rst + - llvm/test/Other/FileCheck-space.txt + - llvm/utils/UpdateTestChecks/** + - llvm/utils/update*_test_checks.py debuginfo: - - clang/lib/CodeGen/CGDebugInfo.* - - llvm/include/llvm/BinaryFormat/Dwarf.* - - llvm/include/llvm/CodeGen/*Debug*.* - - llvm/include/llvm/DebugInfo/** - - llvm/include/llvm/Debuginfod/** - - llvm/include/llvm/Frontend/Debug/** - - llvm/include/llvm/IR/Debug*.* - - llvm/include/llvm/Object/*Debug*.* - - llvm/include/llvm/ObjectYAML/*Debug*.* - - llvm/include/llvm/Transforms/Utils/*Debug*.* - - llvm/include/llvm-c/DebugInfo.h - - llvm/lib/BinaryFormat/Dwarf.cpp - - llvm/lib/CodeGen/AsmPrinter/*Debug*.* - - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* - - llvm/lib/CodeGen/AsmPrinter/DIE*.* - - llvm/lib/CodeGen/LiveDebugValues/** - - llvm/lib/CodeGen/*Debug*.* - - llvm/lib/CodeGen/DwarfEHPrepare.cpp - - llvm/lib/DebugInfo/** - - llvm/lib/Debuginfod/** - - llvm/lib/DWARFLinkerParallel/** - - llvm/lib/IR/Debug*.cpp - - llvm/lib/MC/MCDwarf.cpp - - llvm/lib/Transforms/Utils/*Debug*.* - - llvm/test/DebugInfo/** - - llvm/test/tools/dsymutil/** - - llvm/test/tools/llvm-debuginfo-analyzer/** - - llvm/test/tools/llvm-debuginfod/** - - llvm/test/tools/llvm-debuginfod-find/** - - 
llvm/test/tools/llvm-dwarfdump/** - - llvm/test/tools/llvm-dwarfutil/** - - llvm/test/tools/llvm-dwp/** - - llvm/test/tools/llvm-gsymutil/** - - llvm/test/tools/llvm-pdbuti/** - - llvm/tools/dsymutil/** - - llvm/tools/llvm-debuginfo-analyzer/** - - llvm/tools/llvm-debuginfod/** - - llvm/tools/llvm-debuginfod-find/** - - llvm/tools/llvm-dwarfdump/** - - llvm/tools/llvm-dwarfutil/** - - llvm/tools/llvm-dwp/** - - llvm/tools/llvm-gsymutil/** - - llvm/tools/llvm-pdbutil/** + - changed-files: + - any-glob-to-any-file: + - clang/lib/CodeGen/CGDebugInfo.* + - llvm/include/llvm/BinaryFormat/Dwarf.* + - llvm/include/llvm/CodeGen/*Debug*.* + - llvm/include/llvm/DebugInfo/** + - llvm/include/llvm/Debuginfod/** + - llvm/include/llvm/Frontend/Debug/** + - llvm/include/llvm/IR/Debug*.* + - llvm/include/llvm/Object/*Debug*.* + - llvm/include/llvm/ObjectYAML/*Debug*.* + - llvm/include/llvm/Transforms/Utils/*Debug*.* + - llvm/include/llvm-c/DebugInfo.h + - llvm/lib/BinaryFormat/Dwarf.cpp + - llvm/lib/CodeGen/AsmPrinter/*Debug*.* + - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* + - llvm/lib/CodeGen/AsmPrinter/DIE*.* + - llvm/lib/CodeGen/LiveDebugValues/** + - llvm/lib/CodeGen/*Debug*.* + - llvm/lib/CodeGen/DwarfEHPrepare.cpp + - llvm/lib/DebugInfo/** + - llvm/lib/Debuginfod/** + - llvm/lib/DWARFLinkerParallel/** + - llvm/lib/IR/Debug*.cpp + - llvm/lib/MC/MCDwarf.cpp + - llvm/lib/Transforms/Utils/*Debug*.* + - llvm/test/DebugInfo/** + - llvm/test/tools/dsymutil/** + - llvm/test/tools/llvm-debuginfo-analyzer/** + - llvm/test/tools/llvm-debuginfod/** + - llvm/test/tools/llvm-debuginfod-find/** + - llvm/test/tools/llvm-dwarfdump/** + - llvm/test/tools/llvm-dwarfutil/** + - llvm/test/tools/llvm-dwp/** + - llvm/test/tools/llvm-gsymutil/** + - llvm/test/tools/llvm-pdbutil/** + - llvm/tools/dsymutil/** + - llvm/tools/llvm-debuginfo-analyzer/** + - llvm/tools/llvm-debuginfod/** + - llvm/tools/llvm-debuginfod-find/** + - llvm/tools/llvm-dwarfdump/** + - llvm/tools/llvm-dwarfutil/** + -
llvm/tools/llvm-dwp/** + - llvm/tools/llvm-gsymutil/** + - llvm/tools/llvm-pdbutil/** github:workflow: - - .github/workflows/** + - changed-files: + - any-glob-to-any-file: + - .github/workflows/** cmake: - - cmake/** - - llvm/cmake/** - - runtimes/** + - changed-files: + - any-glob-to-any-file: + - cmake/** + - llvm/cmake/** + - runtimes/** flang:driver: - - flang/tools/flang-driver/** - - flang/unittests/Frontend/** - - flang/lib/FrontendTool/** - - flang/lib/Frontend/** - - flang/include/flang/Frontend/** - - flang/include/flang/FrontendTool/** - - flang/test/Driver/** + - changed-files: + - any-glob-to-any-file: + - flang/tools/flang-driver/** + - flang/unittests/Frontend/** + - flang/lib/FrontendTool/** + - flang/lib/Frontend/** + - flang/include/flang/Frontend/** + - flang/include/flang/FrontendTool/** + - flang/test/Driver/** backend:m68k: - - llvm/lib/Target/M68k/** - - clang/lib/Basic/Targets/M68k.* - - clang/lib/CodeGen/Targets/M68k.cpp - - llvm/test/CodeGen/M68k/** - - llvm/test/MC/Disassembler/M68k/** - - llvm/test/MC/M68k/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/M68k/** + - clang/lib/Basic/Targets/M68k.* + - clang/lib/CodeGen/Targets/M68k.cpp + - llvm/test/CodeGen/M68k/** + - llvm/test/MC/Disassembler/M68k/** + - llvm/test/MC/M68k/** libc++: - - libcxx/** - - .github/workflows/libcxx-* + - changed-files: + - any-glob-to-any-file: + - libcxx/** + - .github/workflows/libcxx-* libc++abi: - - libcxxabi/** + - changed-files: + - any-glob-to-any-file: + - libcxxabi/** libunwind: - - libunwind/** + - changed-files: + - any-glob-to-any-file: + - libunwind/** objectyaml: - - llvm/include/llvm/ObjectYAML/** - - llvm/lib/ObjectYAML/** - - llvm/test/tools/obj2yaml/** - - llvm/test/tools/yaml2obj/** - - llvm/tools/obj2yaml/** - - llvm/tools/yaml2obj/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/ObjectYAML/** + - llvm/lib/ObjectYAML/** + - llvm/test/tools/obj2yaml/** + - llvm/test/tools/yaml2obj/** + - 
llvm/tools/obj2yaml/** + - llvm/tools/yaml2obj/** clang:analysis: - - clang/include/clang/Analysis/** - - clang/lib/Analysis/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/** + - clang/lib/Analysis/** clang:static analyzer: - - clang/include/clang/StaticAnalyzer/** - - clang/lib/StaticAnalyzer/** - - clang/tools/scan-build/** - - clang/utils/analyzer/** - - clang/docs/analyzer/** - - clang/test/Analysis/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/StaticAnalyzer/** + - clang/lib/StaticAnalyzer/** + - clang/tools/scan-build/** + - clang/utils/analyzer/** + - clang/docs/analyzer/** + - clang/test/Analysis/** pgo: - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/** - - llvm/test/Transforms/PGOProfile/** - - compiler-rt/lib/profile/** - - compiler-rt/lib/memprof/** - - compiler-rt/test/profile/** - - compiler-rt/test/memprof/** - - llvm/tools/llvm-profdata/** - - llvm/tools/llvm-profgen/** - - llvm/test/tools/llvm-profdata/** - - llvm/test/tools/llvm-profgen/** - - llvm/unittests/ProfileData/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/** + - llvm/test/Transforms/PGOProfile/** + - compiler-rt/lib/profile/** + - compiler-rt/lib/memprof/** + - compiler-rt/test/profile/** + - 
compiler-rt/test/memprof/** + - llvm/tools/llvm-profdata/** + - llvm/tools/llvm-profgen/** + - llvm/test/tools/llvm-profdata/** + - llvm/test/tools/llvm-profgen/** + - llvm/unittests/ProfileData/* openacc: - - flang/**/OpenACC/** - - flang/include/flang/Lower/OpenACC.h - - flang/docs/OpenACC.md - - flang/lib/Parser/openacc-parsers.cpp - - flang/lib/Lower/OpenACC.cpp - - llvm/**/Frontend/OpenACC/** - - llvm/unittests/Frontend/OpenACCTest.cpp - - mlir/test/Target/LLVMIR/openacc-llvm.mlir - - mlir/**/*OpenACC/** + - changed-files: + - any-glob-to-any-file: + - flang/**/OpenACC/** + - flang/include/flang/Lower/OpenACC.h + - flang/docs/OpenACC.md + - flang/lib/Parser/openacc-parsers.cpp + - flang/lib/Lower/OpenACC.cpp + - llvm/**/Frontend/OpenACC/** + - llvm/unittests/Frontend/OpenACCTest.cpp + - mlir/test/Target/LLVMIR/openacc-llvm.mlir + - mlir/**/*OpenACC/** flang:runtime: - - flang/runtime/** + - changed-files: + - any-glob-to-any-file: + - flang/runtime/** flang:parser: - - flang/**/Parser/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Parser/** flang:semantics: - - flang/**/Evaluate/** - - flang/**/Semantics/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Evaluate/** + - flang/**/Semantics/** flang:fir-hlfir: - - flang/**/Lower/** - - flang/**/Optimizer/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Lower/** + - flang/**/Optimizer/** flang:codegen: - - flang/**/CodeGen/** + - changed-files: + - any-glob-to-any-file: + - flang/**/CodeGen/** llvm:codegen: - - llvm/lib/CodeGen/* - - llvm/lib/CodeGen/MIRParser/* - - llvm/lib/CodeGen/LiveDebugValues/* - - llvm/lib/CodeGen/AsmPrinter/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/CodeGen/* + - llvm/lib/CodeGen/MIRParser/* + - llvm/lib/CodeGen/LiveDebugValues/* + - llvm/lib/CodeGen/AsmPrinter/* llvm:globalisel: - - llvm/**/GlobalISel/** - - llvm/utils/TableGen/GlobalISel* + - changed-files: + - any-glob-to-any-file: + - llvm/**/GlobalISel/** + - 
llvm/utils/TableGen/GlobalISel* function-specialization: - - llvm/include/llvm/Transforms/Utils/SCCPSolver.h - - llvm/lib/Transforms/Utils/SCCPSolver.cpp - - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h - - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp - - llvm/test/Transforms/FunctionSpecialization/* + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/Transforms/Utils/SCCPSolver.h + - llvm/lib/Transforms/Utils/SCCPSolver.cpp + - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h + - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp + - llvm/test/Transforms/FunctionSpecialization/* libc: - - libc/** - - utils/bazel/llvm-project-overlay/libc/** + - changed-files: + - any-glob-to-any-file: + - libc/** + - utils/bazel/llvm-project-overlay/libc/** clang-format: - - clang/**/Format/** - - clang/tools/clang-format/** + - changed-files: + - any-glob-to-any-file: + - clang/**/Format/** + - clang/tools/clang-format/** flang:openmp: - - flang/test/**/OpenMP/** - - flang/lib/Lower/OpenMP.cpp - - flang/lib/Semantics/resolve-directives.cpp - - flang/lib/Semantics/check-omp-structure.cpp - - flang/lib/Optimizer/Transforms/OMP* - - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir - - flang/test/Lower/OpenMP/** - - flang/test/Transforms/omp* - - mlir/**/*OpenMP* - - mlir/test/Target/LLVMIR/openmp* - - llvm/lib/Frontend/OpenMP/** - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/unittests/Frontend/OpenMP* + - changed-files: + - any-glob-to-any-file: + - flang/test/**/OpenMP/** + - flang/lib/Lower/OpenMP.cpp + - flang/lib/Semantics/resolve-directives.cpp + - flang/lib/Semantics/check-omp-structure.cpp + - flang/lib/Optimizer/Transforms/OMP* + - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir + - flang/test/Lower/OpenMP/** + - flang/test/Transforms/omp* + - mlir/**/*OpenMP* + - mlir/test/Target/LLVMIR/openmp* + - llvm/lib/Frontend/OpenMP/** + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/unittests/Frontend/OpenMP* llvm:ir: - - 
llvm/lib/IR/** - - llvm/include/llvm/IR/** - - llvm/docs/LangRef.rst - - llvm/unittests/IR/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/IR/** + - llvm/include/llvm/IR/** + - llvm/docs/LangRef.rst + - llvm/unittests/IR/** llvm:SandboxIR: - - llvm/lib/SandboxIR/** - - llvm/include/llvm/SandboxIR/** - - llvm/docs/SandboxIR.md - - llvm/unittests/SandboxIR/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/SandboxIR/** + - llvm/include/llvm/SandboxIR/** + - llvm/docs/SandboxIR.md + - llvm/unittests/SandboxIR/** llvm:analysis: - - llvm/lib/Analysis/** - - llvm/include/llvm/Analysis/** - - llvm/test/Analysis/** - - llvm/unittests/Analysis/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/** + - llvm/include/llvm/Analysis/** + - llvm/test/Analysis/** + - llvm/unittests/Analysis/** llvm:adt: - - llvm/**/ADT/* + - changed-files: + - any-glob-to-any-file: + - llvm/**/ADT/* llvm:support: - - llvm/**/Support/** + - changed-files: + - any-glob-to-any-file: + - llvm/**/Support/** # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories. 
llvm:mc: - - llvm/include/llvm/MC/** - - llvm/lib/MC/** - - llvm/tools/llvm-mc/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/MC/** + - llvm/lib/MC/** + - llvm/tools/llvm-mc/** llvm:transforms: - - llvm/lib/Transforms/** - - llvm/include/llvm/Transforms/** - - llvm/test/Transforms/** - - llvm/unittests/Transforms/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/** + - llvm/include/llvm/Transforms/** + - llvm/test/Transforms/** + - llvm/unittests/Transforms/** llvm:instcombine: - - llvm/lib/Analysis/InstructionSimplify.cpp - - llvm/lib/Transforms/InstCombine/** - - llvm/include/llvm/Transforms/InstCombine/ - - llvm/include/llvm/Analysis/InstructionSimplify.h - - llvm/test/Transforms/InstCombine/** - - llvm/test/Transforms/InstSimplify/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/InstructionSimplify.cpp + - llvm/lib/Transforms/InstCombine/** + - llvm/include/llvm/Transforms/InstCombine/ + - llvm/include/llvm/Analysis/InstructionSimplify.h + - llvm/test/Transforms/InstCombine/** + - llvm/test/Transforms/InstSimplify/** llvm:vectorcombine: - - llvm/lib/Transforms/Vectorize/VectorCombine.cpp - - llvm/test/Transforms/VectorCombine/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Vectorize/VectorCombine.cpp + - llvm/test/Transforms/VectorCombine/** clangd: - - clang-tools-extra/clangd/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/clangd/** hlsl: - - clang/test/ParserHLSL/** - - clang/test/SemaHLSL/** - - clang/test/AST/HLSL/** - - clang/test/CodeGenHLSL/** - - clang/cmake/caches/HLSL.cmake - - clang/include/clang/Basic/HLSL*.h - - clang/include/clang/Sema/HLSL*.h - - clang/docs/HLSL/** - - clang/lib/Driver/ToolChains/HLSL* - - clang/lib/Parse/ParseHLSL.cpp - - clang/lib/Sema/HLSLExternalSemaSource.cpp - - clang/lib/Sema/SemaHLSL.cpp - - clang/lib/CodeGen/CGHLSLRuntime.* - - clang/lib/CodeGen/CGHLSLBuiltins.cpp - - llvm/include/llvm/Frontend/HLSL/** - - 
llvm/lib/Frontend/HLSL/** + - changed-files: + - any-glob-to-any-file: + - clang/test/ParserHLSL/** + - clang/test/SemaHLSL/** + - clang/test/AST/HLSL/** + - clang/test/CodeGenHLSL/** + - clang/cmake/caches/HLSL.cmake + - clang/include/clang/Basic/HLSL*.h + - clang/include/clang/Sema/HLSL*.h + - clang/docs/HLSL/** + - clang/lib/Driver/ToolChains/HLSL* + - clang/lib/Parse/ParseHLSL.cpp + - clang/lib/Sema/HLSLExternalSemaSource.cpp + - clang/lib/Sema/SemaHLSL.cpp + - clang/lib/CodeGen/CGHLSLRuntime.* + - clang/lib/CodeGen/CGHLSLBuiltins.cpp + - llvm/include/llvm/Frontend/HLSL/** + - llvm/lib/Frontend/HLSL/** llvm:SelectionDAG: - - llvm/include/llvm/CodeGen/SelectionDAG*.h - - llvm/include/llvm/CodeGen/SDNodeProperties.td - - llvm/include/llvm/Target/TargetSelectionDAG.td - - llvm/lib/CodeGen/SelectionDAG/** - - llvm/utils/TableGen/CodeGenDAG* - - llvm/utils/TableGen/DAGISel* - - llvm/include/llvm/CodeGen/DAGCombine.h - - llvm/include/llvm/CodeGen/ISDOpcodes.h + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/CodeGen/SelectionDAG*.h + - llvm/include/llvm/CodeGen/SDNodeProperties.td + - llvm/include/llvm/Target/TargetSelectionDAG.td + - llvm/lib/CodeGen/SelectionDAG/** + - llvm/utils/TableGen/CodeGenDAG* + - llvm/utils/TableGen/DAGISel* + - llvm/include/llvm/CodeGen/DAGCombine.h + - llvm/include/llvm/CodeGen/ISDOpcodes.h backend:DirectX: - - '**/*DirectX*' - - '**/*DXIL*' - - '**/*dxil*' - - '**/*DirectX*/**' - - '**/*DXIL*/**' - - '**/*dxil*/**' - - '**/*DXContainer*' - - '**/*DXContainer*/**' - - clang/lib/Sema/SemaDirectX.cpp - - clang/include/clang/Sema/SemaDirectX.h - - clang/include/clang/Basic/BuiltinsDirectX.td - - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp - - clang/test/CodeGenDirectX/** - - clang/test/SemaDirectX/** + - changed-files: + - any-glob-to-any-file: + - '**/*DirectX*' + - '**/*DXIL*' + - '**/*dxil*' + - '**/*DirectX*/**' + - '**/*DXIL*/**' + - '**/*dxil*/**' + - '**/*DXContainer*' + - '**/*DXContainer*/**' + - 
clang/lib/Sema/SemaDirectX.cpp + - clang/include/clang/Sema/SemaDirectX.h + - clang/include/clang/Basic/BuiltinsDirectX.td + - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp + - clang/test/CodeGenDirectX/** + - clang/test/SemaDirectX/** backend:SPIR-V: - - clang/lib/Driver/ToolChains/SPIRV.* - - clang/lib/Sema/SemaSPIRV.cpp - - clang/include/clang/Sema/SemaSPIRV.h - - clang/include/clang/Basic/BuiltinsSPIRV.td - - clang/test/CodeGenSPIRV/** - - clang/test/SemaSPIRV/** - - llvm/lib/Target/SPIRV/** - - llvm/test/CodeGen/SPIRV/** - - llvm/test/Frontend/HLSL/** - - llvm/docs/SPIRVUsage.rst + - changed-files: + - any-glob-to-any-file: + - clang/lib/Driver/ToolChains/SPIRV.* + - clang/lib/Sema/SemaSPIRV.cpp + - clang/include/clang/Sema/SemaSPIRV.h + - clang/include/clang/Basic/BuiltinsSPIRV.td + - clang/test/CodeGenSPIRV/** + - clang/test/SemaSPIRV/** + - llvm/lib/Target/SPIRV/** + - llvm/test/CodeGen/SPIRV/** + - llvm/test/Frontend/HLSL/** + - llvm/docs/SPIRVUsage.rst mlgo: - - llvm/lib/Analysis/ML* - - llvm/include/llvm/Analysis/ML* - - llvm/lib/Analysis/*Runner.cpp - - llvm/include/llvm/Analysis/*Runner.h - - llvm/unittests/Analysis/ML* - - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp - - llvm/lib/Analysis/TrainingLogger.cpp - - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h - - llvm/include/llvm/Analysis/Utils/TrainingLogger.h - - llvm/test/Analysis/FunctionPropertiesAnalysis/* - - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp - - llvm/test/Transforms/inline/ML/** - - llvm/lib/CodeGen/ML* - - llvm/unittests/CodeGen/ML* - - llvm/test/CodeGen/MLRegAlloc/** - - llvm/utils/mlgo-utils/** - - llvm/docs/MLGO.rst - - llvm/include/llvm/Analysis/IR2Vec.h - - llvm/lib/Analysis/IR2Vec.cpp - - llvm/lib/Analysis/models/** - - llvm/include/llvm/CodeGen/MIR2Vec.h - - llvm/lib/CodeGen/MIR2Vec.cpp - - llvm/test/Analysis/IR2Vec/** - - llvm/test/CodeGen/MIR2Vec/** - - llvm/unittests/Analysis/IR2VecTest.cpp - - llvm/unittests/CodeGen/MIR2VecTest.cpp - - 
llvm/tools/llvm-ir2vec/** - - llvm/docs/CommandGuide/llvm-ir2vec.rst + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/ML* + - llvm/include/llvm/Analysis/ML* + - llvm/lib/Analysis/*Runner.cpp + - llvm/include/llvm/Analysis/*Runner.h + - llvm/unittests/Analysis/ML* + - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp + - llvm/lib/Analysis/TrainingLogger.cpp + - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h + - llvm/include/llvm/Analysis/Utils/TrainingLogger.h + - llvm/test/Analysis/FunctionPropertiesAnalysis/* + - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp + - llvm/test/Transforms/inline/ML/** + - llvm/lib/CodeGen/ML* + - llvm/unittests/CodeGen/ML* + - llvm/test/CodeGen/MLRegAlloc/** + - llvm/utils/mlgo-utils/** + - llvm/docs/MLGO.rst + - llvm/include/llvm/Analysis/IR2Vec.h + - llvm/lib/Analysis/IR2Vec.cpp + - llvm/lib/Analysis/models/** + - llvm/include/llvm/CodeGen/MIR2Vec.h + - llvm/lib/CodeGen/MIR2Vec.cpp + - llvm/test/Analysis/IR2Vec/** + - llvm/test/CodeGen/MIR2Vec/** + - llvm/unittests/Analysis/IR2VecTest.cpp + - llvm/unittests/CodeGen/MIR2VecTest.cpp + - llvm/tools/llvm-ir2vec/** + - llvm/docs/CommandGuide/llvm-ir2vec.rst tools:llvm-exegesis: - - llvm/tools/llvm-exegesis/** - - llvm/test/tools/llvm-exegesis/** - - llvm/unittests/tools/llvm-exegesis/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-exegesis/** + - llvm/test/tools/llvm-exegesis/** + - llvm/unittests/tools/llvm-exegesis/** tools:llvm-reduce: - - llvm/tools/llvm-reduce/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-reduce/** platform:windows: - - lld/COFF/** - - clang/lib/Driver/MSVC.cpp - - clang/lib/Driver/MinGW.cpp - - llvm/lib/DebugInfo/CodeView/** - - llvm/lib/DebugInfo/PDB/** - - llvm/lib/WindowsDriver/** - - llvm/lib/Support/Windows/** - - llvm/lib/BinaryFormat/COFF.cpp + - changed-files: + - any-glob-to-any-file: + - lld/COFF/** + - clang/lib/Driver/MSVC.cpp + - clang/lib/Driver/MinGW.cpp + - 
llvm/lib/DebugInfo/CodeView/** + - llvm/lib/DebugInfo/PDB/** + - llvm/lib/WindowsDriver/** + - llvm/lib/Support/Windows/** + - llvm/lib/BinaryFormat/COFF.cpp llvm:regalloc: - - llvm/**/CodeGen/CalcSpillWeights* - - llvm/**/CodeGen/InlineSpiller* - - llvm/**/CodeGen/InterferenceCache* - - llvm/**/CodeGen/LiveInterval* - - llvm/**/CodeGen/LiveRange* - - llvm/**/CodeGen/LiveReg* - - llvm/**/CodeGen/LiveVariables* - - llvm/**/CodeGen/MachineCopyPropagation* - - llvm/**/CodeGen/PHIElimination* - - llvm/**/CodeGen/ProcessImplicitDefs.cpp - - llvm/**/CodeGen/Register* - - llvm/**/CodeGen/RegUsage* - - llvm/**/CodeGen/RenameIndependentSubregs.cpp - - llvm/**/CodeGen/SlotIndexes.h - - llvm/**/CodeGen/SpillPlacement* - - llvm/**/CodeGen/SplitKit* - - llvm/**/CodeGen/VirtRegMap.h - - llvm/include/PBQP/** - - llvm/include/PBQPRAConstraint.h - - llvm/include/llvm/CodeGen/Spiller.h - - llvm/**/*RegAlloc + - changed-files: + - any-glob-to-any-file: + - llvm/**/CodeGen/CalcSpillWeights* + - llvm/**/CodeGen/InlineSpiller* + - llvm/**/CodeGen/InterferenceCache* + - llvm/**/CodeGen/LiveInterval* + - llvm/**/CodeGen/LiveRange* + - llvm/**/CodeGen/LiveReg* + - llvm/**/CodeGen/LiveVariables* + - llvm/**/CodeGen/MachineCopyPropagation* + - llvm/**/CodeGen/PHIElimination* + - llvm/**/CodeGen/ProcessImplicitDefs.cpp + - llvm/**/CodeGen/Register* + - llvm/**/CodeGen/RegUsage* + - llvm/**/CodeGen/RenameIndependentSubregs.cpp + - llvm/**/CodeGen/SlotIndexes.h + - llvm/**/CodeGen/SpillPlacement* + - llvm/**/CodeGen/SplitKit* + - llvm/**/CodeGen/VirtRegMap.h + - llvm/include/PBQP/** + - llvm/include/PBQPRAConstraint.h + - llvm/include/llvm/CodeGen/Spiller.h + - llvm/**/*RegAlloc lldb: - - lldb/** + - changed-files: + - any-glob-to-any-file: + - lldb/** lldb-dap: - - lldb/tools/lldb-dap/** + - changed-files: + - any-glob-to-any-file: + - lldb/tools/lldb-dap/** backend:AMDGPU: - - '**/*amdgpu*' - - '**/*AMDGPU*' - - '**/*amdgpu*/**' - - '**/*AMDGPU*/**' + - changed-files: + - 
any-glob-to-any-file: + - '**/*amdgpu*' + - '**/*AMDGPU*' + - '**/*amdgpu*/**' + - '**/*AMDGPU*/**' backend:NVPTX: - - 'llvm/**/*nvvm*' - - 'llvm/**/*NVVM*' - - 'llvm/**/*nvptx*' - - 'llvm/**/*NVPTX*' - - 'llvm/**/*nvvm*/**' - - 'llvm/**/*NVVM*/**' - - 'llvm/**/*nvptx*/**' - - 'llvm/**/*NVPTX*/**' + - changed-files: + - any-glob-to-any-file: + - 'llvm/**/*nvvm*' + - 'llvm/**/*NVVM*' + - 'llvm/**/*nvptx*' + - 'llvm/**/*NVPTX*' + - 'llvm/**/*nvvm*/**' + - 'llvm/**/*NVVM*/**' + - 'llvm/**/*nvptx*/**' + - 'llvm/**/*NVPTX*/**' backend:MIPS: - - '**/*mips*' - - '**/*Mips*' - - '**/*mips*/**' - - '**/*Mips*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*mips*' + - '**/*Mips*' + - '**/*mips*/**' + - '**/*Mips*/**' backend:RISC-V: - - '**/*riscv*' - - '**/*RISCV*' - - '**/*riscv*/**' - - '**/*RISCV*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*riscv*' + - '**/*RISCV*' + - '**/*riscv*/**' + - '**/*RISCV*/**' backend:Xtensa: - - '**/*xtensa*' - - '**/*Xtensa*' - - '**/*xtensa*/**' - - '**/*Xtensa*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*xtensa*' + - '**/*Xtensa*' + - '**/*xtensa*/**' + - '**/*Xtensa*/**' lld:coff: - - lld/**/COFF/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/COFF/** + - lld/Common/** lld:elf: - - lld/**/ELF/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/ELF/** + - lld/Common/** lld:macho: - - lld/**/MachO/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/MachO/** + - lld/Common/** lld:wasm: - - lld/**/wasm/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/wasm/** + - lld/Common/** backend:ARC: - - llvm/lib/Target/ARC/** - - clang/lib/Basic/Targets/ARC.h - - clang/lib/Basic/Targets/ARC.cpp - - clang/lib/CodeGen/Targets/ARC.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/ARC/** + - clang/lib/Basic/Targets/ARC.h + - clang/lib/Basic/Targets/ARC.cpp + - clang/lib/CodeGen/Targets/ARC.cpp 
backend:ARM: - - llvm/include/llvm/IR/IntrinsicsARM.td - - llvm/test/MC/ARM/** - - llvm/lib/Target/ARM/** - - llvm/test/CodeGen/ARM/** - - clang/lib/Basic/Targets/ARM* - - clang/lib/Driver/ToolChains/Arch/ARM.* - - clang/lib/CodeGen/Targets/ARM.cpp - - clang/include/clang/Basic/BuiltinsARM* - - llvm/test/MC/DisasemblerARM/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsARM.td + - llvm/test/MC/ARM/** + - llvm/lib/Target/ARM/** + - llvm/test/CodeGen/ARM/** + - clang/lib/Basic/Targets/ARM* + - clang/lib/Driver/ToolChains/Arch/ARM.* + - clang/lib/CodeGen/Targets/ARM.cpp + - clang/include/clang/Basic/BuiltinsARM* + - llvm/test/MC/Disassembler/ARM/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:AArch64: - - llvm/include/llvm/IR/IntrinsicsAArch64.td - - llvm/test/MC/AArch64/** - - llvm/lib/Target/AArch64/** - - llvm/test/CodeGen/AArch64/** - - clang/lib/Basic/Targets/AArch64* - - clang/lib/Driver/ToolChains/Arch/AArch64.* - - clang/lib/CodeGen/Targets/AArch64.cpp - - clang/include/clang/Basic/BuiltinsAArch64* - - llvm/test/MC/Disassembler/AArch64/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsAArch64.td + - llvm/test/MC/AArch64/** + - llvm/lib/Target/AArch64/** + - llvm/test/CodeGen/AArch64/** + - clang/lib/Basic/Targets/AArch64* + - clang/lib/Driver/ToolChains/Arch/AArch64.* + - clang/lib/CodeGen/Targets/AArch64.cpp + - clang/include/clang/Basic/BuiltinsAArch64* + - llvm/test/MC/Disassembler/AArch64/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:CSKY: - - llvm/lib/Target/CSKY/** - - llvm/include/llvm/TargetParser/CSKYTargetParser.def - - llvm/include/llvm/TargetParser/CSKYTargetParser.h - - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def - - llvm/lib/TargetParser/CSKYTargetParser.cpp - -
llvm/lib/Support/CSKYAttributes.cpp - - llvm/lib/Support/CSKYAttributeParser.cpp - - clang/lib/Basic/Targets/CSKY.h - - clang/lib/Basic/Targets/CSKY.cpp - - clang/lib/CodeGen/Targets/CSKY.cpp - - clang/lib/Driver/ToolChains/CSKY* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/CSKY/** + - llvm/include/llvm/TargetParser/CSKYTargetParser.def + - llvm/include/llvm/TargetParser/CSKYTargetParser.h + - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def + - llvm/lib/TargetParser/CSKYTargetParser.cpp + - llvm/lib/Support/CSKYAttributes.cpp + - llvm/lib/Support/CSKYAttributeParser.cpp + - clang/lib/Basic/Targets/CSKY.h + - clang/lib/Basic/Targets/CSKY.cpp + - clang/lib/CodeGen/Targets/CSKY.cpp + - clang/lib/Driver/ToolChains/CSKY* backend:Hexagon: - - clang/include/clang/Basic/BuiltinsHexagon*.def - - clang/include/clang/Sema/SemaHexagon.h - - clang/lib/Basic/Targets/Hexagon.* - - clang/lib/CodeGen/Targets/Hexagon.cpp - - clang/lib/Driver/ToolChains/Hexagon.* - - clang/lib/Sema/SemaHexagon.cpp - - lld/ELF/Arch/Hexagon.cpp - - lldb/source/Plugins/ABI/Hexagon/** - - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** - - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def - - llvm/include/llvm/IR/IntrinsicsHexagon* - - llvm/include/llvm/Support/Hexagon* - - llvm/lib/Support/Hexagon* - - llvm/lib/Target/Hexagon/** - - llvm/test/CodeGen/Hexagon/** - - llvm/test/CodeGen/*/Hexagon/** - - llvm/test/DebugInfo/*/Hexagon/** - - llvm/test/Transforms/*/Hexagon - - llvm/test/MC/Disassembler/Hexagon/** - - llvm/test/MC/Hexagon/** - - llvm/test/tools/llvm-objdump/ELF/Hexagon/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Basic/BuiltinsHexagon*.def + - clang/include/clang/Sema/SemaHexagon.h + - clang/lib/Basic/Targets/Hexagon.* + - clang/lib/CodeGen/Targets/Hexagon.cpp + - clang/lib/Driver/ToolChains/Hexagon.* + - clang/lib/Sema/SemaHexagon.cpp + - lld/ELF/Arch/Hexagon.cpp + - lldb/source/Plugins/ABI/Hexagon/** + - 
lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** + - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def + - llvm/include/llvm/IR/IntrinsicsHexagon* + - llvm/include/llvm/Support/Hexagon* + - llvm/lib/Support/Hexagon* + - llvm/lib/Target/Hexagon/** + - llvm/test/CodeGen/Hexagon/** + - llvm/test/CodeGen/*/Hexagon/** + - llvm/test/DebugInfo/*/Hexagon/** + - llvm/test/Transforms/*/Hexagon + - llvm/test/MC/Disassembler/Hexagon/** + - llvm/test/MC/Hexagon/** + - llvm/test/tools/llvm-objdump/ELF/Hexagon/** backend:Lanai: - - llvm/lib/Target/Lanai/** - - clang/lib/Basic/Targets/Lanai.h - - clang/lib/Basic/Targets/Lanai.cpp - - clang/lib/CodeGen/Targets/Lanai.cpp - - clang/lib/Driver/ToolChains/Lanai* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/Lanai/** + - clang/lib/Basic/Targets/Lanai.h + - clang/lib/Basic/Targets/Lanai.cpp + - clang/lib/CodeGen/Targets/Lanai.cpp + - clang/lib/Driver/ToolChains/Lanai* backend:loongarch: - - llvm/include/llvm/IR/IntrinsicsLoongArch.td - - llvm/test/MC/LoongArch/** - - llvm/lib/Target/LoongArch/** - - llvm/test/CodeGen/LoongArch/** - - clang/lib/Basic/Targets/LoongArch* - - clang/lib/Driver/ToolChains/Arch/LoongArch.* - - clang/lib/CodeGen/Targets/LoongArch.cpp - - clang/include/clang/Basic/BuiltinsLoongArch* - - clang/include/clang/Sema/SemaLoongArch.h - - clang/lib/Sema/SemaLoongArch.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsLoongArch.td + - llvm/test/MC/LoongArch/** + - llvm/lib/Target/LoongArch/** + - llvm/test/CodeGen/LoongArch/** + - clang/lib/Basic/Targets/LoongArch* + - clang/lib/Driver/ToolChains/Arch/LoongArch.* + - clang/lib/CodeGen/Targets/LoongArch.cpp + - clang/include/clang/Basic/BuiltinsLoongArch* + - clang/include/clang/Sema/SemaLoongArch.h + - clang/lib/Sema/SemaLoongArch.cpp backend:MSP430: - - llvm/include/llvm/IR/IntrinsicsMSP430.td - - llvm/test/MC/MSP430/** - - llvm/lib/Target/MSP430/** - - llvm/test/CodeGen/MSP430/** - - clang/lib/Basic/Targets/MSP430* 
- - clang/lib/Driver/ToolChains/Arch/MSP430.* - - clang/lib/CodeGen/Targets/MSP430.cpp - - clang/include/clang/Basic/BuiltinsMSP430* - - llvm/test/MC/Disassembler/MSP430/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsMSP430.td + - llvm/test/MC/MSP430/** + - llvm/lib/Target/MSP430/** + - llvm/test/CodeGen/MSP430/** + - clang/lib/Basic/Targets/MSP430* + - clang/lib/Driver/ToolChains/Arch/MSP430.* + - clang/lib/CodeGen/Targets/MSP430.cpp + - clang/include/clang/Basic/BuiltinsMSP430* + - llvm/test/MC/Disassembler/MSP430/** backend:Sparc: - - llvm/include/llvm/IR/IntrinsicsSparc.td - - llvm/test/MC/Sparc/** - - llvm/lib/Target/Sparc/** - - llvm/test/CodeGen/Sparc/** - - clang/lib/Basic/Targets/Sparc* - - clang/lib/Driver/ToolChains/Arch/Sparc.* - - clang/lib/CodeGen/Targets/Sparc.cpp - - clang/include/clang/Basic/BuiltinsSparc* - - llvm/test/MC/Disassembler/Sparc/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsSparc.td + - llvm/test/MC/Sparc/** + - llvm/lib/Target/Sparc/** + - llvm/test/CodeGen/Sparc/** + - clang/lib/Basic/Targets/Sparc* + - clang/lib/Driver/ToolChains/Arch/Sparc.* + - clang/lib/CodeGen/Targets/Sparc.cpp + - clang/include/clang/Basic/BuiltinsSparc* + - llvm/test/MC/Disassembler/Sparc/** backend:WebAssembly: - - llvm/lib/Target/WebAssembly/** - - llvm/test/CodeGen/WebAssembly/** - - clang/lib/Basic/Targets/WebAssembly* - - clang/include/clang/Basic/BuiltinsWebAssembly.def - - clang/include/clang/Basic/WebAssemblyReferenceTypes.def - - clang/lib/CodeGen/Targets/WebAssembly* - - llvm/include/llvm/IR/IntinsicsWebAssembly.td - - llvm/include/llvm/Object/Wasm* - - llvm/lib/CodeGen/AsmPrinter/Wasm* - - llvm/lib/CodeGen/Wasm* - - llvm/lib/MC/MCParser/Wasm* - - llvm/lib/MC/Wasm* - - llvm/lib/ObjCopy/wasm/** - - llvm/lib/Object/Wasm* - - clang/lib/Driver/Toolchains/WebAssembly* - - clang/lib/Headers/wasm_simd128.h - - clang/test/CodeGen/WebAssembly/** - - clang/test/SemaCXX/*wasm* - - 
clang/test/Sema/*wasm* - - llvm/include/llvm/BinaryFormat/Wasm.h - - llvm/unittests/Target/WebAssembly/** - - llvm/test/DebugInfo/WebAssembly/** - - llvm/test/MC/WebAssembly/** - - clang/include/clang/Sema/SemaWasm.h - - clang/lib/Sema/SemaLoongWasm.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/WebAssembly/** + - llvm/test/CodeGen/WebAssembly/** + - clang/lib/Basic/Targets/WebAssembly* + - clang/include/clang/Basic/BuiltinsWebAssembly.def + - clang/include/clang/Basic/WebAssemblyReferenceTypes.def + - clang/lib/CodeGen/Targets/WebAssembly* + - llvm/include/llvm/IR/IntinsicsWebAssembly.td + - llvm/include/llvm/Object/Wasm* + - llvm/lib/CodeGen/AsmPrinter/Wasm* + - llvm/lib/CodeGen/Wasm* + - llvm/lib/MC/MCParser/Wasm* + - llvm/lib/MC/Wasm* + - llvm/lib/ObjCopy/wasm/** + - llvm/lib/Object/Wasm* + - clang/lib/Driver/Toolchains/WebAssembly* + - clang/lib/Headers/wasm_simd128.h + - clang/test/CodeGen/WebAssembly/** + - clang/test/SemaCXX/*wasm* + - clang/test/Sema/*wasm* + - llvm/include/llvm/BinaryFormat/Wasm.h + - llvm/unittests/Target/WebAssembly/** + - llvm/test/DebugInfo/WebAssembly/** + - llvm/test/MC/WebAssembly/** + - clang/include/clang/Sema/SemaWasm.h + - clang/lib/Sema/SemaLoongWasm.cpp backend:X86: - - llvm/include/llvm/IR/IntrinsicsX86.td - - llvm/lib/Target/X86/** - - llvm/test/CodeGen/X86/** - - llvm/test/MC/X86/** - - llvm/test/MC/Disassembler/X86/** - - llvm/test/Analysis/CostModel/X86/** - - llvm/test/tools/llvm-mca/X86/** - - clang/lib/Basic/Targets/X86/** - - clang/lib/Driver/ToolChains/Arch/X86.* - - clang/lib/CodeGen/Targets/X86.* - - clang/lib/Headers/** - - clang/test/CodeGen/X86/** - - clang/include/clang/Basic/BuiltinsX86* - - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h - - llvm/include/llvm/TargetParser/X86* - - llvm/lib/TargetParser/X86* - - llvm/utils/TableGen/X86* - - clang/include/clang/Sema/SemaX86.h - - clang/lib/Sema/SemaX86.cpp + - changed-files: + - any-glob-to-any-file: + - 
llvm/include/llvm/IR/IntrinsicsX86.td + - llvm/lib/Target/X86/** + - llvm/test/CodeGen/X86/** + - llvm/test/MC/X86/** + - llvm/test/MC/Disassembler/X86/** + - llvm/test/Analysis/CostModel/X86/** + - llvm/test/tools/llvm-mca/X86/** + - clang/lib/Basic/Targets/X86/** + - clang/lib/Driver/ToolChains/Arch/X86.* + - clang/lib/CodeGen/Targets/X86.* + - clang/lib/Headers/** + - clang/test/CodeGen/X86/** + - clang/include/clang/Basic/BuiltinsX86* + - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h + - llvm/include/llvm/TargetParser/X86* + - llvm/lib/TargetParser/X86* + - llvm/utils/TableGen/X86* + - clang/include/clang/Sema/SemaX86.h + - clang/lib/Sema/SemaX86.cpp backend:PowerPC: - - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* - - llvm/include/llvm/BinaryFormat/XCOFF.h - - llvm/include/llvm/IR/IntrinsicsPowerPC.td - - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp - - llvm/lib/Target/PowerPC/** - - llvm/test/Analysis/**/PowerPC/** - - llvm/test/CodeGen/PowerPC/** - - llvm/test/CodeGen/MIR/PowerPC/** - - llvm/test/DebugInfo/XCOFF/** - - llvm/test/DebugInfo/PowerPC/** - - llvm/test/LTO/PowerPC/** - - llvm/test/MC/Disassembler/PowerPC/** - - llvm/test/MC/PowerPC/** - - llvm/test/MC/XCOFF/** - - llvm/test/Transforms/**/PowerPC/** - - clang/include/clang/Basic/BuiltinsPPC.* - - clang/lib/Basic/Targets/PPC.* - - clang/lib/CodeGen/Targets/PPC.cpp - - clang/lib/Driver/ToolChains/PPC* - - clang/lib/Driver/ToolChains/AIX* - - clang/lib/Driver/ToolChains/Arch/PPC.* - - clang/test/CodeGen/PowerPC/** - - clang/include/clang/Sema/SemaPPC.h - - clang/lib/Sema/SemaPPC.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* + - llvm/include/llvm/BinaryFormat/XCOFF.h + - llvm/include/llvm/IR/IntrinsicsPowerPC.td + - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp + - llvm/lib/Target/PowerPC/** + - llvm/test/Analysis/**/PowerPC/** + - llvm/test/CodeGen/PowerPC/** + - llvm/test/CodeGen/MIR/PowerPC/** + - llvm/test/DebugInfo/XCOFF/** + 
- llvm/test/DebugInfo/PowerPC/** + - llvm/test/LTO/PowerPC/** + - llvm/test/MC/Disassembler/PowerPC/** + - llvm/test/MC/PowerPC/** + - llvm/test/MC/XCOFF/** + - llvm/test/Transforms/**/PowerPC/** + - clang/include/clang/Basic/BuiltinsPPC.* + - clang/lib/Basic/Targets/PPC.* + - clang/lib/CodeGen/Targets/PPC.cpp + - clang/lib/Driver/ToolChains/PPC* + - clang/lib/Driver/ToolChains/AIX* + - clang/lib/Driver/ToolChains/Arch/PPC.* + - clang/test/CodeGen/PowerPC/** + - clang/include/clang/Sema/SemaPPC.h + - clang/lib/Sema/SemaPPC.cpp backend:SystemZ: - - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* - - llvm/include/llvm/BinaryFormat/GOFF.h - - llvm/include/llvm/IR/IntrinsicsSystemZ.td - - llvm/lib/Target/SystemZ/** - - llvm/test/Analysis/**/SystemZ/** - - llvm/test/CodeGen/SystemZ/** - - llvm/test/DebugInfo/SystemZ/** - - llvm/test/ExecutionEngine/**/SystemZ/** - - llvm/test/MC/Disassembler/SystemZ/** - - llvm/test/MC/GOFF/** - - llvm/test/MC/SystemZ/** - - llvm/test/Transforms/**/SystemZ/** - - clang/include/clang/Basic/BuiltinsSystemZ.* - - clang/lib/Basic/Targets/SystemZ.* - - clang/lib/CodeGen/Targets/SystemZ.cpp - - clang/lib/Driver/ToolChains/ZOS* - - clang/lib/Driver/ToolChains/Arch/SystemZ.* - - clang/test/CodeGen/SystemZ/** - - clang/include/clang/Sema/SemaSystemZ.h - - clang/lib/Sema/SemaSystemZ.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* + - llvm/include/llvm/BinaryFormat/GOFF.h + - llvm/include/llvm/IR/IntrinsicsSystemZ.td + - llvm/lib/Target/SystemZ/** + - llvm/test/Analysis/**/SystemZ/** + - llvm/test/CodeGen/SystemZ/** + - llvm/test/DebugInfo/SystemZ/** + - llvm/test/ExecutionEngine/**/SystemZ/** + - llvm/test/MC/Disassembler/SystemZ/** + - llvm/test/MC/GOFF/** + - llvm/test/MC/SystemZ/** + - llvm/test/Transforms/**/SystemZ/** + - clang/include/clang/Basic/BuiltinsSystemZ.* + - clang/lib/Basic/Targets/SystemZ.* + - clang/lib/CodeGen/Targets/SystemZ.cpp + - clang/lib/Driver/ToolChains/ZOS* + - 
clang/lib/Driver/ToolChains/Arch/SystemZ.* + - clang/test/CodeGen/SystemZ/** + - clang/include/clang/Sema/SemaSystemZ.h + - clang/lib/Sema/SemaSystemZ.cpp third-party:unittests: - - third-party/unittests/** + - changed-files: + - any-glob-to-any-file: + - third-party/unittests/** third-party:benchmark: - - third-party/benchmark/** + - changed-files: + - any-glob-to-any-file: + - third-party/benchmark/** llvm:binary-utilities: - - llvm/docs/CommandGuide/llvm-* - - llvm/include/llvm/BinaryFormat/** - - llvm/include/llvm/DebugInfo/Symbolize/** - - llvm/include/llvm/ObjCopy/** - - llvm/include/llvm/Object/** - - llvm/lib/BinaryFormat/** - - llvm/lib/DebugInfo/Symbolize/** - - llvm/lib/ObjCopy/** - - llvm/lib/Object/** - - llvm/test/Object/** - - llvm/test/tools/llvm-ar/** - - llvm/test/tools/llvm-cxxfilt/** - - llvm/test/tools/llvm-nm/** - - llvm/test/tools/llvm-objcopy/** - - llvm/test/tools/llvm-objdump/** - - llvm/test/tools/llvm-readobj/** - - llvm/test/tools/llvm-size/** - - llvm/test/tools/llvm-strings/** - - llvm/test/tools/llvm-symbolizer/** - - llvm/tools/llvm-ar/** - - llvm/tools/llvm-cxxfilt/** - - llvm/tools/llvm-nm/** - - llvm/tools/llvm-objcopy/** - - llvm/tools/llvm-objdump/** - - llvm/tools/llvm-readobj/** - - llvm/tools/llvm-size/** - - llvm/tools/llvm-strings/** - - llvm/tools/llvm-symbolizer/** + - changed-files: + - any-glob-to-any-file: + - llvm/docs/CommandGuide/llvm-* + - llvm/include/llvm/BinaryFormat/** + - llvm/include/llvm/DebugInfo/Symbolize/** + - llvm/include/llvm/ObjCopy/** + - llvm/include/llvm/Object/** + - llvm/lib/BinaryFormat/** + - llvm/lib/DebugInfo/Symbolize/** + - llvm/lib/ObjCopy/** + - llvm/lib/Object/** + - llvm/test/Object/** + - llvm/test/tools/llvm-ar/** + - llvm/test/tools/llvm-cxxfilt/** + - llvm/test/tools/llvm-nm/** + - llvm/test/tools/llvm-objcopy/** + - llvm/test/tools/llvm-objdump/** + - llvm/test/tools/llvm-readobj/** + - llvm/test/tools/llvm-size/** + - llvm/test/tools/llvm-strings/** + - 
llvm/test/tools/llvm-symbolizer/** + - llvm/tools/llvm-ar/** + - llvm/tools/llvm-cxxfilt/** + - llvm/tools/llvm-nm/** + - llvm/tools/llvm-objcopy/** + - llvm/tools/llvm-objdump/** + - llvm/tools/llvm-readobj/** + - llvm/tools/llvm-size/** + - llvm/tools/llvm-strings/** + - llvm/tools/llvm-symbolizer/** clang:openmp: - - clang/include/clang/Basic/OpenMP* - - clang/include/clang/AST/OpenMPClause.h - - clang/include/clang/AST/DeclOpenMP.h - - clang/include/clang/AST/ExprOpenMP.h - - clang/include/clang/AST/StmtOpenMP.h - - clang/lib/AST/DeclOpenMP.cpp - - clang/lib/AST/OpenMPClause.cpp - - clang/lib/AST/StmtOpenMP.cpp - - clang/lib/Headers/openmp_wrappers/** - - clang/lib/Parse/ParseOpenMP.cpp - - clang/lib/Basic/OpenMPKinds.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h - - clang/lib/CodeGen/CgStmtOpenMP.cpp - - clang/lib/CodeGen/CGOpenMP* - - clang/lib/Sema/SemaOpenMP.cpp - - clang/test/OpenMP/** - - clang/test/AST/ast-dump-openmp-* - - llvm/lib/Frontend/OpenMP/** - - llvm/lib/Transforms/IPO/OpenMPOpt.cpp - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h - - llvm/unittests/Frontend/OpenMP* - - llvm/test/Transforms/OpenMP/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Basic/OpenMP* + - clang/include/clang/AST/OpenMPClause.h + - clang/include/clang/AST/DeclOpenMP.h + - clang/include/clang/AST/ExprOpenMP.h + - clang/include/clang/AST/StmtOpenMP.h + - clang/lib/AST/DeclOpenMP.cpp + - clang/lib/AST/OpenMPClause.cpp + - clang/lib/AST/StmtOpenMP.cpp + - clang/lib/Headers/openmp_wrappers/** + - clang/lib/Parse/ParseOpenMP.cpp + - clang/lib/Basic/OpenMPKinds.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h + - clang/lib/CodeGen/CgStmtOpenMP.cpp + - clang/lib/CodeGen/CGOpenMP* + - clang/lib/Sema/SemaOpenMP.cpp + - clang/test/OpenMP/** + - clang/test/AST/ast-dump-openmp-* + - llvm/lib/Frontend/OpenMP/** + - 
llvm/lib/Transforms/IPO/OpenMPOpt.cpp + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h + - llvm/unittests/Frontend/OpenMP* + - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** - - clang/unittests/Analysis/LifetimeSafety* - - clang/test/Sema/*lifetime-safety* - - clang/test/Sema/*lifetime-analysis* - - clang/test/Analysis/LifetimeSafety/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** clang:as-a-library: - - clang/tools/libclang/** - - clang/bindings/** - - clang/include/clang-c/** - - clang/test/LibClang/** - - clang/unittest/libclang/** + - changed-files: + - any-glob-to-any-file: + - clang/tools/libclang/** + - clang/bindings/** + - clang/include/clang-c/** + - clang/test/LibClang/** + - clang/unittest/libclang/** openmp:libomp: - - any: ['openmp/**', '!openmp/libomptarget/**'] + - changed-files: + - any-glob-to-any-file: + - 'openmp/**' openmp:libomptarget: - - any: ['openmp/**', '!openmp/runtime/**'] + - changed-files: + - all-globs-to-all-file: + - openmp/** + - '!openmp/runtime/**'' bazel: - - utils/bazel/** + - changed-files: + - any-glob-to-any-file: + - utils/bazel/** offload: - - offload/** + - changed-files: + - any-glob-to-any-file: + - offload/** tablegen: - - llvm/include/TableGen/** - - llvm/lib/TableGen/** - - llvm/utils/TableGen/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/TableGen/** + - llvm/lib/TableGen/** + - llvm/utils/TableGen/** infrastructure: - - .ci/** + - changed-files: + - any-glob-to-any-file: + - .ci/** diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index e1f2e754c1a3d..dc8cd100f3e68 
100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -67,7 +67,7 @@ jobs: github.event.pull_request.draft == false && github.event.pull_request.commits < 10 steps: - - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0 + - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 with: configuration-path: .github/new-prs-labeler.yml # workaround for https://github.com/actions/labeler/issues/112 From 6c02bcb24465cfacb79cfbc918a437ec2d8e3ada Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 9 Nov 2025 18:22:44 -0800 Subject: [PATCH 05/11] [Github] Make Windows container use zstd (#167022) This enables much faster image unpack times. We benchmarked 20-30% improvements when testing this initially. Use skopeo to copy the image as it just works over the docker-archive/OCI container formats and does not need to unpack the image to upload it. --- .github/workflows/build-ci-container-windows.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-ci-container-windows.yml b/.github/workflows/build-ci-container-windows.yml index b6c46b70030ab..3996948bb44e0 100644 --- a/.github/workflows/build-ci-container-windows.yml +++ b/.github/workflows/build-ci-container-windows.yml @@ -56,7 +56,7 @@ jobs: - build-ci-container-windows permissions: packages: write - runs-on: windows-2022 + runs-on: ubuntu-24.04 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: @@ -66,8 +66,12 @@ jobs: name: container - name: Push Container run: | - docker load -i ${{ needs.build-ci-container-windows.outputs.container-filename }} - docker tag ${{ needs.build-ci-container-windows.outputs.container-name-tag }} ${{ needs.build-ci-container-windows.outputs.container-name }}:latest - docker login -u ${{ github.actor }} -p $env:GITHUB_TOKEN ghcr.io - docker push ${{ needs.build-ci-container-windows.outputs.container-name-tag }} - docker push ${{ 
needs.build-ci-container-windows.outputs.container-name }}:latest + sudo apt-get update + sudo apt-get install -y skopeo + skopeo login -u ${{ github.actor }} -p ${{ secrets.GITHUB_TOKEN }} ghcr.io + skopeo copy docker-archive:${{ needs.build-ci-container-windows.outputs.container-filename }} \ + --dest-compress-format zstd \ + docker://${{ needs.build-ci-container-windows.outputs.container-name-tag }} + skopeo copy docker-archive:${{ needs.build-ci-container-windows.outputs.container-filename }} \ + --dest-compress-format zstd \ + docker://${{ needs.build-ci-container-windows.outputs.container-name }}:latest From b3d62645158cd6f463f2e1c878f6d63b9dc4b164 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 10 Nov 2025 02:24:59 +0000 Subject: [PATCH 06/11] Revert "[Github] Update PR labeller to v6.0.1 (#167246)" This reverts commit 10da6ab5362158c1f63e0c8eaa893c55b49dc3f4. This also caused workflow failures. 1. https://github.com/llvm/llvm-project/actions/runs/19218607216 --- .github/new-prs-labeler.yml | 1942 ++++++++++++++------------------- .github/workflows/new-prs.yml | 2 +- 2 files changed, 813 insertions(+), 1131 deletions(-) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index bb0eef5842b0f..efdc42d349195 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,1449 +1,1131 @@ BOLT: - - changed-files: - - any-glob-to-any-file: - - bolt/**/* + - bolt/**/* ClangIR: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/CIR/**/* - - clang/lib/CIR/**/* - - clang/tools/cir-*/**/* - - clang/test/CIR/**/* + - clang/include/clang/CIR/**/* + - clang/lib/CIR/**/* + - clang/tools/cir-*/**/* + - clang/test/CIR/**/* clang:bytecode: - - changed-files: - - any-glob-to-any-file: - - clang/docs/ConstantInterpreter.rst - - clang/lib/AST/ByteCode/**/* - - clang/test/AST/ByteCode/**/* - - clang/unittests/AST/ByteCode/**/* + - clang/docs/ConstantInterpreter.rst + - clang/lib/AST/ByteCode/**/* + - 
clang/test/AST/ByteCode/**/* + - clang/unittests/AST/ByteCode/**/* clang:dataflow: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Analysis/FlowSensitive/**/* - - clang/lib/Analysis/FlowSensitive/**/* - - clang/unittests/Analysis/FlowSensitive/**/* - - clang/docs/DataFlowAnalysisIntro.md - - clang/docs/DataFlowAnalysisIntroImages/**/* + - clang/include/clang/Analysis/FlowSensitive/**/* + - clang/lib/Analysis/FlowSensitive/**/* + - clang/unittests/Analysis/FlowSensitive/**/* + - clang/docs/DataFlowAnalysisIntro.md + - clang/docs/DataFlowAnalysisIntroImages/**/* clang:frontend: - - changed-files: - - any-glob-to-any-file: - - clang/lib/AST/**/* - - clang/include/clang/AST/**/* - - clang/lib/Basic/**/* - - clang/include/clang/Basic/**/* - - clang/lib/Interpreter/**/* - - clang/include/clang/Interpreter/**/* - - clang/lib/Lex/**/* - - clang/include/clang/Lex/**/* - - clang/lib/Parse/**/* - - clang/include/clang/Parse/**/* - - clang/lib/Sema/**/* - - clang/include/clang/Sema/**/* + - clang/lib/AST/**/* + - clang/include/clang/AST/**/* + - clang/lib/Basic/**/* + - clang/include/clang/Basic/**/* + - clang/lib/Interpreter/**/* + - clang/include/clang/Interpreter/**/* + - clang/lib/Lex/**/* + - clang/include/clang/Lex/**/* + - clang/lib/Parse/**/* + - clang/include/clang/Parse/**/* + - clang/lib/Sema/**/* + - clang/include/clang/Sema/**/* clang:headers: - - changed-files: - - any-glob-to-any-file: - - clang/lib/Headers/**/* + - clang/lib/Headers/**/* compiler-rt: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/**/* + - compiler-rt/**/* flang: - - changed-files: - - any-glob-to-any-file: - - flang/**/* + - flang/**/* flang:frontend: - - changed-files: - - any-glob-to-any-file: - - flang/Parser/**/* - - flang/Evaluate/**/* - - flang/Semantics/**/* + - flang/Parser/**/* + - flang/Evaluate/**/* + - flang/Semantics/**/* libclc: - - changed-files: - - any-glob-to-any-file: - - libclc/** + - libclc/** HLSL: - - changed-files: - - 
any-glob-to-any-file: - - clang/*HLSL*/**/* - - clang/**/*HLSL* - - llvm/**/Frontend/HLSL/**/* + - clang/*HLSL*/**/* + - clang/**/*HLSL* + - llvm/**/Frontend/HLSL/**/* lld: - - changed-files: - - any-glob-to-any-file: - - lld/**/* + - lld/**/* llvm-lit: - - changed-files: - - any-glob-to-any-file: - - llvm/utils/lit/**/* + - llvm/utils/lit/**/* PGO: - - changed-files: - - any-glob-to-any-file: - - llvm/**/ProfileData/**/* - - llvm/**/SampleProfile* - - llvm/**/CodeGen/MIRSampleProfile* - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/**/* - - llvm/test/Transforms/PGOProfile/**/* - - llvm/test/Transforms/SampleProfile/**/* - - llvm/**/llvm-profdata/**/* - - llvm/**/llvm-profgen/**/* + - llvm/**/ProfileData/**/* + - llvm/**/SampleProfile* + - llvm/**/CodeGen/MIRSampleProfile* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/**/* + - llvm/test/Transforms/PGOProfile/**/* + - llvm/test/Transforms/SampleProfile/**/* + - llvm/**/llvm-profdata/**/* + - llvm/**/llvm-profgen/**/* vectorizers: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Vectorize/**/* - - llvm/include/llvm/Transforms/Vectorize/**/* + - llvm/lib/Transforms/Vectorize/**/* + - llvm/include/llvm/Transforms/Vectorize/**/* # IMPORTED FROM CODEOWNERS LTO: - - changed-files: - - any-glob-to-any-file: - - llvm/*/LTO/** - - 
llvm/*/Linker/** - - llvm/*/ThinLTO/** - - llvm/lib/Transforms/*/FunctionImport* - - llvm/tools/gold/** + - llvm/*/LTO/** + - llvm/*/Linker/** + - llvm/*/ThinLTO/** + - llvm/lib/Transforms/*/FunctionImport* + - llvm/tools/gold/** clang:driver: - - changed-files: - - any-glob-to-any-file: - - clang/*/Driver/** + - clang/*/Driver/** compiler-rt:asan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/asan/** - - compiler-rt/include/sanitizer/asan_interface.h - - compiler-rt/test/asan/** - - compiler-rt/lib/asan_abi/** - - compiler-rt/test/asan_abi/** + - compiler-rt/lib/asan/** + - compiler-rt/include/sanitizer/asan_interface.h + - compiler-rt/test/asan/** + - compiler-rt/lib/asan_abi/** + - compiler-rt/test/asan_abi/** compiler-rt:builtins: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/builtins/** - - compiler-rt/test/builtins/** + - compiler-rt/lib/builtins/** + - compiler-rt/test/builtins/** compiler-rt:cfi: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/cfi/** - - compiler-rt/test/cfi/** + - compiler-rt/lib/cfi/** + - compiler-rt/test/cfi/** compiler-rt:fuzzer: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** compiler-rt:hwasan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/hwasan/** - - compiler-rt/include/sanitizer/hwasan_interface.h - - compiler-rt/test/hwasan/** + - compiler-rt/lib/hwasan/** + - compiler-rt/include/sanitizer/hwasan_interface.h + - compiler-rt/test/hwasan/** compiler-rt:lsan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/lsan/** - - compiler-rt/include/sanitizer/lsan_interface.h - - compiler-rt/test/lsan/** + - compiler-rt/lib/lsan/** + - compiler-rt/include/sanitizer/lsan_interface.h + - compiler-rt/test/lsan/** compiler-rt:msan: - - changed-files: - - 
any-glob-to-any-file: - - compiler-rt/lib/msan/** - - compiler-rt/include/sanitizer/msan_interface.h - - compiler-rt/test/msan/** + - compiler-rt/lib/msan/** + - compiler-rt/include/sanitizer/msan_interface.h + - compiler-rt/test/msan/** compiler-rt:sanitizer: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Instrumentation/*Sanitizer* - - compiler-rt/lib/interception/** - - compiler-rt/lib/*san*/** - - compiler-rt/include/sanitizer/** - - compiler-rt/test/*san*/** - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - llvm/lib/Transforms/Instrumentation/*Sanitizer* + - compiler-rt/lib/interception/** + - compiler-rt/lib/*san*/** + - compiler-rt/include/sanitizer/** + - compiler-rt/test/*san*/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:scudo: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:tsan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/tsan/** - - compiler-rt/include/sanitizer/tsan_interface.h - - compiler-rt/include/sanitizer/tsan_interface_atomic.h - - compiler-rt/test/tsan/** + - compiler-rt/lib/tsan/** + - compiler-rt/include/sanitizer/tsan_interface.h + - compiler-rt/include/sanitizer/tsan_interface_atomic.h + - compiler-rt/test/tsan/** compiler-rt:ubsan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/ubsan/** - - compiler-rt/include/sanitizer/ubsan_interface.h - - compiler-rt/test/ubsan/** - - compiler-rt/lib/ubsan_minimal/** - - compiler-rt/test/ubsan_minimal/** + - compiler-rt/lib/ubsan/** + - compiler-rt/include/sanitizer/ubsan_interface.h + - compiler-rt/test/ubsan/** + - compiler-rt/lib/ubsan_minimal/** + - compiler-rt/test/ubsan_minimal/** 
xray: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-xray/** - - compiler-rt/*/xray/** - - clang/include/clang/Basic/XRay* - - clang/lib/Basic/XRay* - - compiler-rt/*/xray/** - - llvm/include/llvm/XRay/** - - llvm/lib/XRay/** - - llvm/tools/llvm-xray/** - - llvm/unittests/XRay/** - - compiler-rt/*/xray/** + - llvm/tools/llvm-xray/** + - compiler-rt/*/xray/** + - clang/include/clang/Basic/XRay* + - clang/lib/Basic/XRay* + - compiler-rt/*/xray/** + - llvm/include/llvm/XRay/** + - llvm/lib/XRay/** + - llvm/tools/llvm-xray/** + - llvm/unittests/XRay/** + - compiler-rt/*/xray/** clang:codegen: - - changed-files: - - any-glob-to-any-file: - - clang/lib/CodeGen/** - - clang/include/clang/CodeGen/** + - clang/lib/CodeGen/** + - clang/include/clang/CodeGen/** mlir: - - changed-files: - - any-glob-to-any-file: - - mlir/** + - mlir/** mlir:core: - - changed-files: - - any-glob-to-any-file: - - mlir/include/mlir/Support/** - - mlir/lib/Support/** - - mlir/include/mlir/Parser/** - - mlir/lib/Parser/** - - mlir/include/mlir/IR/** - - mlir/lib/IR/** - - mlir/include/mlir/Bytecode/** - - mlir/lib/Bytecode/** - - mlir/include/mlir/AsmParser/** - - mlir/lib/AsmParser/** - - mlir/include/mlir/Pass/** - - mlir/lib/Pass/** - - mlir/include/mlir/Tools/** - - mlir/lib/Tools/** - - mlir/include/mlir/Reducer/** - - mlir/lib/Reducer/** - - mlir/include/mlir/Transforms/** - - mlir/lib/Transforms/** - - mlir/include/mlir/Debug/** - - mlir/lib/Debug/** - - mlir/tools/** + - mlir/include/mlir/Support/** + - mlir/lib/Support/** + - mlir/include/mlir/Parser/** + - mlir/lib/Parser/** + - mlir/include/mlir/IR/** + - mlir/lib/IR/** + - mlir/include/mlir/Bytecode/** + - mlir/lib/Bytecode/** + - mlir/include/mlir/AsmParser/** + - mlir/lib/AsmParser/** + - mlir/include/mlir/Pass/** + - mlir/lib/Pass/** + - mlir/include/mlir/Tools/** + - mlir/lib/Tools/** + - mlir/include/mlir/Reducer/** + - mlir/lib/Reducer/** + - mlir/include/mlir/Transforms/** + - mlir/lib/Transforms/** + - 
mlir/include/mlir/Debug/** + - mlir/lib/Debug/** + - mlir/tools/** mlir:ods: - - changed-files: - - any-glob-to-any-file: - - mlir/TableGen/** - - mlir/tblgen/** - - mlir/include/mlir/IR/*.td + - mlir/TableGen/** + - mlir/tblgen/** + - mlir/include/mlir/IR/*.td mlir:bindings: - - changed-files: - - any-glob-to-any-file: - - mlir/Bindings/** + - mlir/Bindings/** mlir:gpu: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*GPU*/** + - mlir/**/*GPU*/** mlir:amdgpu: - - changed-files: - - any-glob-to-any-file: - - mlir/**/AMDGPU/** + - mlir/**/AMDGPU/** mlir:amx: - - changed-files: - - any-glob-to-any-file: - - mlir/**/AMX/** + - mlir/**/AMX/** mlir:affine: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Affine/** + - mlir/**/Affine/** mlir:arith: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Arith/** + - mlir/**/Arith/** mlir:neon: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ArmNeon/** + - mlir/**/ArmNeon/** mlir:sme: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ArmSME/** + - mlir/**/ArmSME/** mlir:sve: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ArmSVE/** + - mlir/**/ArmSVE/** mlir:async: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Async/** - - mlir/**/Async/** + - mlir/**/Async/** + - mlir/**/Async/** mlir:bufferization: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Bufferization/** + - mlir/**/Bufferization/** mlir:complex: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Complex/** + - mlir/**/Complex/** mlir:cf: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ControlFlow/** + - mlir/**/ControlFlow/** mlir:dlti: - - changed-files: - - any-glob-to-any-file: - - mlir/**/DLTI/** + - mlir/**/DLTI/** mlir:emitc: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*EmitC*/** - - mlir/lib/Target/Cpp/** + - mlir/**/*EmitC*/** + - mlir/lib/Target/Cpp/** mlir:func: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Func/** + - mlir/**/Func/** mlir:irdl: - - 
changed-files: - - any-glob-to-any-file: - - mlir/**/IRDL/** + - mlir/**/IRDL/** mlir:index: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Index/** + - mlir/**/Index/** mlir:llvm: - - changed-files: - - any-glob-to-any-file: - - mlir/**/LLVM* - - mlir/**/LLVM*/** + - mlir/**/LLVM* + - mlir/**/LLVM*/** mlir:linalg: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*linalg/** - - mlir/**/*Linalg/** + - mlir/**/*linalg/** + - mlir/**/*Linalg/** mlir:mlprogram: - - changed-files: - - any-glob-to-any-file: - - mlir/**/MLProgram/** + - mlir/**/MLProgram/** mlir:math: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Math/** + - mlir/**/Math/** mlir:memref: - - changed-files: - - any-glob-to-any-file: - - mlir/**/MemRef/** + - mlir/**/MemRef/** mlir:nvgpu: - - changed-files: - - any-glob-to-any-file: - - mlir/**/NVGPU/** + - mlir/**/NVGPU/** mlir:openacc: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*OpenACC* - - mlir/**/*OpenACC*/** + - mlir/**/*OpenACC* + - mlir/**/*OpenACC*/** mlir:openmp: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*OpenMP* - - mlir/**/*OpenMP*/** + - mlir/**/*OpenMP* + - mlir/**/*OpenMP*/** mlir:pdl: - - changed-files: - - any-glob-to-any-file: - - mlir/**/PDL/** + - mlir/**/PDL/** mlir:quant: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Quant/** + - mlir/**/Quant/** mlir:scf: - - changed-files: - - any-glob-to-any-file: - - mlir/**/SCF/** + - mlir/**/SCF/** mlir:spirv: - - changed-files: - - any-glob-to-any-file: - - mlir/**/SPIRV/** - - mlir/**/SPIRVTo*/** - - mlir/**/*ToSPIRV/** - - mlir/tools/mlir-spirv-cpu-runner/** - - mlir/tools/mlir-vulkan-runner/** - - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp + - mlir/**/SPIRV/** + - mlir/**/SPIRVTo*/** + - mlir/**/*ToSPIRV/** + - mlir/tools/mlir-spirv-cpu-runner/** + - mlir/tools/mlir-vulkan-runner/** + - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp mlir:shape: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Shape/** + - mlir/**/Shape/** 
mlir:sparse: - - changed-files: - - any-glob-to-any-file: - - mlir/**/SparseTensor/** + - mlir/**/SparseTensor/** mlir:tensor: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Tensor/** + - mlir/**/Tensor/** mlir:tosa: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*Tosa*/** + - mlir/**/*Tosa*/** mlir:ub: - - changed-files: - - any-glob-to-any-file: - - mlir/**/UB/** + - mlir/**/UB/** mlir:vector: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*Vector/** + - mlir/**/*Vector/** mlir:execution-engine: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ExecutionEngine/** + - mlir/**/ExecutionEngine/** mlir:presburger: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*Presburger*/** + - mlir/**/*Presburger*/** mlir:python: - - changed-files: - - any-glob-to-any-file: - - mlir/python/**/* + - mlir/python/**/* mlir:vectorops: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Vector/**/* + - mlir/**/Vector/**/* coroutines: - - changed-files: - - any-glob-to-any-file: - - clang/docs/DebuggingCoroutines.rst - - clang/lib/Sema/SemaCoroutine.cpp - - clang/lib/CodeGen/CGCoroutine.cpp - - clang/test/CodeGenCoroutines/** - - llvm/docs/Coroutines.rst - - llvm/include/llvm/Transforms/Coroutines/** - - llvm/lib/Transforms/Coroutines/** - - llvm/test/Transforms/Coroutines/* + - clang/docs/DebuggingCoroutines.rst + - clang/lib/Sema/SemaCoroutine.cpp + - clang/lib/CodeGen/CGCoroutine.cpp + - clang/test/CodeGenCoroutines/** + - llvm/docs/Coroutines.rst + - llvm/include/llvm/Transforms/Coroutines/** + - llvm/lib/Transforms/Coroutines/** + - llvm/test/Transforms/Coroutines/* clang:modules: - - changed-files: - - any-glob-to-any-file: - - clang/docs/StandardCPlusPlusModules.rst - - clang/include/clang/AST/AbstractBasicReader.h - - clang/include/clang/AST/AbstractBasicWriter.h - - clang/include/clang/AST/AbstractTypeReader.h - - clang/include/clang/AST/AbstractTypeWriter.h - - clang/include/clang/AST/PropertiesBase.td - - 
clang/include/clang/AST/ODRHash.h - - clang/include/clang/AST/TypeProperties.td - - clang/include/clang/Basic/Module.h - - clang/include/clang/Frontend/PrecompiledPreamble.h - - clang/include/clang/Lex/ModuleLoader.h - - clang/include/clang/Lex/ModuleMap.h - - clang/include/clang/Serialization/** - - clang/lib/AST/ODRHash.cpp - - clang/lib/AST/StmtProfile.cpp - - clang/lib/Basic/Module.cpp - - clang/lib/Frontend/ModuleDependencyCollector.cpp - - clang/lib/Frontend/PrecompiledPreamble.cpp - - clang/lib/Lex/ModuleMap.cpp - - clang/lib/Sema/SemaModule.cpp - - clang/lib/Serialization/** - - clang/test/CXX/module/** - - clang/test/Modules/** - - clang/unittests/Serialization/* + - clang/docs/StandardCPlusPlusModules.rst + - clang/include/clang/AST/AbstractBasicReader.h + - clang/include/clang/AST/AbstractBasicWriter.h + - clang/include/clang/AST/AbstractTypeReader.h + - clang/include/clang/AST/AbstractTypeWriter.h + - clang/include/clang/AST/PropertiesBase.td + - clang/include/clang/AST/ODRHash.h + - clang/include/clang/AST/TypeProperties.td + - clang/include/clang/Basic/Module.h + - clang/include/clang/Frontend/PrecompiledPreamble.h + - clang/include/clang/Lex/ModuleLoader.h + - clang/include/clang/Lex/ModuleMap.h + - clang/include/clang/Serialization/** + - clang/lib/AST/ODRHash.cpp + - clang/lib/AST/StmtProfile.cpp + - clang/lib/Basic/Module.cpp + - clang/lib/Frontend/ModuleDependencyCollector.cpp + - clang/lib/Frontend/PrecompiledPreamble.cpp + - clang/lib/Lex/ModuleMap.cpp + - clang/lib/Sema/SemaModule.cpp + - clang/lib/Serialization/** + - clang/test/CXX/module/** + - clang/test/Modules/** + - clang/unittests/Serialization/* clang-tidy: - - changed-files: - - any-glob-to-any-file: - - clang-tools-extra/clang-tidy/** - - clang-tools-extra/docs/clang-tidy/** - - clang-tools-extra/test/clang-tidy/** + - clang-tools-extra/clang-tidy/** + - clang-tools-extra/docs/clang-tidy/** + - clang-tools-extra/test/clang-tidy/** clang-tools-extra: - - changed-files: - - 
any-glob-to-any-file: - - clang-tools-extra/** + - clang-tools-extra/** tools:llvm-mca: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-mca/** - - llvm/include/llvm/MCA/** - - llvm/lib/MCA/** + - llvm/tools/llvm-mca/** + - llvm/include/llvm/MCA/** + - llvm/lib/MCA/** clang: - - changed-files: - - all-globs-to-all-file: - - clang/** - - '!clang/**/Format/**' - - '!clang/tools/clang-format/**' + - any: + - clang/** + - '!clang/**/Format/**' + - '!clang/tools/clang-format/**' testing-tools: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/FileCheck/** - - llvm/lib/FileCheck/** - - llvm/test/FileCheck/** - - llvm/unittests/FileCheck/** - - llvm/utils/lit/** - - llvm/utils/split-file/** - - llvm/utils/not/** - - llvm/utils/count/** - - llvm/utils/FileCheck/** - - llvm/docs/CommandGuide/FileCheck.rst - - llvm/docs/CommandGuide/lit.rst - - llvm/docs/TestingGuide.rst - - llvm/test/Other/FileCheck-space.txt - - llvm/utils/UpdateTestChecks/** - - llvm/utils/update*_test_checks.py + - llvm/include/llvm/FileCheck/** + - llvm/lib/FileCheck/** + - llvm/test/FileCheck/** + - llvm/unittests/FileCheck/** + - llvm/utils/lit/** + - llvm/utils/split-file/** + - llvm/utils/not/** + - llvm/utils/count/** + - llvm/utils/FileCheck/** + - llvm/docs/CommandGuide/FileCheck.rst + - llvm/docs/CommandGuide/lit.rst + - llvm/docs/TestingGuide.rst + - llvm/test/Other/FileCheck-space.txt + - llvm/utils/UpdateTestChecks/** + - llvm/utils/update*_test_checks.py debuginfo: - - changed-files: - - any-glob-to-any-file: - - clang/lib/CodeGen/CGDebugInfo.* - - llvm/include/llvm/BinaryFormat/Dwarf.* - - llvm/include/llvm/CodeGen/*Debug*.* - - llvm/include/llvm/DebugInfo/** - - llvm/include/llvm/Debuginfod/** - - llvm/include/llvm/Frontend/Debug/** - - llvm/include/llvm/IR/Debug*.* - - llvm/include/llvm/Object/*Debug*.* - - llvm/include/llvm/ObjectYAML/*Debug*.* - - llvm/include/llvm/Transforms/Utils/*Debug*.* - - llvm/include/llvm-c/DebugInfo.h - - 
llvm/lib/BinaryFormat/Dwarf.cpp - - llvm/lib/CodeGen/AsmPrinter/*Debug*.* - - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* - - llvm/lib/CodeGen/AsmPrinter/DIE*.* - - llvm/lib/CodeGen/LiveDebugValues/** - - llvm/lib/CodeGen/*Debug*.* - - llvm/lib/CodeGen/DwarfEHPrepare.cpp - - llvm/lib/DebugInfo/** - - llvm/lib/Debuginfod/** - - llvm/lib/DWARFLinkerParallel/** - - llvm/lib/IR/Debug*.cpp - - llvm/lib/MC/MCDwarf.cpp - - llvm/lib/Transforms/Utils/*Debug*.* - - llvm/test/DebugInfo/** - - llvm/test/tools/dsymutil/** - - llvm/test/tools/llvm-debuginfo-analyzer/** - - llvm/test/tools/llvm-debuginfod/** - - llvm/test/tools/llvm-debuginfod-find/** - - llvm/test/tools/llvm-dwarfdump/** - - llvm/test/tools/llvm-dwarfutil/** - - llvm/test/tools/llvm-dwp/** - - llvm/test/tools/llvm-gsymutil/** - - llvm/test/tools/llvm-pdbuti/** - - llvm/tools/dsymutil/** - - llvm/tools/llvm-debuginfo-analyzer/** - - llvm/tools/llvm-debuginfod/** - - llvm/tools/llvm-debuginfod-find/** - - llvm/tools/llvm-dwarfdump/** - - llvm/tools/llvm-dwarfutil/** - - llvm/tools/llvm-dwp/** - - llvm/tools/llvm-gsymutil/** - - llvm/tools/llvm-pdbutil/** + - clang/lib/CodeGen/CGDebugInfo.* + - llvm/include/llvm/BinaryFormat/Dwarf.* + - llvm/include/llvm/CodeGen/*Debug*.* + - llvm/include/llvm/DebugInfo/** + - llvm/include/llvm/Debuginfod/** + - llvm/include/llvm/Frontend/Debug/** + - llvm/include/llvm/IR/Debug*.* + - llvm/include/llvm/Object/*Debug*.* + - llvm/include/llvm/ObjectYAML/*Debug*.* + - llvm/include/llvm/Transforms/Utils/*Debug*.* + - llvm/include/llvm-c/DebugInfo.h + - llvm/lib/BinaryFormat/Dwarf.cpp + - llvm/lib/CodeGen/AsmPrinter/*Debug*.* + - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* + - llvm/lib/CodeGen/AsmPrinter/DIE*.* + - llvm/lib/CodeGen/LiveDebugValues/** + - llvm/lib/CodeGen/*Debug*.* + - llvm/lib/CodeGen/DwarfEHPrepare.cpp + - llvm/lib/DebugInfo/** + - llvm/lib/Debuginfod/** + - llvm/lib/DWARFLinkerParallel/** + - llvm/lib/IR/Debug*.cpp + - llvm/lib/MC/MCDwarf.cpp + - llvm/lib/Transforms/Utils/*Debug*.* 
+ - llvm/test/DebugInfo/** + - llvm/test/tools/dsymutil/** + - llvm/test/tools/llvm-debuginfo-analyzer/** + - llvm/test/tools/llvm-debuginfod/** + - llvm/test/tools/llvm-debuginfod-find/** + - llvm/test/tools/llvm-dwarfdump/** + - llvm/test/tools/llvm-dwarfutil/** + - llvm/test/tools/llvm-dwp/** + - llvm/test/tools/llvm-gsymutil/** + - llvm/test/tools/llvm-pdbuti/** + - llvm/tools/dsymutil/** + - llvm/tools/llvm-debuginfo-analyzer/** + - llvm/tools/llvm-debuginfod/** + - llvm/tools/llvm-debuginfod-find/** + - llvm/tools/llvm-dwarfdump/** + - llvm/tools/llvm-dwarfutil/** + - llvm/tools/llvm-dwp/** + - llvm/tools/llvm-gsymutil/** + - llvm/tools/llvm-pdbutil/** github:workflow: - - changed-files: - - any-glob-to-any-file: - - .github/workflows/** + - .github/workflows/** cmake: - - changed-files: - - any-glob-to-any-file: - - cmake/** - - llvm/cmake/** - - runtimes/** + - cmake/** + - llvm/cmake/** + - runtimes/** flang:driver: - - changed-files: - - any-glob-to-any-file: - - flang/tools/flang-driver/** - - flang/unittests/Frontend/** - - flang/lib/FrontendTool/** - - flang/lib/Frontend/** - - flang/include/flang/Frontend/** - - flang/include/flang/FrontendTool/** - - flang/test/Driver/** + - flang/tools/flang-driver/** + - flang/unittests/Frontend/** + - flang/lib/FrontendTool/** + - flang/lib/Frontend/** + - flang/include/flang/Frontend/** + - flang/include/flang/FrontendTool/** + - flang/test/Driver/** backend:m68k: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/M68k/** - - clang/lib/Basic/Targets/M68k.* - - clang/lib/CodeGen/Targets/M68k.cpp - - llvm/test/CodeGen/M68k/** - - llvm/test/MC/Disassembler/M68k/** - - llvm/test/MC/M68k/** + - llvm/lib/Target/M68k/** + - clang/lib/Basic/Targets/M68k.* + - clang/lib/CodeGen/Targets/M68k.cpp + - llvm/test/CodeGen/M68k/** + - llvm/test/MC/Disassembler/M68k/** + - llvm/test/MC/M68k/** libc++: - - changed-files: - - any-glob-to-any-file: - - libcxx/** - - .github/workflows/libcxx-* + - libcxx/** + - 
.github/workflows/libcxx-* libc++abi: - - changed-files: - - any-glob-to-any-file: - - libcxxabi/** + - libcxxabi/** libunwind: - - changed-files: - - any-glob-to-any-file: - - libunwind/** + - libunwind/** objectyaml: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/ObjectYAML/** - - llvm/lib/ObjectYAML/** - - llvm/test/tools/obj2yaml/** - - llvm/test/tools/yaml2obj/** - - llvm/tools/obj2yaml/** - - llvm/tools/yaml2obj/** + - llvm/include/llvm/ObjectYAML/** + - llvm/lib/ObjectYAML/** + - llvm/test/tools/obj2yaml/** + - llvm/test/tools/yaml2obj/** + - llvm/tools/obj2yaml/** + - llvm/tools/yaml2obj/** clang:analysis: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Analysis/** - - clang/lib/Analysis/** + - clang/include/clang/Analysis/** + - clang/lib/Analysis/** clang:static analyzer: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/StaticAnalyzer/** - - clang/lib/StaticAnalyzer/** - - clang/tools/scan-build/** - - clang/utils/analyzer/** - - clang/docs/analyzer/** - - clang/test/Analysis/** + - clang/include/clang/StaticAnalyzer/** + - clang/lib/StaticAnalyzer/** + - clang/tools/scan-build/** + - clang/utils/analyzer/** + - clang/docs/analyzer/** + - clang/test/Analysis/** pgo: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/** - - llvm/test/Transforms/PGOProfile/** - - compiler-rt/lib/profile/** - - compiler-rt/lib/memprof/** - - compiler-rt/test/profile/** - - compiler-rt/test/memprof/** - - llvm/tools/llvm-profdata/** - - llvm/tools/llvm-profgen/** - - llvm/test/tools/llvm-profdata/** - - llvm/test/tools/llvm-profgen/** - - 
llvm/unittests/ProfileData/* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/** + - llvm/test/Transforms/PGOProfile/** + - compiler-rt/lib/profile/** + - compiler-rt/lib/memprof/** + - compiler-rt/test/profile/** + - compiler-rt/test/memprof/** + - llvm/tools/llvm-profdata/** + - llvm/tools/llvm-profgen/** + - llvm/test/tools/llvm-profdata/** + - llvm/test/tools/llvm-profgen/** + - llvm/unittests/ProfileData/* openacc: - - changed-files: - - any-glob-to-any-file: - - flang/**/OpenACC/** - - flang/include/flang/Lower/OpenACC.h - - flang/docs/OpenACC.md - - flang/lib/Parser/openacc-parsers.cpp - - flang/lib/Lower/OpenACC.cpp - - llvm/**/Frontend/OpenACC/** - - llvm/unittests/Frontend/OpenACCTest.cpp - - mlir/test/Target/LLVMIR/openacc-llvm.mlir - - mlir/**/*OpenACC/** + - flang/**/OpenACC/** + - flang/include/flang/Lower/OpenACC.h + - flang/docs/OpenACC.md + - flang/lib/Parser/openacc-parsers.cpp + - flang/lib/Lower/OpenACC.cpp + - llvm/**/Frontend/OpenACC/** + - llvm/unittests/Frontend/OpenACCTest.cpp + - mlir/test/Target/LLVMIR/openacc-llvm.mlir + - mlir/**/*OpenACC/** flang:runtime: - - changed-files: - - any-glob-to-any-file: - - flang/runtime/** + - flang/runtime/** flang:parser: - - changed-files: - - any-glob-to-any-file: - - flang/**/Parser/** + - flang/**/Parser/** flang:semantics: - - changed-files: - - any-glob-to-any-file: - - flang/**/Evaluate/** - - flang/**/Semantics/** + - flang/**/Evaluate/** + - flang/**/Semantics/** flang:fir-hlfir: - - changed-files: - - any-glob-to-any-file: - - flang/**/Lower/** - - flang/**/Optimizer/** + - flang/**/Lower/** + - flang/**/Optimizer/** flang:codegen: - - changed-files: - - 
any-glob-to-any-file: - - flang/**/CodeGen/** + - flang/**/CodeGen/** llvm:codegen: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/CodeGen/* - - llvm/lib/CodeGen/MIRParser/* - - llvm/lib/CodeGen/LiveDebugValues/* - - llvm/lib/CodeGen/AsmPrinter/* + - llvm/lib/CodeGen/* + - llvm/lib/CodeGen/MIRParser/* + - llvm/lib/CodeGen/LiveDebugValues/* + - llvm/lib/CodeGen/AsmPrinter/* llvm:globalisel: - - changed-files: - - any-glob-to-any-file: - - llvm/**/GlobalISel/** - - llvm/utils/TableGen/GlobalISel* + - llvm/**/GlobalISel/** + - llvm/utils/TableGen/GlobalISel* function-specialization: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/Transforms/Utils/SCCPSolver.h - - llvm/lib/Transforms/Utils/SCCPSolver.cpp - - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h - - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp - - llvm/test/Transforms/FunctionSpecialization/* + - llvm/include/llvm/Transforms/Utils/SCCPSolver.h + - llvm/lib/Transforms/Utils/SCCPSolver.cpp + - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h + - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp + - llvm/test/Transforms/FunctionSpecialization/* libc: - - changed-files: - - any-glob-to-any-file: - - libc/** - - utils/bazel/llvm-project-overlay/libc/** + - libc/** + - utils/bazel/llvm-project-overlay/libc/** clang-format: - - changed-files: - - any-glob-to-any-file: - - clang/**/Format/** - - clang/tools/clang-format/** + - clang/**/Format/** + - clang/tools/clang-format/** flang:openmp: - - changed-files: - - any-glob-to-any-file: - - flang/test/**/OpenMP/** - - flang/lib/Lower/OpenMP.cpp - - flang/lib/Semantics/resolve-directives.cpp - - flang/lib/Semantics/check-omp-structure.cpp - - flang/lib/Optimizer/Transforms/OMP* - - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir - - flang/test/Lower/OpenMP/** - - flang/test/Transforms/omp* - - mlir/**/*OpenMP* - - mlir/test/Target/LLVMIR/openmp* - - llvm/lib/Frontend/OpenMP/** - - 
llvm/include/llvm/Frontend/OpenMP/** - - llvm/unittests/Frontend/OpenMP* + - flang/test/**/OpenMP/** + - flang/lib/Lower/OpenMP.cpp + - flang/lib/Semantics/resolve-directives.cpp + - flang/lib/Semantics/check-omp-structure.cpp + - flang/lib/Optimizer/Transforms/OMP* + - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir + - flang/test/Lower/OpenMP/** + - flang/test/Transforms/omp* + - mlir/**/*OpenMP* + - mlir/test/Target/LLVMIR/openmp* + - llvm/lib/Frontend/OpenMP/** + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/unittests/Frontend/OpenMP* llvm:ir: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/IR/** - - llvm/include/llvm/IR/** - - llvm/docs/LangRef.rst - - llvm/unittests/IR/** + - llvm/lib/IR/** + - llvm/include/llvm/IR/** + - llvm/docs/LangRef.rst + - llvm/unittests/IR/** llvm:SandboxIR: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/SandboxIR/** - - llvm/include/llvm/SandboxIR/** - - llvm/docs/SandboxIR.md - - llvm/unittests/SandboxIR/** + - llvm/lib/SandboxIR/** + - llvm/include/llvm/SandboxIR/** + - llvm/docs/SandboxIR.md + - llvm/unittests/SandboxIR/** llvm:analysis: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Analysis/** - - llvm/include/llvm/Analysis/** - - llvm/test/Analysis/** - - llvm/unittests/Analysis/** + - llvm/lib/Analysis/** + - llvm/include/llvm/Analysis/** + - llvm/test/Analysis/** + - llvm/unittests/Analysis/** llvm:adt: - - changed-files: - - any-glob-to-any-file: - - llvm/**/ADT/* + - llvm/**/ADT/* llvm:support: - - changed-files: - - any-glob-to-any-file: - - llvm/**/Support/** + - llvm/**/Support/** # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories. 
llvm:mc: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/MC/** - - llvm/lib/MC/** - - llvm/tools/llvm-mc/** + - llvm/include/llvm/MC/** + - llvm/lib/MC/** + - llvm/tools/llvm-mc/** llvm:transforms: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/** - - llvm/include/llvm/Transforms/** - - llvm/test/Transforms/** - - llvm/unittests/Transforms/** + - llvm/lib/Transforms/** + - llvm/include/llvm/Transforms/** + - llvm/test/Transforms/** + - llvm/unittests/Transforms/** llvm:instcombine: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Analysis/InstructionSimplify.cpp - - llvm/lib/Transforms/InstCombine/** - - llvm/include/llvm/Transforms/InstCombine/ - - llvm/include/llvm/Analysis/InstructionSimplify.h - - llvm/test/Transforms/InstCombine/** - - llvm/test/Transforms/InstSimplify/** + - llvm/lib/Analysis/InstructionSimplify.cpp + - llvm/lib/Transforms/InstCombine/** + - llvm/include/llvm/Transforms/InstCombine/ + - llvm/include/llvm/Analysis/InstructionSimplify.h + - llvm/test/Transforms/InstCombine/** + - llvm/test/Transforms/InstSimplify/** llvm:vectorcombine: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Vectorize/VectorCombine.cpp - - llvm/test/Transforms/VectorCombine/** + - llvm/lib/Transforms/Vectorize/VectorCombine.cpp + - llvm/test/Transforms/VectorCombine/** clangd: - - changed-files: - - any-glob-to-any-file: - - clang-tools-extra/clangd/** + - clang-tools-extra/clangd/** hlsl: - - changed-files: - - any-glob-to-any-file: - - clang/test/ParserHLSL/** - - clang/test/SemaHLSL/** - - clang/test/AST/HLSL/** - - clang/test/CodeGenHLSL/** - - clang/cmake/caches/HLSL.cmake - - clang/include/clang/Basic/HLSL*.h - - clang/include/clang/Sema/HLSL*.h - - clang/docs/HLSL/** - - clang/lib/Driver/ToolChains/HLSL* - - clang/lib/Parse/ParseHLSL.cpp - - clang/lib/Sema/HLSLExternalSemaSource.cpp - - clang/lib/Sema/SemaHLSL.cpp - - clang/lib/CodeGen/CGHLSLRuntime.* - - clang/lib/CodeGen/CGHLSLBuiltins.cpp - 
- llvm/include/llvm/Frontend/HLSL/** - - llvm/lib/Frontend/HLSL/** + - clang/test/ParserHLSL/** + - clang/test/SemaHLSL/** + - clang/test/AST/HLSL/** + - clang/test/CodeGenHLSL/** + - clang/cmake/caches/HLSL.cmake + - clang/include/clang/Basic/HLSL*.h + - clang/include/clang/Sema/HLSL*.h + - clang/docs/HLSL/** + - clang/lib/Driver/ToolChains/HLSL* + - clang/lib/Parse/ParseHLSL.cpp + - clang/lib/Sema/HLSLExternalSemaSource.cpp + - clang/lib/Sema/SemaHLSL.cpp + - clang/lib/CodeGen/CGHLSLRuntime.* + - clang/lib/CodeGen/CGHLSLBuiltins.cpp + - llvm/include/llvm/Frontend/HLSL/** + - llvm/lib/Frontend/HLSL/** llvm:SelectionDAG: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/CodeGen/SelectionDAG*.h - - llvm/include/llvm/CodeGen/SDNodeProperties.td - - llvm/include/llvm/Target/TargetSelectionDAG.td - - llvm/lib/CodeGen/SelectionDAG/** - - llvm/utils/TableGen/CodeGenDAG* - - llvm/utils/TableGen/DAGISel* - - llvm/include/llvm/CodeGen/DAGCombine.h - - llvm/include/llvm/CodeGen/ISDOpcodes.h + - llvm/include/llvm/CodeGen/SelectionDAG*.h + - llvm/include/llvm/CodeGen/SDNodeProperties.td + - llvm/include/llvm/Target/TargetSelectionDAG.td + - llvm/lib/CodeGen/SelectionDAG/** + - llvm/utils/TableGen/CodeGenDAG* + - llvm/utils/TableGen/DAGISel* + - llvm/include/llvm/CodeGen/DAGCombine.h + - llvm/include/llvm/CodeGen/ISDOpcodes.h backend:DirectX: - - changed-files: - - any-glob-to-any-file: - - '**/*DirectX*' - - '**/*DXIL*' - - '**/*dxil*' - - '**/*DirectX*/**' - - '**/*DXIL*/**' - - '**/*dxil*/**' - - '**/*DXContainer*' - - '**/*DXContainer*/**' - - clang/lib/Sema/SemaDirectX.cpp - - clang/include/clang/Sema/SemaDirectX.h - - clang/include/clang/Basic/BuiltinsDirectX.td - - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp - - clang/test/CodeGenDirectX/** - - clang/test/SemaDirectX/** + - '**/*DirectX*' + - '**/*DXIL*' + - '**/*dxil*' + - '**/*DirectX*/**' + - '**/*DXIL*/**' + - '**/*dxil*/**' + - '**/*DXContainer*' + - '**/*DXContainer*/**' + - 
clang/lib/Sema/SemaDirectX.cpp + - clang/include/clang/Sema/SemaDirectX.h + - clang/include/clang/Basic/BuiltinsDirectX.td + - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp + - clang/test/CodeGenDirectX/** + - clang/test/SemaDirectX/** backend:SPIR-V: - - changed-files: - - any-glob-to-any-file: - - clang/lib/Driver/ToolChains/SPIRV.* - - clang/lib/Sema/SemaSPIRV.cpp - - clang/include/clang/Sema/SemaSPIRV.h - - clang/include/clang/Basic/BuiltinsSPIRV.td - - clang/test/CodeGenSPIRV/** - - clang/test/SemaSPIRV/** - - llvm/lib/Target/SPIRV/** - - llvm/test/CodeGen/SPIRV/** - - llvm/test/Frontend/HLSL/** - - llvm/docs/SPIRVUsage.rst + - clang/lib/Driver/ToolChains/SPIRV.* + - clang/lib/Sema/SemaSPIRV.cpp + - clang/include/clang/Sema/SemaSPIRV.h + - clang/include/clang/Basic/BuiltinsSPIRV.td + - clang/test/CodeGenSPIRV/** + - clang/test/SemaSPIRV/** + - llvm/lib/Target/SPIRV/** + - llvm/test/CodeGen/SPIRV/** + - llvm/test/Frontend/HLSL/** + - llvm/docs/SPIRVUsage.rst mlgo: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Analysis/ML* - - llvm/include/llvm/Analysis/ML* - - llvm/lib/Analysis/*Runner.cpp - - llvm/include/llvm/Analysis/*Runner.h - - llvm/unittests/Analysis/ML* - - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp - - llvm/lib/Analysis/TrainingLogger.cpp - - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h - - llvm/include/llvm/Analysis/Utils/TrainingLogger.h - - llvm/test/Analysis/FunctionPropertiesAnalysis/* - - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp - - llvm/test/Transforms/inline/ML/** - - llvm/lib/CodeGen/ML* - - llvm/unittests/CodeGen/ML* - - llvm/test/CodeGen/MLRegAlloc/** - - llvm/utils/mlgo-utils/** - - llvm/docs/MLGO.rst - - llvm/include/llvm/Analysis/IR2Vec.h - - llvm/lib/Analysis/IR2Vec.cpp - - llvm/lib/Analysis/models/** - - llvm/include/llvm/CodeGen/MIR2Vec.h - - llvm/lib/CodeGen/MIR2Vec.cpp - - llvm/test/Analysis/IR2Vec/** - - llvm/test/CodeGen/MIR2Vec/** - - llvm/unittests/Analysis/IR2VecTest.cpp - - 
llvm/unittests/CodeGen/MIR2VecTest.cpp - - llvm/tools/llvm-ir2vec/** - - llvm/docs/CommandGuide/llvm-ir2vec.rst + - llvm/lib/Analysis/ML* + - llvm/include/llvm/Analysis/ML* + - llvm/lib/Analysis/*Runner.cpp + - llvm/include/llvm/Analysis/*Runner.h + - llvm/unittests/Analysis/ML* + - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp + - llvm/lib/Analysis/TrainingLogger.cpp + - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h + - llvm/include/llvm/Analysis/Utils/TrainingLogger.h + - llvm/test/Analysis/FunctionPropertiesAnalysis/* + - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp + - llvm/test/Transforms/inline/ML/** + - llvm/lib/CodeGen/ML* + - llvm/unittests/CodeGen/ML* + - llvm/test/CodeGen/MLRegAlloc/** + - llvm/utils/mlgo-utils/** + - llvm/docs/MLGO.rst + - llvm/include/llvm/Analysis/IR2Vec.h + - llvm/lib/Analysis/IR2Vec.cpp + - llvm/lib/Analysis/models/** + - llvm/include/llvm/CodeGen/MIR2Vec.h + - llvm/lib/CodeGen/MIR2Vec.cpp + - llvm/test/Analysis/IR2Vec/** + - llvm/test/CodeGen/MIR2Vec/** + - llvm/unittests/Analysis/IR2VecTest.cpp + - llvm/unittests/CodeGen/MIR2VecTest.cpp + - llvm/tools/llvm-ir2vec/** + - llvm/docs/CommandGuide/llvm-ir2vec.rst tools:llvm-exegesis: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-exegesis/** - - llvm/test/tools/llvm-exegesis/** - - llvm/unittests/tools/llvm-exegesis/** + - llvm/tools/llvm-exegesis/** + - llvm/test/tools/llvm-exegesis/** + - llvm/unittests/tools/llvm-exegesis/** tools:llvm-reduce: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-reduce/** + - llvm/tools/llvm-reduce/** platform:windows: - - changed-files: - - any-glob-to-any-file: - - lld/COFF/** - - clang/lib/Driver/MSVC.cpp - - clang/lib/Driver/MinGW.cpp - - llvm/lib/DebugInfo/CodeView/** - - llvm/lib/DebugInfo/PDB/** - - llvm/lib/WindowsDriver/** - - llvm/lib/Support/Windows/** - - llvm/lib/BinaryFormat/COFF.cpp + - lld/COFF/** + - clang/lib/Driver/MSVC.cpp + - clang/lib/Driver/MinGW.cpp + - 
llvm/lib/DebugInfo/CodeView/** + - llvm/lib/DebugInfo/PDB/** + - llvm/lib/WindowsDriver/** + - llvm/lib/Support/Windows/** + - llvm/lib/BinaryFormat/COFF.cpp llvm:regalloc: - - changed-files: - - any-glob-to-any-file: - - llvm/**/CodeGen/CalcSpillWeights* - - llvm/**/CodeGen/InlineSpiller* - - llvm/**/CodeGen/InterferenceCache* - - llvm/**/CodeGen/LiveInterval* - - llvm/**/CodeGen/LiveRange* - - llvm/**/CodeGen/LiveReg* - - llvm/**/CodeGen/LiveVariables* - - llvm/**/CodeGen/MachineCopyPropagation* - - llvm/**/CodeGen/PHIElimination* - - llvm/**/CodeGen/ProcessImplicitDefs.cpp - - llvm/**/CodeGen/Register* - - llvm/**/CodeGen/RegUsage* - - llvm/**/CodeGen/RenameIndependentSubregs.cpp - - llvm/**/CodeGen/SlotIndexes.h - - llvm/**/CodeGen/SpillPlacement* - - llvm/**/CodeGen/SplitKit* - - llvm/**/CodeGen/VirtRegMap.h - - llvm/include/PBQP/** - - llvm/include/PBQPRAConstraint.h - - llvm/include/llvm/CodeGen/Spiller.h - - llvm/**/*RegAlloc + - llvm/**/CodeGen/CalcSpillWeights* + - llvm/**/CodeGen/InlineSpiller* + - llvm/**/CodeGen/InterferenceCache* + - llvm/**/CodeGen/LiveInterval* + - llvm/**/CodeGen/LiveRange* + - llvm/**/CodeGen/LiveReg* + - llvm/**/CodeGen/LiveVariables* + - llvm/**/CodeGen/MachineCopyPropagation* + - llvm/**/CodeGen/PHIElimination* + - llvm/**/CodeGen/ProcessImplicitDefs.cpp + - llvm/**/CodeGen/Register* + - llvm/**/CodeGen/RegUsage* + - llvm/**/CodeGen/RenameIndependentSubregs.cpp + - llvm/**/CodeGen/SlotIndexes.h + - llvm/**/CodeGen/SpillPlacement* + - llvm/**/CodeGen/SplitKit* + - llvm/**/CodeGen/VirtRegMap.h + - llvm/include/PBQP/** + - llvm/include/PBQPRAConstraint.h + - llvm/include/llvm/CodeGen/Spiller.h + - llvm/**/*RegAlloc lldb: - - changed-files: - - any-glob-to-any-file: - - lldb/** + - lldb/** lldb-dap: - - changed-files: - - any-glob-to-any-file: - - lldb/tools/lldb-dap/** + - lldb/tools/lldb-dap/** backend:AMDGPU: - - changed-files: - - any-glob-to-any-file: - - '**/*amdgpu*' - - '**/*AMDGPU*' - - '**/*amdgpu*/**' - - 
'**/*AMDGPU*/**' + - '**/*amdgpu*' + - '**/*AMDGPU*' + - '**/*amdgpu*/**' + - '**/*AMDGPU*/**' backend:NVPTX: - - changed-files: - - any-glob-to-any-file: - - 'llvm/**/*nvvm*' - - 'llvm/**/*NVVM*' - - 'llvm/**/*nvptx*' - - 'llvm/**/*NVPTX*' - - 'llvm/**/*nvvm*/**' - - 'llvm/**/*NVVM*/**' - - 'llvm/**/*nvptx*/**' - - 'llvm/**/*NVPTX*/**' + - 'llvm/**/*nvvm*' + - 'llvm/**/*NVVM*' + - 'llvm/**/*nvptx*' + - 'llvm/**/*NVPTX*' + - 'llvm/**/*nvvm*/**' + - 'llvm/**/*NVVM*/**' + - 'llvm/**/*nvptx*/**' + - 'llvm/**/*NVPTX*/**' backend:MIPS: - - changed-files: - - any-glob-to-any-file: - - '**/*mips*' - - '**/*Mips*' - - '**/*mips*/**' - - '**/*Mips*/**' + - '**/*mips*' + - '**/*Mips*' + - '**/*mips*/**' + - '**/*Mips*/**' backend:RISC-V: - - changed-files: - - any-glob-to-any-file: - - '**/*riscv*' - - '**/*RISCV*' - - '**/*riscv*/**' - - '**/*RISCV*/**' + - '**/*riscv*' + - '**/*RISCV*' + - '**/*riscv*/**' + - '**/*RISCV*/**' backend:Xtensa: - - changed-files: - - any-glob-to-any-file: - - '**/*xtensa*' - - '**/*Xtensa*' - - '**/*xtensa*/**' - - '**/*Xtensa*/**' + - '**/*xtensa*' + - '**/*Xtensa*' + - '**/*xtensa*/**' + - '**/*Xtensa*/**' lld:coff: - - changed-files: - - any-glob-to-any-file: - - lld/**/COFF/** - - lld/Common/** + - lld/**/COFF/** + - lld/Common/** lld:elf: - - changed-files: - - any-glob-to-any-file: - - lld/**/ELF/** - - lld/Common/** + - lld/**/ELF/** + - lld/Common/** lld:macho: - - changed-files: - - any-glob-to-any-file: - - lld/**/MachO/** - - lld/Common/** + - lld/**/MachO/** + - lld/Common/** lld:wasm: - - changed-files: - - any-glob-to-any-file: - - lld/**/wasm/** - - lld/Common/** + - lld/**/wasm/** + - lld/Common/** backend:ARC: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/ARC/** - - clang/lib/Basic/Targets/ARC.h - - clang/lib/Basic/Targets/ARC.cpp - - clang/lib/CodeGen/Targets/ARC.cpp + - llvm/lib/Target/ARC/** + - clang/lib/Basic/Targets/ARC.h + - clang/lib/Basic/Targets/ARC.cpp + - clang/lib/CodeGen/Targets/ARC.cpp 
backend:ARM: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsARM.td - - llvm/test/MC/ARM/** - - llvm/lib/Target/ARM/** - - llvm/test/CodeGen/ARM/** - - clang/lib/Basic/Targets/ARM* - - clang/lib/Driver/ToolChains/Arch/ARM.* - - clang/lib/CodeGen/Targets/ARM.cpp - - clang/include/clang/Basic/BuiltinsARM* - - llvm/test/MC/DisasemblerARM/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - llvm/include/llvm/IR/IntrinsicsARM.td + - llvm/test/MC/ARM/** + - llvm/lib/Target/ARM/** + - llvm/test/CodeGen/ARM/** + - clang/lib/Basic/Targets/ARM* + - clang/lib/Driver/ToolChains/Arch/ARM.* + - clang/lib/CodeGen/Targets/ARM.cpp + - clang/include/clang/Basic/BuiltinsARM* + - llvm/test/MC/DisasemblerARM/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:AArch64: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsAArch64.td - - llvm/test/MC/AArch64/** - - llvm/lib/Target/AArch64/** - - llvm/test/CodeGen/AArch64/** - - clang/lib/Basic/Targets/AArch64* - - clang/lib/Driver/ToolChains/Arch/AArch64.* - - clang/lib/CodeGen/Targets/AArch64.cpp - - clang/include/clang/Basic/BuiltinsAArch64* - - llvm/test/MC/Disassembler/AArch64/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - llvm/include/llvm/IR/IntrinsicsAArch64.td + - llvm/test/MC/AArch64/** + - llvm/lib/Target/AArch64/** + - llvm/test/CodeGen/AArch64/** + - clang/lib/Basic/Targets/AArch64* + - clang/lib/Driver/ToolChains/Arch/AArch64.* + - clang/lib/CodeGen/Targets/AArch64.cpp + - clang/include/clang/Basic/BuiltinsAArch64* + - llvm/test/MC/Disassembler/AArch64/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:CSKY: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/CSKY/** - - llvm/include/llvm/TargetParser/CSKYTargetParser.def - - llvm/include/llvm/TargetParser/CSKYTargetParser.h - - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def - - 
llvm/lib/TargetParser/CSKYTargetParser.cpp - - llvm/lib/Support/CSKYAttributes.cpp - - llvm/lib/Support/CSKYAttributeParser.cpp - - clang/lib/Basic/Targets/CSKY.h - - clang/lib/Basic/Targets/CSKY.cpp - - clang/lib/CodeGen/Targets/CSKY.cpp - - clang/lib/Driver/ToolChains/CSKY* + - llvm/lib/Target/CSKY/** + - llvm/include/llvm/TargetParser/CSKYTargetParser.def + - llvm/include/llvm/TargetParser/CSKYTargetParser.h + - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def + - llvm/lib/TargetParser/CSKYTargetParser.cpp + - llvm/lib/Support/CSKYAttributes.cpp + - llvm/lib/Support/CSKYAttributeParser.cpp + - clang/lib/Basic/Targets/CSKY.h + - clang/lib/Basic/Targets/CSKY.cpp + - clang/lib/CodeGen/Targets/CSKY.cpp + - clang/lib/Driver/ToolChains/CSKY* backend:Hexagon: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Basic/BuiltinsHexagon*.def - - clang/include/clang/Sema/SemaHexagon.h - - clang/lib/Basic/Targets/Hexagon.* - - clang/lib/CodeGen/Targets/Hexagon.cpp - - clang/lib/Driver/ToolChains/Hexagon.* - - clang/lib/Sema/SemaHexagon.cpp - - lld/ELF/Arch/Hexagon.cpp - - lldb/source/Plugins/ABI/Hexagon/** - - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** - - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def - - llvm/include/llvm/IR/IntrinsicsHexagon* - - llvm/include/llvm/Support/Hexagon* - - llvm/lib/Support/Hexagon* - - llvm/lib/Target/Hexagon/** - - llvm/test/CodeGen/Hexagon/** - - llvm/test/CodeGen/*/Hexagon/** - - llvm/test/DebugInfo/*/Hexagon/** - - llvm/test/Transforms/*/Hexagon - - llvm/test/MC/Disassembler/Hexagon/** - - llvm/test/MC/Hexagon/** - - llvm/test/tools/llvm-objdump/ELF/Hexagon/** + - clang/include/clang/Basic/BuiltinsHexagon*.def + - clang/include/clang/Sema/SemaHexagon.h + - clang/lib/Basic/Targets/Hexagon.* + - clang/lib/CodeGen/Targets/Hexagon.cpp + - clang/lib/Driver/ToolChains/Hexagon.* + - clang/lib/Sema/SemaHexagon.cpp + - lld/ELF/Arch/Hexagon.cpp + - lldb/source/Plugins/ABI/Hexagon/** + - 
lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** + - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def + - llvm/include/llvm/IR/IntrinsicsHexagon* + - llvm/include/llvm/Support/Hexagon* + - llvm/lib/Support/Hexagon* + - llvm/lib/Target/Hexagon/** + - llvm/test/CodeGen/Hexagon/** + - llvm/test/CodeGen/*/Hexagon/** + - llvm/test/DebugInfo/*/Hexagon/** + - llvm/test/Transforms/*/Hexagon + - llvm/test/MC/Disassembler/Hexagon/** + - llvm/test/MC/Hexagon/** + - llvm/test/tools/llvm-objdump/ELF/Hexagon/** backend:Lanai: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/Lanai/** - - clang/lib/Basic/Targets/Lanai.h - - clang/lib/Basic/Targets/Lanai.cpp - - clang/lib/CodeGen/Targets/Lanai.cpp - - clang/lib/Driver/ToolChains/Lanai* + - llvm/lib/Target/Lanai/** + - clang/lib/Basic/Targets/Lanai.h + - clang/lib/Basic/Targets/Lanai.cpp + - clang/lib/CodeGen/Targets/Lanai.cpp + - clang/lib/Driver/ToolChains/Lanai* backend:loongarch: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsLoongArch.td - - llvm/test/MC/LoongArch/** - - llvm/lib/Target/LoongArch/** - - llvm/test/CodeGen/LoongArch/** - - clang/lib/Basic/Targets/LoongArch* - - clang/lib/Driver/ToolChains/Arch/LoongArch.* - - clang/lib/CodeGen/Targets/LoongArch.cpp - - clang/include/clang/Basic/BuiltinsLoongArch* - - clang/include/clang/Sema/SemaLoongArch.h - - clang/lib/Sema/SemaLoongArch.cpp + - llvm/include/llvm/IR/IntrinsicsLoongArch.td + - llvm/test/MC/LoongArch/** + - llvm/lib/Target/LoongArch/** + - llvm/test/CodeGen/LoongArch/** + - clang/lib/Basic/Targets/LoongArch* + - clang/lib/Driver/ToolChains/Arch/LoongArch.* + - clang/lib/CodeGen/Targets/LoongArch.cpp + - clang/include/clang/Basic/BuiltinsLoongArch* + - clang/include/clang/Sema/SemaLoongArch.h + - clang/lib/Sema/SemaLoongArch.cpp backend:MSP430: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsMSP430.td - - llvm/test/MC/MSP430/** - - llvm/lib/Target/MSP430/** - - 
llvm/test/CodeGen/MSP430/** - - clang/lib/Basic/Targets/MSP430* - - clang/lib/Driver/ToolChains/Arch/MSP430.* - - clang/lib/CodeGen/Targets/MSP430.cpp - - clang/include/clang/Basic/BuiltinsMSP430* - - llvm/test/MC/Disassembler/MSP430/** + - llvm/include/llvm/IR/IntrinsicsMSP430.td + - llvm/test/MC/MSP430/** + - llvm/lib/Target/MSP430/** + - llvm/test/CodeGen/MSP430/** + - clang/lib/Basic/Targets/MSP430* + - clang/lib/Driver/ToolChains/Arch/MSP430.* + - clang/lib/CodeGen/Targets/MSP430.cpp + - clang/include/clang/Basic/BuiltinsMSP430* + - llvm/test/MC/Disassembler/MSP430/** backend:Sparc: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsSparc.td - - llvm/test/MC/Sparc/** - - llvm/lib/Target/Sparc/** - - llvm/test/CodeGen/Sparc/** - - clang/lib/Basic/Targets/Sparc* - - clang/lib/Driver/ToolChains/Arch/Sparc.* - - clang/lib/CodeGen/Targets/Sparc.cpp - - clang/include/clang/Basic/BuiltinsSparc* - - llvm/test/MC/Disassembler/Sparc/** + - llvm/include/llvm/IR/IntrinsicsSparc.td + - llvm/test/MC/Sparc/** + - llvm/lib/Target/Sparc/** + - llvm/test/CodeGen/Sparc/** + - clang/lib/Basic/Targets/Sparc* + - clang/lib/Driver/ToolChains/Arch/Sparc.* + - clang/lib/CodeGen/Targets/Sparc.cpp + - clang/include/clang/Basic/BuiltinsSparc* + - llvm/test/MC/Disassembler/Sparc/** backend:WebAssembly: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/WebAssembly/** - - llvm/test/CodeGen/WebAssembly/** - - clang/lib/Basic/Targets/WebAssembly* - - clang/include/clang/Basic/BuiltinsWebAssembly.def - - clang/include/clang/Basic/WebAssemblyReferenceTypes.def - - clang/lib/CodeGen/Targets/WebAssembly* - - llvm/include/llvm/IR/IntinsicsWebAssembly.td - - llvm/include/llvm/Object/Wasm* - - llvm/lib/CodeGen/AsmPrinter/Wasm* - - llvm/lib/CodeGen/Wasm* - - llvm/lib/MC/MCParser/Wasm* - - llvm/lib/MC/Wasm* - - llvm/lib/ObjCopy/wasm/** - - llvm/lib/Object/Wasm* - - clang/lib/Driver/Toolchains/WebAssembly* - - clang/lib/Headers/wasm_simd128.h - - 
clang/test/CodeGen/WebAssembly/** - - clang/test/SemaCXX/*wasm* - - clang/test/Sema/*wasm* - - llvm/include/llvm/BinaryFormat/Wasm.h - - llvm/unittests/Target/WebAssembly/** - - llvm/test/DebugInfo/WebAssembly/** - - llvm/test/MC/WebAssembly/** - - clang/include/clang/Sema/SemaWasm.h - - clang/lib/Sema/SemaLoongWasm.cpp + - llvm/lib/Target/WebAssembly/** + - llvm/test/CodeGen/WebAssembly/** + - clang/lib/Basic/Targets/WebAssembly* + - clang/include/clang/Basic/BuiltinsWebAssembly.def + - clang/include/clang/Basic/WebAssemblyReferenceTypes.def + - clang/lib/CodeGen/Targets/WebAssembly* + - llvm/include/llvm/IR/IntinsicsWebAssembly.td + - llvm/include/llvm/Object/Wasm* + - llvm/lib/CodeGen/AsmPrinter/Wasm* + - llvm/lib/CodeGen/Wasm* + - llvm/lib/MC/MCParser/Wasm* + - llvm/lib/MC/Wasm* + - llvm/lib/ObjCopy/wasm/** + - llvm/lib/Object/Wasm* + - clang/lib/Driver/Toolchains/WebAssembly* + - clang/lib/Headers/wasm_simd128.h + - clang/test/CodeGen/WebAssembly/** + - clang/test/SemaCXX/*wasm* + - clang/test/Sema/*wasm* + - llvm/include/llvm/BinaryFormat/Wasm.h + - llvm/unittests/Target/WebAssembly/** + - llvm/test/DebugInfo/WebAssembly/** + - llvm/test/MC/WebAssembly/** + - clang/include/clang/Sema/SemaWasm.h + - clang/lib/Sema/SemaLoongWasm.cpp backend:X86: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsX86.td - - llvm/lib/Target/X86/** - - llvm/test/CodeGen/X86/** - - llvm/test/MC/X86/** - - llvm/test/MC/Disassembler/X86/** - - llvm/test/Analysis/CostModel/X86/** - - llvm/test/tools/llvm-mca/X86/** - - clang/lib/Basic/Targets/X86/** - - clang/lib/Driver/ToolChains/Arch/X86.* - - clang/lib/CodeGen/Targets/X86.* - - clang/lib/Headers/** - - clang/test/CodeGen/X86/** - - clang/include/clang/Basic/BuiltinsX86* - - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h - - llvm/include/llvm/TargetParser/X86* - - llvm/lib/TargetParser/X86* - - llvm/utils/TableGen/X86* - - clang/include/clang/Sema/SemaX86.h - - clang/lib/Sema/SemaX86.cpp + - 
llvm/include/llvm/IR/IntrinsicsX86.td + - llvm/lib/Target/X86/** + - llvm/test/CodeGen/X86/** + - llvm/test/MC/X86/** + - llvm/test/MC/Disassembler/X86/** + - llvm/test/Analysis/CostModel/X86/** + - llvm/test/tools/llvm-mca/X86/** + - clang/lib/Basic/Targets/X86/** + - clang/lib/Driver/ToolChains/Arch/X86.* + - clang/lib/CodeGen/Targets/X86.* + - clang/lib/Headers/** + - clang/test/CodeGen/X86/** + - clang/include/clang/Basic/BuiltinsX86* + - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h + - llvm/include/llvm/TargetParser/X86* + - llvm/lib/TargetParser/X86* + - llvm/utils/TableGen/X86* + - clang/include/clang/Sema/SemaX86.h + - clang/lib/Sema/SemaX86.cpp backend:PowerPC: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* - - llvm/include/llvm/BinaryFormat/XCOFF.h - - llvm/include/llvm/IR/IntrinsicsPowerPC.td - - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp - - llvm/lib/Target/PowerPC/** - - llvm/test/Analysis/**/PowerPC/** - - llvm/test/CodeGen/PowerPC/** - - llvm/test/CodeGen/MIR/PowerPC/** - - llvm/test/DebugInfo/XCOFF/** - - llvm/test/DebugInfo/PowerPC/** - - llvm/test/LTO/PowerPC/** - - llvm/test/MC/Disassembler/PowerPC/** - - llvm/test/MC/PowerPC/** - - llvm/test/MC/XCOFF/** - - llvm/test/Transforms/**/PowerPC/** - - clang/include/clang/Basic/BuiltinsPPC.* - - clang/lib/Basic/Targets/PPC.* - - clang/lib/CodeGen/Targets/PPC.cpp - - clang/lib/Driver/ToolChains/PPC* - - clang/lib/Driver/ToolChains/AIX* - - clang/lib/Driver/ToolChains/Arch/PPC.* - - clang/test/CodeGen/PowerPC/** - - clang/include/clang/Sema/SemaPPC.h - - clang/lib/Sema/SemaPPC.cpp + - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* + - llvm/include/llvm/BinaryFormat/XCOFF.h + - llvm/include/llvm/IR/IntrinsicsPowerPC.td + - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp + - llvm/lib/Target/PowerPC/** + - llvm/test/Analysis/**/PowerPC/** + - llvm/test/CodeGen/PowerPC/** + - llvm/test/CodeGen/MIR/PowerPC/** + - llvm/test/DebugInfo/XCOFF/** + 
- llvm/test/DebugInfo/PowerPC/** + - llvm/test/LTO/PowerPC/** + - llvm/test/MC/Disassembler/PowerPC/** + - llvm/test/MC/PowerPC/** + - llvm/test/MC/XCOFF/** + - llvm/test/Transforms/**/PowerPC/** + - clang/include/clang/Basic/BuiltinsPPC.* + - clang/lib/Basic/Targets/PPC.* + - clang/lib/CodeGen/Targets/PPC.cpp + - clang/lib/Driver/ToolChains/PPC* + - clang/lib/Driver/ToolChains/AIX* + - clang/lib/Driver/ToolChains/Arch/PPC.* + - clang/test/CodeGen/PowerPC/** + - clang/include/clang/Sema/SemaPPC.h + - clang/lib/Sema/SemaPPC.cpp backend:SystemZ: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* - - llvm/include/llvm/BinaryFormat/GOFF.h - - llvm/include/llvm/IR/IntrinsicsSystemZ.td - - llvm/lib/Target/SystemZ/** - - llvm/test/Analysis/**/SystemZ/** - - llvm/test/CodeGen/SystemZ/** - - llvm/test/DebugInfo/SystemZ/** - - llvm/test/ExecutionEngine/**/SystemZ/** - - llvm/test/MC/Disassembler/SystemZ/** - - llvm/test/MC/GOFF/** - - llvm/test/MC/SystemZ/** - - llvm/test/Transforms/**/SystemZ/** - - clang/include/clang/Basic/BuiltinsSystemZ.* - - clang/lib/Basic/Targets/SystemZ.* - - clang/lib/CodeGen/Targets/SystemZ.cpp - - clang/lib/Driver/ToolChains/ZOS* - - clang/lib/Driver/ToolChains/Arch/SystemZ.* - - clang/test/CodeGen/SystemZ/** - - clang/include/clang/Sema/SemaSystemZ.h - - clang/lib/Sema/SemaSystemZ.cpp + - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* + - llvm/include/llvm/BinaryFormat/GOFF.h + - llvm/include/llvm/IR/IntrinsicsSystemZ.td + - llvm/lib/Target/SystemZ/** + - llvm/test/Analysis/**/SystemZ/** + - llvm/test/CodeGen/SystemZ/** + - llvm/test/DebugInfo/SystemZ/** + - llvm/test/ExecutionEngine/**/SystemZ/** + - llvm/test/MC/Disassembler/SystemZ/** + - llvm/test/MC/GOFF/** + - llvm/test/MC/SystemZ/** + - llvm/test/Transforms/**/SystemZ/** + - clang/include/clang/Basic/BuiltinsSystemZ.* + - clang/lib/Basic/Targets/SystemZ.* + - clang/lib/CodeGen/Targets/SystemZ.cpp + - clang/lib/Driver/ToolChains/ZOS* + - 
clang/lib/Driver/ToolChains/Arch/SystemZ.* + - clang/test/CodeGen/SystemZ/** + - clang/include/clang/Sema/SemaSystemZ.h + - clang/lib/Sema/SemaSystemZ.cpp third-party:unittests: - - changed-files: - - any-glob-to-any-file: - - third-party/unittests/** + - third-party/unittests/** third-party:benchmark: - - changed-files: - - any-glob-to-any-file: - - third-party/benchmark/** + - third-party/benchmark/** llvm:binary-utilities: - - changed-files: - - any-glob-to-any-file: - - llvm/docs/CommandGuide/llvm-* - - llvm/include/llvm/BinaryFormat/** - - llvm/include/llvm/DebugInfo/Symbolize/** - - llvm/include/llvm/ObjCopy/** - - llvm/include/llvm/Object/** - - llvm/lib/BinaryFormat/** - - llvm/lib/DebugInfo/Symbolize/** - - llvm/lib/ObjCopy/** - - llvm/lib/Object/** - - llvm/test/Object/** - - llvm/test/tools/llvm-ar/** - - llvm/test/tools/llvm-cxxfilt/** - - llvm/test/tools/llvm-nm/** - - llvm/test/tools/llvm-objcopy/** - - llvm/test/tools/llvm-objdump/** - - llvm/test/tools/llvm-readobj/** - - llvm/test/tools/llvm-size/** - - llvm/test/tools/llvm-strings/** - - llvm/test/tools/llvm-symbolizer/** - - llvm/tools/llvm-ar/** - - llvm/tools/llvm-cxxfilt/** - - llvm/tools/llvm-nm/** - - llvm/tools/llvm-objcopy/** - - llvm/tools/llvm-objdump/** - - llvm/tools/llvm-readobj/** - - llvm/tools/llvm-size/** - - llvm/tools/llvm-strings/** - - llvm/tools/llvm-symbolizer/** + - llvm/docs/CommandGuide/llvm-* + - llvm/include/llvm/BinaryFormat/** + - llvm/include/llvm/DebugInfo/Symbolize/** + - llvm/include/llvm/ObjCopy/** + - llvm/include/llvm/Object/** + - llvm/lib/BinaryFormat/** + - llvm/lib/DebugInfo/Symbolize/** + - llvm/lib/ObjCopy/** + - llvm/lib/Object/** + - llvm/test/Object/** + - llvm/test/tools/llvm-ar/** + - llvm/test/tools/llvm-cxxfilt/** + - llvm/test/tools/llvm-nm/** + - llvm/test/tools/llvm-objcopy/** + - llvm/test/tools/llvm-objdump/** + - llvm/test/tools/llvm-readobj/** + - llvm/test/tools/llvm-size/** + - llvm/test/tools/llvm-strings/** + - 
llvm/test/tools/llvm-symbolizer/** + - llvm/tools/llvm-ar/** + - llvm/tools/llvm-cxxfilt/** + - llvm/tools/llvm-nm/** + - llvm/tools/llvm-objcopy/** + - llvm/tools/llvm-objdump/** + - llvm/tools/llvm-readobj/** + - llvm/tools/llvm-size/** + - llvm/tools/llvm-strings/** + - llvm/tools/llvm-symbolizer/** clang:openmp: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Basic/OpenMP* - - clang/include/clang/AST/OpenMPClause.h - - clang/include/clang/AST/DeclOpenMP.h - - clang/include/clang/AST/ExprOpenMP.h - - clang/include/clang/AST/StmtOpenMP.h - - clang/lib/AST/DeclOpenMP.cpp - - clang/lib/AST/OpenMPClause.cpp - - clang/lib/AST/StmtOpenMP.cpp - - clang/lib/Headers/openmp_wrappers/** - - clang/lib/Parse/ParseOpenMP.cpp - - clang/lib/Basic/OpenMPKinds.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h - - clang/lib/CodeGen/CgStmtOpenMP.cpp - - clang/lib/CodeGen/CGOpenMP* - - clang/lib/Sema/SemaOpenMP.cpp - - clang/test/OpenMP/** - - clang/test/AST/ast-dump-openmp-* - - llvm/lib/Frontend/OpenMP/** - - llvm/lib/Transforms/IPO/OpenMPOpt.cpp - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h - - llvm/unittests/Frontend/OpenMP* - - llvm/test/Transforms/OpenMP/** + - clang/include/clang/Basic/OpenMP* + - clang/include/clang/AST/OpenMPClause.h + - clang/include/clang/AST/DeclOpenMP.h + - clang/include/clang/AST/ExprOpenMP.h + - clang/include/clang/AST/StmtOpenMP.h + - clang/lib/AST/DeclOpenMP.cpp + - clang/lib/AST/OpenMPClause.cpp + - clang/lib/AST/StmtOpenMP.cpp + - clang/lib/Headers/openmp_wrappers/** + - clang/lib/Parse/ParseOpenMP.cpp + - clang/lib/Basic/OpenMPKinds.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h + - clang/lib/CodeGen/CgStmtOpenMP.cpp + - clang/lib/CodeGen/CGOpenMP* + - clang/lib/Sema/SemaOpenMP.cpp + - clang/test/OpenMP/** + - clang/test/AST/ast-dump-openmp-* + - llvm/lib/Frontend/OpenMP/** + - 
llvm/lib/Transforms/IPO/OpenMPOpt.cpp + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h + - llvm/unittests/Frontend/OpenMP* + - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** - - clang/unittests/Analysis/LifetimeSafety* - - clang/test/Sema/*lifetime-safety* - - clang/test/Sema/*lifetime-analysis* - - clang/test/Analysis/LifetimeSafety/** + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** clang:as-a-library: - - changed-files: - - any-glob-to-any-file: - - clang/tools/libclang/** - - clang/bindings/** - - clang/include/clang-c/** - - clang/test/LibClang/** - - clang/unittest/libclang/** + - clang/tools/libclang/** + - clang/bindings/** + - clang/include/clang-c/** + - clang/test/LibClang/** + - clang/unittest/libclang/** openmp:libomp: - - changed-files: - - any-glob-to-any-file: - - 'openmp/**' + - any: ['openmp/**', '!openmp/libomptarget/**'] openmp:libomptarget: - - changed-files: - - all-globs-to-all-file: - - openmp/** - - '!openmp/runtime/**'' + - any: ['openmp/**', '!openmp/runtime/**'] bazel: - - changed-files: - - any-glob-to-any-file: - - utils/bazel/** + - utils/bazel/** offload: - - changed-files: - - any-glob-to-any-file: - - offload/** + - offload/** tablegen: - - changed-files: - - any-glob-to-any-file: - - llvm/include/TableGen/** - - llvm/lib/TableGen/** - - llvm/utils/TableGen/** + - llvm/include/TableGen/** + - llvm/lib/TableGen/** + - llvm/utils/TableGen/** infrastructure: - - changed-files: - - any-glob-to-any-file: - - .ci/** + - .ci/** diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index dc8cd100f3e68..e1f2e754c1a3d 
100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -67,7 +67,7 @@ jobs: github.event.pull_request.draft == false && github.event.pull_request.commits < 10 steps: - - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 + - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0 with: configuration-path: .github/new-prs-labeler.yml # workaround for https://github.com/actions/labeler/issues/112 From f734cebc396bfb0a3523d205071764f689432ab4 Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Mon, 10 Nov 2025 11:43:37 +0800 Subject: [PATCH 07/11] [LoongArch] Initial implementation for `enableMemCmpExpansion` hook (#166526) After overriding `TargetTransformInfo::enableMemCmpExpansion` in this commit, `MergeICmps` and `ExpandMemCmp` passes will be enabled on LoongArch. --- .../LoongArchTargetTransformInfo.cpp | 23 +- .../LoongArch/LoongArchTargetTransformInfo.h | 3 +- .../CodeGen/LoongArch/expandmemcmp-optsize.ll | 2146 ++++++++--- llvm/test/CodeGen/LoongArch/expandmemcmp.ll | 3309 +++++++++++++---- llvm/test/CodeGen/LoongArch/memcmp.ll | 27 +- 5 files changed, 4255 insertions(+), 1253 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp index f548a8dd0532b..5107c8def3799 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const { } } -// TODO: Implement more hooks to provide TTI machinery for LoongArch. 
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions +LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + + if (!ST->hasUAL()) + return Options; + + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + Options.AllowOverlappingLoads = true; + + // TODO: Support for vectors. + if (ST->is64Bit()) { + Options.LoadSizes = {8, 4, 2, 1}; + Options.AllowedTailExpansions = {3, 5, 6}; + } else { + Options.LoadSizes = {4, 2, 1}; + Options.AllowedTailExpansions = {3}; + } + + return Options; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h index e3f16c7804994..9b479f9dc0dc5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h @@ -55,7 +55,8 @@ class LoongArchTTIImpl : public BasicTTIImplBase { bool shouldExpandReduction(const IntrinsicInst *II) const override; - // TODO: Implement more hooks to provide TTI machinery for LoongArch. 
+ TTI::MemCmpExpansionOptions + enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll index 4f7c8967c3049..36670fa801b36 100644 --- a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll @@ -38,260 +38,488 @@ entry: } define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_1: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_1: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; 
LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) ret i32 %bcmp } define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_2: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_2: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w 
$sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) ret i32 %bcmp } define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_3: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 3 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_3: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 3 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; 
LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) ret i32 %bcmp } define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_4: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_4: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; 
LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) ret i32 %bcmp } define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 5 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 5 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; 
LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) ret i32 %bcmp } define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_6: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 6 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: 
bcmp_size_6: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 6 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) ret i32 %bcmp } define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_7: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 7 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; 
LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_7: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 7 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) ret i32 %bcmp } define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_8: -; LA32: # %bb.0: # %entry 
-; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 8 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_8: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 8 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext 
i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) ret i32 %bcmp } define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_15: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 15 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_15: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 15 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; 
LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) ret i32 %bcmp } define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_size_16: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 16 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_16: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 16 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 
8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) ret i32 %bcmp @@ -308,16 +536,36 @@ define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: bcmp_size_31: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 31 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; 
LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) ret i32 %bcmp @@ -334,16 +582,36 @@ define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: bcmp_size_32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 32 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, 
$a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) ret i32 %bcmp @@ -478,28 +746,44 @@ entry: } define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_eq_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_eq_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: 
st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp eq i32 %bcmp, 0 @@ -507,28 +791,38 @@ entry: } define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_lt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: srli.w $a0, $a0, 31 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_lt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slti $a0, $a0, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w 
$ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp slt i32 %bcmp, 0 @@ -536,28 +830,44 @@ entry: } define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: bcmp_gt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: slt $a0, $zero, $a0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_gt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slt $a0, $zero, $a0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; 
LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp sgt i32 %bcmp, 0 @@ -575,260 +885,898 @@ entry: } define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_1: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_1: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: 
ret +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) ret i32 %memcmp } define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_2: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret ; 
-; LA64-LABEL: memcmp_size_2: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) ret i32 %memcmp } define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_3: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 3 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # 
%bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_3: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 3 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 
4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) ret i32 %memcmp } define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_4: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_4: -; LA64: # 
%bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) ret i32 %memcmp } define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 5 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: 
memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB26_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB26_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 5 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: 
revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) ret i32 %memcmp } define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_6: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 6 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; 
LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB27_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_6: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 6 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # 
%bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) ret i32 %memcmp } define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_7: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 7 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, 
$a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_7: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 7 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB28_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, 
$a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB28_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB28_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) ret i32 %memcmp } define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_8: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 8 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, 
$a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_8: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 8 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; 
LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) ret i32 %memcmp } define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_15: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 15 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: 
slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; 
LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_15: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 15 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; 
LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) ret i32 %memcmp } define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_size_16: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 16 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 
+; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, 
.LBB31_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_16: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 16 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: 
.LBB31_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) ret i32 %memcmp @@ -845,16 +1793,50 @@ define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: memcmp_size_31: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 31 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; 
LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) ret i32 %memcmp @@ -871,16 +1853,50 @@ define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: memcmp_size_32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 32 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; 
LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) ret i32 %memcmp @@ -1017,16 +2033,16 @@ entry: define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { ; LA32-UAL-LABEL: memcmp_eq_zero: ; LA32-UAL: # %bb.0: # %entry -; LA32-UAL-NEXT: ld.w $a1, $a1, 0 ; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 ; LA32-UAL-NEXT: xor $a0, $a0, $a1 ; LA32-UAL-NEXT: sltui $a0, $a0, 1 ; LA32-UAL-NEXT: ret ; ; LA64-UAL-LABEL: memcmp_eq_zero: ; LA64-UAL: # %bb.0: # %entry -; LA64-UAL-NEXT: ld.w $a1, $a1, 0 ; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 ; LA64-UAL-NEXT: xor $a0, $a0, $a1 ; LA64-UAL-NEXT: sltui $a0, $a0, 1 ; LA64-UAL-NEXT: ret @@ -1089,28 
+2105,66 @@ entry: } define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_lt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: srli.w $a0, $a0, 31 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_lt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slti $a0, $a0, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; 
LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp slt i32 %memcmp, 0 @@ -1118,28 +2172,66 @@ entry: } define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { -; LA32-LABEL: memcmp_gt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: slt $a0, $zero, $a0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or 
$a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_gt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slt $a0, $zero, $a0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp 
= call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp sgt i32 %memcmp, 0 diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll index 8b8f32e6877cc..c1bf850baa8c3 100644 --- a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll @@ -38,312 +38,652 @@ entry: } define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_1: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_1: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d 
$sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) ret i32 %bcmp } define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_2: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_2: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # 
%entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) ret i32 %bcmp } define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_3: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 3 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_3: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 3 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w 
$sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) ret i32 %bcmp } define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_4: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_4: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry 
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) ret i32 %bcmp } define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 5 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 5 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, 
$a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) ret i32 %bcmp } define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_6: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 6 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_6: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, 
$zero, 6 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) ret i32 %bcmp } define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_7: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 7 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor 
$a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_7: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 7 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) ret i32 %bcmp } define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_8: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 8 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, 
$sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_8: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 8 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) ret i32 %bcmp } define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_15: -; LA32: # %bb.0: # 
%entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 15 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_15: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 15 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret 
+; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) ret i32 %bcmp } define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_16: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 16 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_16: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 16 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d 
$a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) ret i32 %bcmp } define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_31: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 31 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; 
LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_31: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 31 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte 
Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) ret i32 %bcmp } define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_size_32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 32 -; LA32-NEXT: bl bcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 
+; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_size_32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 32 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr 
%s2, iGRLen 32) ret i32 %bcmp @@ -360,16 +700,52 @@ define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: bcmp_size_63: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 63 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, 
%call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) ret i32 %bcmp @@ -386,16 +762,52 @@ define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: bcmp_size_64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 64 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: 
bcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) ret i32 %bcmp @@ -478,28 +890,60 @@ entry: } define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_eq_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 16 -; LA32-NEXT: bl bcmp -; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_eq_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 16 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry 
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) %ret = icmp eq i32 %bcmp, 0 @@ -507,28 +951,38 @@ entry: } define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_lt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: srli.w $a0, $a0, 31 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_lt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, 
%call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slti $a0, $a0, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp slt i32 %bcmp, 0 @@ -536,28 +990,44 @@ entry: } define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_gt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: slt $a0, $zero, $a0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_gt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: 
st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slt $a0, $zero, $a0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp sgt i32 %bcmp, 0 @@ -565,28 +1035,46 @@ entry: } define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_le_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl bcmp -; LA32-NEXT: slti $a0, $a0, 1 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w 
$a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: slti $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_le_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slti $a0, $a0, 1 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: slti $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp slt i32 %bcmp, 1 @@ -594,30 +1082,40 @@ entry: } define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: bcmp_ge_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; 
LA32-NEXT: bl bcmp -; LA32-NEXT: addi.w $a1, $zero, -1 -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: bcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ori $a0, $zero, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: bcmp_ge_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: addi.w $a1, $zero, -1 -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: bcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ori $a0, $zero, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp sgt i32 %bcmp, -1 @@ -635,312 +1133,1394 @@ entry: } define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: 
memcmp_size_1: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_1: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) ret i32 %memcmp } define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_2: 
-; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 2 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_2: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 2 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: 
addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) ret i32 %memcmp } define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_3: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 3 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_3: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 3 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; 
LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) ret i32 %memcmp } define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_4: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 
+; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_4: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; 
LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) ret i32 %memcmp } define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 5 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB28_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; 
LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 5 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) ret i32 %memcmp } define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_6: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: 
st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 6 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_6: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: 
ori $a2, $zero, 6 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) ret i32 %memcmp } define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_7: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 7 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; 
LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_7: -; LA64: 
# %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 7 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d 
$sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) ret i32 %memcmp } define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_8: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 8 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and 
$a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_8: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 8 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) ret 
i32 %memcmp } define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_15: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 15 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; 
LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB32_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: 
memcmp_size_15: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 15 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) ret i32 %memcmp } define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind { -; 
LA32-LABEL: memcmp_size_16: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 16 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, 
$a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB33_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_16: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, 
$sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 16 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) ret i32 %memcmp } define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_31: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: 
st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 31 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: 
ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; 
LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w 
$a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB34_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_31: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 31 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, 
$a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB34_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) ret i32 %memcmp } define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_size_32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 32 -; LA32-NEXT: bl memcmp -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 
3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w 
$a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; 
LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, 
$a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB35_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_size_32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 32 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB35_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; 
LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) ret i32 %memcmp @@ -957,16 +2537,74 @@ define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: memcmp_size_63: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 63 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; 
LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB36_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) ret i32 %memcmp @@ 
-983,16 +2621,74 @@ define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind { ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; -; LA64-LABEL: memcmp_size_64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 64 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: 
revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB37_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) ret i32 %memcmp @@ -1075,28 +2771,60 @@ entry: } define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_eq_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 16 -; LA32-NEXT: bl memcmp -; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; 
LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_eq_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 16 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) %ret = icmp eq i32 %memcmp, 0 @@ -1104,28 +2832,66 @@ entry: } define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_lt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: 
addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: srli.w $a0, $a0, 31 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_lt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slti $a0, $a0, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; 
LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp slt i32 %memcmp, 0 @@ -1133,28 +2899,66 @@ entry: } define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_gt_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: slt $a0, $zero, $a0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; 
LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_gt_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slt $a0, $zero, $a0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp sgt i32 %memcmp, 0 @@ -1162,28 +2966,68 @@ entry: } define i1 @memcmp_le_zero(ptr %s1, ptr %s2) 
nounwind { -; LA32-LABEL: memcmp_le_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: slti $a0, $a0, 1 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_le_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: slti $a0, $a0, 1 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: 
sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp slt i32 %memcmp, 1 @@ -1191,37 +3035,72 @@ entry: } define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { -; LA32-LABEL: memcmp_ge_zero: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: ori $a2, $zero, 4 -; LA32-NEXT: bl memcmp -; LA32-NEXT: addi.w $a1, $zero, -1 -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: ret +; LA32-UAL-LABEL: memcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; 
LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret ; -; LA64-LABEL: memcmp_ge_zero: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: ori $a2, $zero, 4 -; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: addi.w $a1, $zero, -1 -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: ret +; LA64-UAL-LABEL: memcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; 
LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret entry: %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) %ret = icmp sgt i32 %memcmp, -1 ret i1 %ret } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; LA32-NUAL: {{.*}} -; LA32-UAL: {{.*}} -; LA64-NUAL: {{.*}} -; LA64-UAL: {{.*}} diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll index c4aaf9a75a852..c3811c0357793 100644 --- a/llvm/test/CodeGen/LoongArch/memcmp.ll +++ b/llvm/test/CodeGen/LoongArch/memcmp.ll @@ -7,15 +7,24 @@ define signext i32 @test1(ptr %buffer1, ptr %buffer2) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: ori $a2, $zero, 16 -; CHECK-NEXT: pcaddu18i $ra, %call36(memcmp) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ld.d $a2, $a0, 0 +; CHECK-NEXT: ld.d $a3, $a1, 0 +; CHECK-NEXT: revb.d $a2, $a2 +; CHECK-NEXT: revb.d $a3, $a3 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %loadbb1 +; CHECK-NEXT: ld.d $a0, $a0, 8 +; CHECK-NEXT: ld.d $a1, $a1, 8 +; CHECK-NEXT: revb.d $a2, $a0 +; CHECK-NEXT: revb.d $a3, $a1 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %res_block +; CHECK-NEXT: sltu $a0, $a2, $a3 +; CHECK-NEXT: sub.d $a0, $zero, $a0 +; CHECK-NEXT: ori $a0, $a0, 1 ; CHECK-NEXT: ret entry: %call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) From b67796f3fca1345b5ba383282cf3299e992049bd Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Mon, 10 Nov 2025 12:06:17 +0800 
Subject: [PATCH 08/11] [llvm][RISCV] Support Zvfbfa codegen for fneg, fabs and copysign (#166944) This is the first patch for Zvfbfa codegen and I'm going to break it down to several patches to make it easier to review. The codegen supports both scalable vector and fixed length vector on both native operations and vp intrinsics. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 78 +- llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 83 ++- .../rvv/fixed-vectors-vcopysign-sdnode.ll | 56 ++ .../RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 180 ++++- .../RISCV/rvv/fixed-vectors-vfabs-sdnode.ll | 66 ++ .../RISCV/rvv/fixed-vectors-vfabs-vp.ll | 292 +++++++- .../RISCV/rvv/fixed-vectors-vfneg-sdnode.ll | 66 ++ .../RISCV/rvv/fixed-vectors-vfneg-vp.ll | 268 ++++++- llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll | 492 +++++++++++- llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll | 222 ++++-- llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 428 ++++++++++- .../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 702 ++++++++++++++++-- llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll | 198 ++++- llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 392 +++++++++- 14 files changed, 3344 insertions(+), 179 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1977d3372c5f6..a3ccbd8d4a8aa 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -87,6 +87,11 @@ static cl::opt "be combined with a shift"), cl::init(true)); +// TODO: Support more ops +static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS, + ISD::VP_FCOPYSIGN}; +static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN}; + RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const
RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -1208,6 +1213,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } }; + // Sets common actions for zvfbfa, some of instructions are supported + // natively so that we don't need to promote them. + const auto SetZvfbfaActions = [&](MVT VT) { + setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); + setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, + Custom); + setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); + setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); + setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom); + setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, + Custom); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom); + setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS, + ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, + ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE, + ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE, + ISD::VECTOR_COMPRESS}, + VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); + + setOperationAction(ISD::FCOPYSIGN, VT, Legal); + setOperationAction(ZvfbfaVPOps, VT, Custom); + + MVT EltVT = VT.getVectorElementType(); + if (isTypeLegal(EltVT)) + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT, + ISD::EXTRACT_VECTOR_ELT}, + VT, Custom); + else + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, + EltVT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, + ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD, + ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, + ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, + ISD::VP_SCATTER}, + VT, Custom); + setOperationAction(ISD::VP_LOAD_FF, VT, Custom); + + // Expand FP operations that need libcalls. 
+ setOperationAction(FloatingPointLibCallOps, VT, Expand); + + // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal. + if (getLMUL(VT) == RISCVVType::LMUL_8) { + setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom); + setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom); + } else { + MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); + setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT); + setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT); + } + }; + if (Subtarget.hasVInstructionsF16()) { for (MVT VT : F16VecVTs) { if (!isTypeLegal(VT)) @@ -1222,7 +1282,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } } - if (Subtarget.hasVInstructionsBF16Minimal()) { + if (Subtarget.hasVInstructionsBF16()) { + for (MVT VT : BF16VecVTs) { + if (!isTypeLegal(VT)) + continue; + SetZvfbfaActions(VT); + } + } else if (Subtarget.hasVInstructionsBF16Minimal()) { for (MVT VT : BF16VecVTs) { if (!isTypeLegal(VT)) continue; @@ -1501,6 +1567,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // available. 
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom); } + if (Subtarget.hasStdExtZvfbfa()) { + setOperationAction(ZvfbfaOps, VT, Custom); + setOperationAction(ZvfbfaVPOps, VT, Custom); + } setOperationAction( {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, Custom); @@ -7245,7 +7315,11 @@ static bool isPromotedOpNeedingSplit(SDValue Op, return (Op.getValueType() == MVT::nxv32f16 && (Subtarget.hasVInstructionsF16Minimal() && !Subtarget.hasVInstructionsF16())) || - Op.getValueType() == MVT::nxv32bf16; + (Op.getValueType() == MVT::nxv32bf16 && + Subtarget.hasVInstructionsBF16Minimal() && + (!Subtarget.hasVInstructionsBF16() || + (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) && + !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode())))); } static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td index b9c5b75983b1f..ffb2ac0756da4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td @@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in { FRM_DYN, fvti.AVL, fvti.Log2SEW, TA_MA)>; } -} + + foreach vti = AllBF16Vectors in { + // 13.12. Vector Floating-Point Sign-Injection Instructions + def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), + (!cast("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; + // Handle fneg with VFSGNJN using the same input for both operands. 
+ def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), + (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>; + + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2))), + (!cast("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), + (!cast("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; + + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector (fneg vti.RegClass:$rs2)))), + (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), + (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), + (!cast("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; + + // 13.12. Vector Floating-Point Sign-Injection Instructions + def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TA_MA)>; + // Handle fneg with VFSGNJN using the same input for both operands. 
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK") + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TA_MA)>; + + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), + vti.RegClass:$passthru, + (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") + vti.RegClass:$passthru, vti.RegClass:$rs1, + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TAIL_AGNOSTIC)>; + + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), + (riscv_fneg_vl vti.RegClass:$rs2, + (vti.Mask true_mask), + VLOpFrag), + srcvalue, + (vti.Mask true_mask), + VLOpFrag), + (!cast("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, + vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>; + + def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), + (SplatFPOp vti.ScalarRegClass:$rs2), + vti.RegClass:$passthru, + (vti.Mask VMV0:$vm), + VLOpFrag), + (!cast("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") + vti.RegClass:$passthru, vti.RegClass:$rs1, + vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, + TAIL_AGNOSTIC)>; + } + } } // Predicates = [HasStdExtZvfbfa] diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll new file mode 100644 index 0000000000000..9cfed6a659c64 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: 
-target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) { +; CHECK-LABEL: copysign_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) + ret <2 x bfloat> %r +} + +define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) { +; CHECK-LABEL: copysign_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) + ret <4 x bfloat> %r +} + +define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) { +; CHECK-LABEL: copysign_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) + ret <8 x bfloat> %r +} + +define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) { +; CHECK-LABEL: copysign_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret + %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) + ret <16 x bfloat> %r +} + +define <32 x bfloat> @copysign_v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs) { +; CHECK-LABEL: copysign_v32bf32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret + %r = call <32 x bfloat> @llvm.copysign.v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs) + ret <32 x bfloat> %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index 
a2178e1c571da..2455d872ae7f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -1,8 +1,172 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s + +define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: 
vfsgnj_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> 
%v +} + +define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: 
ret + %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfsgnj_vv_v16bf16_unmasked(<16 x bfloat> %va, <16 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.copysign.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -311,10 +475,10 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 @@ -346,10 +510,10 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: vle64.v v0, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: bltu a2, a1, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v0 ; CHECK-NEXT: addi a0, a2, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll new file mode 100644 index 0000000000000..27c00de3c3487 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <1 x bfloat> @v1bf16(<1 x bfloat> %v) { +; CHECK-LABEL: v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v) + ret <1 x bfloat> %r +} + +define <2 x bfloat> @v2bf16(<2 x bfloat> %v) { +; CHECK-LABEL: v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v) + ret <2 x bfloat> %r +} + +define <4 x bfloat> @v4bf16(<4 x bfloat> %v) { +; CHECK-LABEL: v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v) + ret <4 x bfloat> %r +} + +define <8 x bfloat> @v8bf16(<8 x bfloat> %v) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v) + ret <8 x bfloat> %r +} + +define <16 x bfloat> @v16bf16(<16 x bfloat> %v) { +; CHECK-LABEL: v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v) + ret <16 x bfloat> %r +} + +define <32 x bfloat> @v32bf16(<32 x 
bfloat> %v) { +; CHECK-LABEL: v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v) + ret <32 x bfloat> %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index 08f486b601328..01bd706ed31f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -1,12 +1,224 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <2 x bfloat> @vfabs_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext 
%evl) { +; ZVFH-LABEL: vfabs_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfabs_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfabs_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; 
ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfabs_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfabs_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> 
%m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfabs_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfabs_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfabs_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; 
+; ZVFHMIN-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.fabs.v2f16(<2 x half>, <2 x i1>, i32) @@ -24,6 +236,14 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -42,6 +262,14 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -62,6 +290,14 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: 
addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -80,6 +316,14 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -100,6 +344,14 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -118,6 +370,14 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -138,6 +398,14 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, 
m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -156,6 +424,14 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -367,10 +643,10 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 @@ -390,10 +666,10 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: addi a1, a0, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll new file mode 100644 index 0000000000000..b3b9a62600f46 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <1 x bfloat> @v1bf16(<1 x bfloat> %va) { +; CHECK-LABEL: v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <1 x bfloat> %va + ret <1 x bfloat> %vb +} + +define <2 x bfloat> @v2bf16(<2 x bfloat> %va) { +; CHECK-LABEL: v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <2 x bfloat> %va + ret <2 x bfloat> %vb +} + +define <4 x bfloat> @v4bf16(<4 x bfloat> %va) { +; CHECK-LABEL: v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <4 x bfloat> %va + ret <4 x bfloat> %vb +} + +define <8 x bfloat> @v8bf16(<8 x bfloat> %va) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <8 x bfloat> %va + ret <8 x bfloat> %vb +} + +define <16 x bfloat> @v16bf16(<16 x bfloat> %va) { +; CHECK-LABEL: v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <16 x bfloat> %va + ret <16 x bfloat> %vb +} + +define <32 x bfloat> @v32bf16(<32 x bfloat> %va) { +; CHECK-LABEL: v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; 
CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <32 x bfloat> %va + ret <32 x bfloat> %vb +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index 968fd9f9bab80..dede0e707d929 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -1,12 +1,208 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <2 x bfloat> @vfneg_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t 
+; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfneg_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfneg_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl) + 
ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfneg_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfneg_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfneg_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; 
ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfneg_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfneg_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) @@ -23,6 +219,13 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> 
%m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -40,6 +243,13 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -59,6 +269,13 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -76,6 +293,13 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -95,6 +319,13 @@ define <8 x 
half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -112,6 +343,13 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -131,6 +369,13 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -148,6 +393,13 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) 
ret <16 x half> %v } @@ -359,10 +611,10 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 @@ -382,10 +634,10 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: addi a1, a0, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index ccf82b93d6b75..2f5fde3bb3b20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -1,12 +1,376 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define @vfsgnj_vv_nxv1bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv1bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv1bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv1bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv2bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv2bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv2bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, 
a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv2bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv4bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv4bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv4bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: 
vfsgnj_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv4bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv8bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv8bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv8bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, 
v8, v10 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv8bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv16bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv16bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv16bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv16bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} + 
+define @vfsgnj_vv_nxv32bf16( %va, %vb, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv32bf16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vfsgnj_vv_nxv32bf16_unmasked( %va, %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.copysign.nxv32bf16( %va, %vb, splat (i1 true), i32 %evl) + ret %v +} declare @llvm.vp.copysign.nxv1f16(, , , i32) @@ -26,6 +390,16 @@ define @vfsgnj_vv_nxv1f16( %va, @llvm.vp.copysign.nxv1f16( 
%va, %vb, %m, i32 %evl) ret %v } @@ -46,6 +420,16 @@ define @vfsgnj_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -68,6 +452,16 @@ define @vfsgnj_vv_nxv2f16( %va, @llvm.vp.copysign.nxv2f16( %va, %vb, %m, i32 %evl) ret %v } @@ -88,6 +482,16 @@ define @vfsgnj_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -110,6 +514,16 @@ define @vfsgnj_vv_nxv4f16( %va, @llvm.vp.copysign.nxv4f16( %va, %vb, %m, i32 %evl) ret %v } @@ -130,6 +544,16 @@ define @vfsgnj_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -152,6 +576,16 @@ define @vfsgnj_vv_nxv8f16( %va, @llvm.vp.copysign.nxv8f16( %va, %vb, %m, i32 %evl) ret %v } @@ -172,6 +606,16 @@ 
define @vfsgnj_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -194,6 +638,16 @@ define @vfsgnj_vv_nxv16f16( %va, @llvm.vp.copysign.nxv16f16( %va, %vb, %m, i32 %evl) ret %v } @@ -214,6 +668,16 @@ define @vfsgnj_vv_nxv16f16_unmasked( %v ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -236,6 +700,16 @@ define @vfsgnj_vv_nxv32f16( %va, @llvm.vp.copysign.nxv32f16( %va, %vb, %m, i32 %evl) ret %v } @@ -256,6 +730,16 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll index 1d8638844af7f..28426ad018b83 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll @@ -11,75 +11,165 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define @nxv1bf16( %v) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv1bf16( %v) ret %r } define @nxv2bf16( %v) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv2bf16( %v) ret %r } define @nxv4bf16( %v) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv4bf16( %v) ret %r } define @nxv8bf16( %v) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: 
ret %r = call @llvm.fabs.nxv8bf16( %v) ret %r } define @nxv16bf16( %v) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv16bf16( %v) ret %r } define @nxv32bf16( %v) { -; CHECK-LABEL: nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv32bf16( %v) ret %r } @@ -100,6 +190,14 @@ define @vfabs_nxv1f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi 
a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv1f16( %v) ret %r } @@ -120,6 +218,14 @@ define @vfabs_nxv2f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv2f16( %v) ret %r } @@ -140,6 +246,14 @@ define @vfabs_nxv4f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv4f16( %v) ret %r } @@ -160,6 +274,14 @@ define @vfabs_nxv8f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv8f16( %v) ret %r } @@ -180,6 +302,14 @@ define @vfabs_nxv16f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv16f16( %v) ret %r } @@ -200,6 +330,14 @@ define @vfabs_nxv32f16( %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; 
ZVFBFA-LABEL: vfabs_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call @llvm.fabs.nxv32f16( %v) ret %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 8f9f9c4256c8f..c6888c0bcae0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -1,12 +1,328 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define @vfabs_vv_nxv1bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv1bf16: +; ZVFH: # 
%bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv1bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv1bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv1bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfabs_vv_nxv2bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: 
vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv2bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv2bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv2bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfabs_vv_nxv4bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv4bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv4bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: 
vfabs_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv4bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfabs_vv_nxv8bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv8bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv8bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv8bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define 
@vfabs_vv_nxv16bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv16bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv16bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv16bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfabs_vv_nxv32bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; 
ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv32bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfabs_vv_nxv32bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fabs.nxv32bf16( %va, splat (i1 true), i32 %evl) + ret %v +} declare @llvm.vp.fabs.nxv1f16(, , i32) @@ -24,6 +340,14 @@ define @vfabs_vv_nxv1f16( %va, @llvm.vp.fabs.nxv1f16( %va, %m, i32 %evl) ret %v } @@ -42,6 +366,14 @@ define @vfabs_vv_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -62,6 +394,14 @@ define @vfabs_vv_nxv2f16( %va, @llvm.vp.fabs.nxv2f16( %va, %m, i32 %evl) ret %v } @@ -80,6 +420,14 @@ define @vfabs_vv_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui 
a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -100,6 +448,14 @@ define @vfabs_vv_nxv4f16( %va, @llvm.vp.fabs.nxv4f16( %va, %m, i32 %evl) ret %v } @@ -118,6 +474,14 @@ define @vfabs_vv_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -138,6 +502,14 @@ define @vfabs_vv_nxv8f16( %va, @llvm.vp.fabs.nxv8f16( %va, %m, i32 %evl) ret %v } @@ -156,6 +528,14 @@ define @vfabs_vv_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -176,6 +556,14 @@ define @vfabs_vv_nxv16f16( %va, @llvm.vp.fabs.nxv16f16( %va, %m, i32 %evl) ret %v } @@ -194,6 +582,14 @@ define @vfabs_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -214,6 +610,14 @@ define @vfabs_vv_nxv32f16( %va, 
@llvm.vp.fabs.nxv32f16( %va, %m, i32 %evl) ret %v } @@ -232,6 +636,14 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fabs.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -473,10 +885,10 @@ define @vfabs_vv_nxv16f64( %va, @vfabs_vv_nxv16f64_unmasked( ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll index 83f588ce5027d..bef2e8d3b57fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -11,87 +11,189 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define @nxv1bf16( %vm, %vs) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx 
v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv1bf16( %vm, %vs) ret %r } define @nxv2bf16( %vm, %vs) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv2bf16( %vm, %vs) ret %r } define @nxv4bf16( %vm, %vs) { -; 
CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv4bf16( %vm, %vs) ret %r } define @nxv8bf16( %vm, %vs) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; 
ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv8bf16( %vm, %vs) ret %r } define @nxv16bf16( %vm, %vs) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vand.vx v12, v12, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv16bf16( %vm, %vs) ret %r } define @nxv32bf32( %vm, %vs) { -; CHECK-LABEL: nxv32bf32: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vand.vx v16, v16, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %r = call @llvm.copysign.nxv32bf32( %vm, %vs) ret %r } @@ -114,6 +216,16 @@ define @vfcopysign_vv_nxv1f16( %vm, @llvm.copysign.nxv1f16( %vm, %vs) ret %r } @@ -136,6 +248,18 @@ define @vfcopysign_vf_nxv1f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv1f16( %vm, %splat) @@ -159,6 +283,17 @@ define @vfcopynsign_vv_nxv1f16( %vm, %vs %r = call @llvm.copysign.nxv1f16( %vm, %n) ret %r @@ -183,6 +318,19 @@ define @vfcopynsign_vf_nxv1f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -208,6 +356,17 @@ define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32( %vs to %r = call @llvm.copysign.nxv1f16( %vm, %e) ret %r @@ -235,6 +394,19 @@ define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32( 
poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -261,6 +433,18 @@ define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv1f16( %vm, %eneg) @@ -290,6 +474,20 @@ define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -320,6 +518,19 @@ define @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64( %vs to %r = call @llvm.copysign.nxv1f16( %vm, %e) ret %r @@ -351,6 +562,21 @@ define @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -381,6 +607,20 @@ define @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv1f16( %vm, %eneg) @@ -414,6 +654,22 @@ define @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -440,6 +696,16 @@ define @vfcopysign_vv_nxv2f16( %vm, @llvm.copysign.nxv2f16( %vm, %vs) ret %r } @@ -462,6 +728,18 @@ define @vfcopysign_vf_nxv2f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv2f16( %vm, %splat) @@ -485,6 +763,17 @@ define @vfcopynsign_vv_nxv2f16( %vm, %vs %r = call @llvm.copysign.nxv2f16( %vm, %n) ret %r @@ -509,6 +798,19 @@ define @vfcopynsign_vf_nxv2f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; 
ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -534,6 +836,16 @@ define @vfcopysign_vv_nxv4f16( %vm, @llvm.copysign.nxv4f16( %vm, %vs) ret %r } @@ -556,6 +868,18 @@ define @vfcopysign_vf_nxv4f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv4f16( %vm, %splat) @@ -579,6 +903,17 @@ define @vfcopynsign_vv_nxv4f16( %vm, %vs %r = call @llvm.copysign.nxv4f16( %vm, %n) ret %r @@ -603,6 +938,19 @@ define @vfcopynsign_vf_nxv4f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement 
poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -628,6 +976,16 @@ define @vfcopysign_vv_nxv8f16( %vm, @llvm.copysign.nxv8f16( %vm, %vs) ret %r } @@ -650,6 +1008,18 @@ define @vfcopysign_vf_nxv8f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v10, v10, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v10, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv8f16( %vm, %splat) @@ -673,6 +1043,17 @@ define @vfcopynsign_vv_nxv8f16( %vm, %vs %r = call @llvm.copysign.nxv8f16( %vm, %n) ret %r @@ -697,6 +1078,19 @@ define @vfcopynsign_vf_nxv8f16( %vm, half ; ZVFHMIN-NEXT: vand.vx v10, v10, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v10, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v10, v10, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -722,6 +1116,17 @@ define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32( %vs to %r = call @llvm.copysign.nxv8f16( %vm, %e) ret %r @@ -749,6 +1154,19 @@ define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -775,6 +1193,18 @@ define 
@vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv8f16( %vm, %eneg) @@ -804,6 +1234,20 @@ define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32( poison, float %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -834,6 +1278,19 @@ define @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64( %vs to %r = call @llvm.copysign.nxv8f16( %vm, %e) ret %r @@ -865,6 +1322,21 @@ define @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %esplat = fptrunc %splat to @@ -895,6 +1367,20 @@ define @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64( %vs %eneg = fptrunc %n to %r = call @llvm.copysign.nxv8f16( %vm, %eneg) @@ -928,6 +1414,22 @@ define @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64( poison, double %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -954,6 +1456,16 @@ define @vfcopysign_vv_nxv16f16( %vm, @llvm.copysign.nxv16f16( %vm, %vs) ret %r } @@ -976,6 +1488,18 @@ define @vfcopysign_vf_nxv16f16( %vm, ha ; ZVFHMIN-NEXT: vand.vx v12, v12, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v12, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv16f16( %vm, %splat) @@ -999,6 +1523,17 @@ define @vfcopynsign_vv_nxv16f16( %vm, < ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v12, v12, a0 +; 
ZVFBFA-NEXT: vand.vx v12, v12, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv16f16( %vm, %n) ret %r @@ -1023,6 +1558,19 @@ define @vfcopynsign_vf_nxv16f16( %vm, h ; ZVFHMIN-NEXT: vand.vx v12, v12, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v12, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v12, v12, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat @@ -1048,6 +1596,16 @@ define @vfcopysign_vv_nxv32f16( %vm, @llvm.copysign.nxv32f16( %vm, %vs) ret %r } @@ -1070,6 +1628,18 @@ define @vfcopysign_vf_nxv32f16( %vm, ha ; ZVFHMIN-NEXT: vand.vx v16, v16, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v16, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %r = call @llvm.copysign.nxv32f16( %vm, %splat) @@ -1093,6 +1663,17 @@ define @vfcopynsign_vv_nxv32f16( %vm, < ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v16, v16, a0 +; 
ZVFBFA-NEXT: vand.vx v16, v16, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %n = fneg %vs %r = call @llvm.copysign.nxv32f16( %vm, %n) ret %r @@ -1117,6 +1698,19 @@ define @vfcopynsign_vf_nxv32f16( %vm, h ; ZVFHMIN-NEXT: vand.vx v16, v16, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v16, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v16, v16, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %head = insertelement poison, half %s, i32 0 %splat = shufflevector %head, poison, zeroinitializer %n = fneg %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll index 9f456e97be11d..c0b4916a54e51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll @@ -11,69 +11,153 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define @nxv1bf16( %va) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: 
vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } define @nxv2bf16( %va) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } define @nxv4bf16( %va) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } define @nxv8bf16( %va) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; 
CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } define @nxv16bf16( %va) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } define @nxv32bf16( %va) { -; CHECK-LABEL: nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf16: +; ZVFBFA: # %bb.0: +; 
ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } @@ -91,6 +175,13 @@ define @vfneg_vv_nxv1f16( %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } @@ -108,6 +199,13 @@ define @vfneg_vv_nxv2f16( %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } @@ -125,6 +223,13 @@ define @vfneg_vv_nxv4f16( %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } @@ -142,6 +247,13 @@ define @vfneg_vv_nxv8f16( %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } @@ -159,6 +271,13 @@ define @vfneg_vv_nxv16f16( %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } @@ 
-176,6 +295,13 @@ define @vfneg_vv_nxv32f16( %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg %va ret %vb } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll index bbab056f0ff46..9bd24c44b1b90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -1,12 +1,304 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,ZVFBFA + +define @vfneg_vv_nxv1bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv1bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv1bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv1bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv2bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v 
v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv2bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv2bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv2bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv4bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv4bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv4bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFBFA: # 
%bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv4bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv8bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv8bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv8bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv8bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv16bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t 
+; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv16bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv16bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv16bf16( %va, splat (i1 true), i32 %evl) + ret %v +} + +define @vfneg_vv_nxv32bf16( %va, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv32bf16( %va, %m, i32 %evl) + ret %v +} + +define @vfneg_vv_nxv32bf16_unmasked( %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call @llvm.vp.fneg.nxv32bf16( %va, splat (i1 true), i32 %evl) + ret %v +} declare @llvm.vp.fneg.nxv1f16(, , i32) @@ -23,6 +315,13 @@ define @vfneg_vv_nxv1f16( %va, @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) ret %v } @@ -40,6 +339,13 @@ define @vfneg_vv_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -59,6 +365,13 @@ define @vfneg_vv_nxv2f16( %va, @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) ret %v } @@ -76,6 +389,13 @@ define @vfneg_vv_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -95,6 +415,13 @@ define @vfneg_vv_nxv4f16( %va, @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl) ret %v } @@ -112,6 +439,13 @@ define @vfneg_vv_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv4f16( %va, splat 
(i1 true), i32 %evl) ret %v } @@ -131,6 +465,13 @@ define @vfneg_vv_nxv8f16( %va, @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl) ret %v } @@ -148,6 +489,13 @@ define @vfneg_vv_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -167,6 +515,13 @@ define @vfneg_vv_nxv16f16( %va, @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl) ret %v } @@ -184,6 +539,13 @@ define @vfneg_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -203,6 +565,13 @@ define @vfneg_vv_nxv32f16( %va, @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) ret %v } @@ -220,6 +589,13 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -461,10 +837,10 @@ define @vfneg_vv_nxv16f64( %va, @vfneg_vv_nxv16f64_unmasked( ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: 
+; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret From cf1f871023e432837581b84c8563f3b0690dd9d3 Mon Sep 17 00:00:00 2001 From: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> Date: Mon, 10 Nov 2025 09:44:22 +0530 Subject: [PATCH 09/11] [Flang] Add parser support for prefetch directive (#139702) Implementation details: * Recognize prefetch directive in the parser as `!dir$ prefetch ...` * Unparse the prefetch directive * Add required tests Details on the prefetch directive: `!dir$ prefetch designator[, designator]...`, where the designator list can be a variable or an array reference. This directive is used to insert a hint to the code generator to prefetch instructions for memory references. --- flang/docs/Directives.md | 3 + flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 8 +- flang/lib/Lower/Bridge.cpp | 3 + flang/lib/Parser/Fortran-parsers.cpp | 4 + flang/lib/Parser/unparse.cpp | 4 + flang/lib/Semantics/resolve-names.cpp | 1 + flang/test/Parser/prefetch.f90 | 80 ++++++++++++++++++++ 8 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 flang/test/Parser/prefetch.f90 diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index 2f16a8d579f8b..d157b5d397dc9 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -52,6 +52,9 @@ A list of non-standard directives supported by Flang integer that specifying the unrolling factor. When `N` is `0` or `1`, the loop should not be unrolled at all. If `N` is omitted the optimizer will selects the number of times to unroll the loop. +* `!dir$ prefetch designator[, designator]...`, where the designator list can be + a variable or an array reference. This directive is used to insert a hint to + the code generator to prefetch instructions for memory references. * `!dir$ novector` disabling vectorization on the following loop. 
* `!dir$ nounroll` disabling unrolling on the following loop. * `!dir$ nounroll_and_jam` disabling unrolling and jamming on the following loop. diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index b2424023b0168..1c4d2daef2a11 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -218,6 +218,7 @@ class ParseTreeDumper { NODE(CompilerDirective, NoVector) NODE(CompilerDirective, NoUnroll) NODE(CompilerDirective, NoUnrollAndJam) + NODE(CompilerDirective, Prefetch) NODE(parser, ComplexLiteralConstant) NODE(parser, ComplexPart) NODE(parser, ComponentArraySpec) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 32e444fbb2e6c..cd9429b9631d6 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3360,6 +3360,7 @@ struct StmtFunctionStmt { // !DIR$ NOVECTOR // !DIR$ NOUNROLL // !DIR$ NOUNROLL_AND_JAM +// !DIR$ PREFETCH designator[, designator]... 
// !DIR$ FORCEINLINE // !DIR$ INLINE // !DIR$ NOINLINE @@ -3388,6 +3389,10 @@ struct CompilerDirective { struct UnrollAndJam { WRAPPER_CLASS_BOILERPLATE(UnrollAndJam, std::optional); }; + struct Prefetch { + WRAPPER_CLASS_BOILERPLATE( + Prefetch, std::list>); + }; EMPTY_CLASS(NoVector); EMPTY_CLASS(NoUnroll); EMPTY_CLASS(NoUnrollAndJam); @@ -3398,7 +3403,8 @@ struct CompilerDirective { CharBlock source; std::variant, LoopCount, std::list, VectorAlways, std::list, Unroll, UnrollAndJam, Unrecognized, - NoVector, NoUnroll, NoUnrollAndJam, ForceInline, Inline, NoInline> + NoVector, NoUnroll, NoUnrollAndJam, ForceInline, Inline, NoInline, + Prefetch> u; }; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0f4b39a07c5da..5779bcd5d293c 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3275,6 +3275,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { [&](const Fortran::parser::CompilerDirective::NoInline &) { attachInliningDirectiveToStmt(dir, &eval); }, + [&](const Fortran::parser::CompilerDirective::Prefetch &prefetch) { + TODO(getCurrentLocation(), "!$dir prefetch"); + }, [&](const auto &) {}}, dir.u); } diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index ea6a1eada2741..cdc9b0add7a48 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1297,6 +1297,7 @@ TYPE_PARSER(construct("STAT =" >> statVariable) || // !DIR$ LOOP COUNT (n1[, n2]...) // !DIR$ name[=value] [, name[=value]]... // !DIR$ UNROLL [n] +// !DIR$ PREFETCH designator[, designator]... 
// !DIR$ constexpr auto ignore_tkr{ "IGNORE_TKR" >> optionalList(construct( @@ -1311,6 +1312,8 @@ constexpr auto vectorAlways{ "VECTOR ALWAYS" >> construct()}; constexpr auto unroll{ "UNROLL" >> construct(maybe(digitString64))}; +constexpr auto prefetch{"PREFETCH" >> + construct(nonemptyList(indirect(designator)))}; constexpr auto unrollAndJam{"UNROLL_AND_JAM" >> construct(maybe(digitString64))}; constexpr auto novector{"NOVECTOR" >> construct()}; @@ -1329,6 +1332,7 @@ TYPE_PARSER(beginDirective >> "DIR$ "_tok >> construct(vectorAlways) || construct(unrollAndJam) || construct(unroll) || + construct(prefetch) || construct(novector) || construct(nounrollAndJam) || construct(nounroll) || diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 6bb14a43e7b99..dc0f083c9fc95 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1855,6 +1855,10 @@ class UnparseVisitor { Word("!DIR$ UNROLL"); Walk(" ", unroll.v); }, + [&](const CompilerDirective::Prefetch &prefetch) { + Word("!DIR$ PREFETCH"); + Walk(" ", prefetch.v); + }, [&](const CompilerDirective::UnrollAndJam &unrollAndJam) { Word("!DIR$ UNROLL_AND_JAM"); Walk(" ", unrollAndJam.v); diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index a2062ef28d52c..09ec951a422ca 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -10065,6 +10065,7 @@ void ResolveNamesVisitor::Post(const parser::CompilerDirective &x) { std::holds_alternative(x.u) || std::holds_alternative(x.u) || std::holds_alternative(x.u) || + std::holds_alternative(x.u) || std::holds_alternative(x.u)) { return; } diff --git a/flang/test/Parser/prefetch.f90 b/flang/test/Parser/prefetch.f90 new file mode 100644 index 0000000000000..1013a09c92117 --- /dev/null +++ b/flang/test/Parser/prefetch.f90 @@ -0,0 +1,80 @@ +!RUN: %flang_fc1 -fdebug-unparse-no-sema %s 2>&1 | FileCheck %s -check-prefix=UNPARSE +!RUN: %flang_fc1 
-fdebug-dump-parse-tree-no-sema %s 2>&1 | FileCheck %s -check-prefix=TREE + +subroutine test_prefetch_01(a, b) + integer, intent(in) :: a + integer, intent(inout) :: b(5) + integer :: i = 2 + integer :: res + +!TREE: | | DeclarationConstruct -> SpecificationConstruct -> CompilerDirective -> Prefetch -> Designator -> DataRef -> Name = 'a' + +!UNPARSE: !DIR$ PREFETCH a + !dir$ prefetch a + b(1) = a + +!TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> CompilerDirective -> Prefetch -> Designator -> DataRef -> Name = 'b' + +!UNPARSE: !DIR$ PREFETCH b + !dir$ prefetch b + res = sum(b) + +!TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> CompilerDirective -> Prefetch -> Designator -> DataRef -> Name = 'a' +!TREE: | | Designator -> DataRef -> ArrayElement +!TREE: | | | DataRef -> Name = 'b' +!TREE: | | | SectionSubscript -> SubscriptTriplet +!TREE: | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '3' +!TREE: | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '5' + +!UNPARSE: !DIR$ PREFETCH a, b(3:5) + !dir$ prefetch a, b(3:5) + res = a + b(4) + +!TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> CompilerDirective -> Prefetch -> Designator -> DataRef -> Name = 'res' +!TREE: | | Designator -> DataRef -> ArrayElement +!TREE: | | | DataRef -> Name = 'b' +!TREE: | | | SectionSubscript -> Integer -> Expr -> Add +!TREE: | | | | Expr -> Designator -> DataRef -> Name = 'i' +!TREE: | | | | Expr -> LiteralConstant -> IntLiteralConstant = '2' + +!UNPARSE: !DIR$ PREFETCH res, b(i+2) + !dir$ prefetch res, b(i+2) + res = res + b(i+2) +end subroutine + +subroutine test_prefetch_02(n, a) + integer, intent(in) :: n + integer, intent(in) :: a(n) + type :: t + real, allocatable :: x(:, :) + end type t + type(t) :: p + + do i = 1, n +!TREE: | | | | ExecutionPartConstruct -> ExecutableConstruct -> CompilerDirective -> Prefetch -> Designator -> DataRef -> ArrayElement +!TREE: | | | | | DataRef -> 
StructureComponent +!TREE: | | | | | | DataRef -> Name = 'p' +!TREE: | | | | | | Name = 'x' +!TREE: | | | | | SectionSubscript -> Integer -> Expr -> Designator -> DataRef -> Name = 'i' +!TREE: | | | | | SectionSubscript -> SubscriptTriplet +!TREE: | | | | Designator -> DataRef -> Name = 'a' + +!UNPARSE: !DIR$ PREFETCH p%x(i,:), a + !dir$ prefetch p%x(i, :), a + do j = 1, n +!TREE: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> CompilerDirective -> Prefetch -> Designator -> DataRef -> ArrayElement +!TREE: | | | | | | | DataRef -> StructureComponent +!TREE: | | | | | | | | DataRef -> Name = 'p' +!TREE: | | | | | | | | Name = 'x' +!TREE: | | | | | | | SectionSubscript -> Integer -> Expr -> Designator -> DataRef -> Name = 'i' +!TREE: | | | | | | | SectionSubscript -> Integer -> Expr -> Designator -> DataRef -> Name = 'j' +!TREE: | | | | | | Designator -> DataRef -> ArrayElement +!TREE: | | | | | | | DataRef -> Name = 'a' +!TREE: | | | | | | | SectionSubscript -> Integer -> Expr -> Designator -> DataRef -> Name = 'i' + +!UNPARSE: !DIR$ PREFETCH p%x(i,j), a(i) + !dir$ prefetch p%x(i, j), a(i) + p%x(i, j) = p%x(i, j) ** a(j) + end do + end do +end subroutine From 4d88bb6c6303d9f3894803d3c34d87bbc3e9a611 Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Mon, 10 Nov 2025 09:48:56 +0530 Subject: [PATCH 10/11] [Hexagon] Implement isMaskAndCmp0FoldingBeneficial (#166891) Sink `and` mask to `cmp` use block if it is masking a single bit since this will fold the `and/cmp/br` into a single `tstbit` instruction. 
--- .../Target/Hexagon/HexagonISelLowering.cpp | 10 +++ llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 + .../CodeGen/Hexagon/and_mask_cmp0_sink.ll | 68 +++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 526b4de975915..04a97606cb7f8 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { return AtomicExpansionKind::LLSC; } + +bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial( + const Instruction &AndI) const { + // Only sink 'and' mask to cmp use block if it is masking a single bit since + // this will fold the and/cmp/br into a single tstbit instruction. + ConstantInt *Mask = dyn_cast(AndI.getOperand(1)); + if (!Mask) + return false; + return Mask->getValue().isPowerOf2(); +} diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 8d04edbea5b43..4ac3e7671592a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -160,6 +160,8 @@ class HexagonTargetLowering : public TargetLowering { bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + /// Return true if an FMA operation is faster than a pair of mul and add /// instructions. 
fmuladd intrinsics will be expanded to FMAs when this /// method returns true (and FMAs are legal), otherwise fmuladd is diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll new file mode 100644 index 0000000000000..b5c3399ce6605 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; Test that 'and' mask is sunk to the cmp use block only if it is masking a single bit +; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s + +@A = global i32 zeroinitializer + +define i32 @and_sink1(i32 %a) { +; CHECK-LABEL: and_sink1: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: p0 = !tstbit(r0,#11) +; CHECK-NEXT: r0 = ##A +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %bb0 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) jump:nt .LBB0_1 +; CHECK-NEXT: memw(r0+#0) = #0 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %bb2 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %and = and i32 %a, 2048 + br label %bb0 +bb0: + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb0, label %bb2 +bb2: + ret i32 0 +} + +define i32 @and_sink2(i32 %a) { +; CHECK-LABEL: and_sink2: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = and(r0,##2049) +; CHECK-NEXT: r0 = ##A +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r1,#0) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB1_1: // %bb0 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) jump:nt .LBB1_1 +; CHECK-NEXT: memw(r0+#0) = #0 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %bb2 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %and = and i32 %a, 2049 + br 
label %bb0 +bb0: + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb0, label %bb2 +bb2: + ret i32 0 +} From 0d786b9a207aa0e6d88dde7fd9ffe0b364db69a4 Mon Sep 17 00:00:00 2001 From: Victor Chernyakin Date: Sun, 9 Nov 2025 21:24:29 -0700 Subject: [PATCH 11/11] [clang-tidy][NFC] Enable `performance-unnecessary-value-param` in the codebase (#163686) Closes #156156. In a few cases, instead of just applying the fix-it and making parameters const references to owning type, I refactored them to be non-owning types. --- clang-tools-extra/clang-tidy/.clang-tidy | 1 - clang-tools-extra/clang-tidy/ClangTidy.cpp | 4 ++-- .../clang-tidy/ExpandModularHeadersPPCallbacks.cpp | 5 ++--- .../clang-tidy/ExpandModularHeadersPPCallbacks.h | 5 ++--- clang-tools-extra/clang-tidy/android/CloexecCheck.cpp | 2 +- clang-tools-extra/clang-tidy/android/CloexecCheck.h | 6 +++--- clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp | 5 +++-- .../bugprone/RawMemoryCallOnNonTrivialTypeCheck.cpp | 7 ++++--- .../clang-tidy/bugprone/SignalHandlerCheck.cpp | 4 ++-- .../clang-tidy/bugprone/SignalHandlerCheck.h | 4 ++-- .../clang-tidy/bugprone/UnusedRaiiCheck.cpp | 4 ++-- .../cppcoreguidelines/SpecialMemberFunctionsCheck.h | 2 +- .../llvmlibc/RestrictSystemLibcHeadersCheck.cpp | 10 +++++----- .../clang-tidy/misc/RedundantExpressionCheck.cpp | 8 +++++--- .../clang-tidy/modernize/LoopConvertCheck.cpp | 2 +- .../clang-tidy/modernize/UseStdNumbersCheck.cpp | 11 ++++++----- .../clang-tidy/modernize/UseStdPrintCheck.cpp | 4 ++-- .../portability/RestrictSystemIncludesCheck.h | 4 ++-- .../clang-tidy/readability/ContainerContainsCheck.cpp | 5 ++--- clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp | 7 +++---- .../clang-tidy/utils/BracesAroundStatement.cpp | 2 +- .../clang-tidy/utils/BracesAroundStatement.h | 4 ++-- .../clang-tidy/utils/ExceptionAnalyzer.cpp | 2 +- .../clang-tidy/utils/TransformerClangTidyCheck.cpp | 2 +- .../clang-tidy/utils/TransformerClangTidyCheck.h | 5 +++-- 
clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp | 2 +- 26 files changed, 59 insertions(+), 58 deletions(-) diff --git a/clang-tools-extra/clang-tidy/.clang-tidy b/clang-tools-extra/clang-tidy/.clang-tidy index 0c2f34b529016..2cd9af494c1ec 100644 --- a/clang-tools-extra/clang-tidy/.clang-tidy +++ b/clang-tools-extra/clang-tidy/.clang-tidy @@ -15,7 +15,6 @@ Checks: > performance-*, -performance-enum-size, -performance-no-int-to-ptr, - -performance-unnecessary-value-param, readability-*, -readability-avoid-nested-conditional-operator, -readability-braces-around-statements, diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 870f5169cf7c7..7b40c80653ebc 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -455,8 +455,8 @@ ClangTidyASTConsumerFactory::createASTConsumer( if (Context.canEnableModuleHeadersParsing() && Context.getLangOpts().Modules && OverlayFS != nullptr) { - auto ModuleExpander = - std::make_unique(&Compiler, OverlayFS); + auto ModuleExpander = std::make_unique( + &Compiler, *OverlayFS); ModuleExpanderPP = ModuleExpander->getPreprocessor(); PP->addPPCallbacks(std::move(ModuleExpander)); } diff --git a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp index 487e5e299d132..9a4fc7a30b472 100644 --- a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp +++ b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.cpp @@ -65,8 +65,7 @@ class ExpandModularHeadersPPCallbacks::FileRecorder { }; ExpandModularHeadersPPCallbacks::ExpandModularHeadersPPCallbacks( - CompilerInstance *CI, - IntrusiveRefCntPtr OverlayFS) + CompilerInstance *CI, llvm::vfs::OverlayFileSystem &OverlayFS) : Recorder(std::make_unique()), Compiler(*CI), InMemoryFs(new llvm::vfs::InMemoryFileSystem), Sources(Compiler.getSourceManager()), @@ -76,7 +75,7 @@ 
ExpandModularHeadersPPCallbacks::ExpandModularHeadersPPCallbacks( LangOpts(Compiler.getLangOpts()), HSOpts(Compiler.getHeaderSearchOpts()) { // Add a FileSystem containing the extra files needed in place of modular // headers. - OverlayFS->pushOverlay(InMemoryFs); + OverlayFS.pushOverlay(InMemoryFs); Diags.setSourceManager(&Sources); // FIXME: Investigate whatever is there better way to initialize DiagEngine diff --git a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h index 60cb01f13ae2f..95216368492ca 100644 --- a/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h +++ b/clang-tools-extra/clang-tidy/ExpandModularHeadersPPCallbacks.h @@ -41,9 +41,8 @@ namespace tooling { /// non-modular way. class ExpandModularHeadersPPCallbacks : public PPCallbacks { public: - ExpandModularHeadersPPCallbacks( - CompilerInstance *CI, - IntrusiveRefCntPtr OverlayFS); + ExpandModularHeadersPPCallbacks(CompilerInstance *CI, + llvm::vfs::OverlayFileSystem &OverlayFS); ~ExpandModularHeadersPPCallbacks() override; /// Returns the preprocessor that provides callbacks for the whole diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp index c046578125590..ff86fc52879d9 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp @@ -36,7 +36,7 @@ static std::string buildFixMsgForStringFlag(const Expr *Arg, } void CloexecCheck::registerMatchersImpl( - MatchFinder *Finder, internal::Matcher Function) { + MatchFinder *Finder, const internal::Matcher &Function) { // We assume all the checked APIs are C functions. 
Finder->addMatcher( callExpr( diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.h b/clang-tools-extra/clang-tidy/android/CloexecCheck.h index fc1accd5b4d4a..a6dcb57d488da 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.h +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.h @@ -29,9 +29,9 @@ class CloexecCheck : public ClangTidyCheck { : ClangTidyCheck(Name, Context) {} protected: - void - registerMatchersImpl(ast_matchers::MatchFinder *Finder, - ast_matchers::internal::Matcher Function); + void registerMatchersImpl( + ast_matchers::MatchFinder *Finder, + const ast_matchers::internal::Matcher &Function); /// Currently, we have three types of fixes. /// diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp index 02fc8d89a0bf8..36dd3c94ee19f 100644 --- a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp @@ -18,6 +18,7 @@ #include #include #include +#include // FixItHint - Let the docs script know that this class does provide fixits @@ -217,11 +218,11 @@ utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const { const auto AddFromStd = [&](llvm::IntrusiveRefCntPtr Replacer, std::initializer_list Names) { - AddFrom(Replacer, Names, "std"); + AddFrom(std::move(Replacer), Names, "std"); }; const auto AddFromBoost = - [&](llvm::IntrusiveRefCntPtr Replacer, + [&](const llvm::IntrusiveRefCntPtr &Replacer, std::initializer_list< std::pair>> NamespaceAndNames) { diff --git a/clang-tools-extra/clang-tidy/bugprone/RawMemoryCallOnNonTrivialTypeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/RawMemoryCallOnNonTrivialTypeCheck.cpp index f7f7ccb8877d3..e212301047ce2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/RawMemoryCallOnNonTrivialTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/RawMemoryCallOnNonTrivialTypeCheck.cpp @@ -64,7 +64,8 @@ void 
RawMemoryCallOnNonTrivialTypeCheck::storeOptions( void RawMemoryCallOnNonTrivialTypeCheck::registerMatchers(MatchFinder *Finder) { using namespace ast_matchers::internal; - auto IsStructPointer = [](Matcher Constraint = anything(), + auto IsStructPointer = [](const Matcher &Constraint = + anything(), bool Bind = false) { return expr(unaryOperator( hasOperatorName("&"), @@ -74,8 +75,8 @@ void RawMemoryCallOnNonTrivialTypeCheck::registerMatchers(MatchFinder *Finder) { }; auto IsRecordSizeOf = expr(sizeOfExpr(hasArgumentOfType(equalsBoundNode("Record")))); - auto ArgChecker = [&](Matcher RecordConstraint, - BindableMatcher SecondArg = expr()) { + auto ArgChecker = [&](const Matcher &RecordConstraint, + const BindableMatcher &SecondArg = expr()) { return allOf(argumentCountIs(3), hasArgument(0, IsStructPointer(RecordConstraint, true)), hasArgument(1, SecondArg), hasArgument(2, IsRecordSizeOf)); diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp index b9a0b9ee3a6c6..282a3b2581b8b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp @@ -435,7 +435,7 @@ void SignalHandlerCheck::check(const MatchFinder::MatchResult &Result) { bool SignalHandlerCheck::checkFunction( const FunctionDecl *FD, const Expr *CallOrRef, - std::function ChainReporter) { + llvm::function_ref ChainReporter) { const bool FunctionIsCalled = isa(CallOrRef); if (isStandardFunction(FD)) { @@ -471,7 +471,7 @@ bool SignalHandlerCheck::checkFunction( bool SignalHandlerCheck::checkFunctionCPP14( const FunctionDecl *FD, const Expr *CallOrRef, - std::function ChainReporter) { + llvm::function_ref ChainReporter) { if (!FD->isExternC()) { diag(CallOrRef->getBeginLoc(), "functions without C linkage are not allowed as signal " diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h 
b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h index 67bdc9e292764..324b2c88207fd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h @@ -48,10 +48,10 @@ class SignalHandlerCheck : public ClangTidyCheck { /// The bool parameter is used like \c SkipPathEnd in \c reportHandlerChain . /// \return Returns true if a diagnostic was emitted for this function. bool checkFunction(const FunctionDecl *FD, const Expr *CallOrRef, - std::function ChainReporter); + llvm::function_ref ChainReporter); /// Similar as \c checkFunction but only check for C++14 rules. bool checkFunctionCPP14(const FunctionDecl *FD, const Expr *CallOrRef, - std::function ChainReporter); + llvm::function_ref ChainReporter); /// Returns true if a standard library function is considered /// asynchronous-safe. bool isStandardFunctionAsyncSafe(const FunctionDecl *FD) const; diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp index dae679baf14e5..6502fc9bfb89e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp @@ -37,8 +37,8 @@ void UnusedRaiiCheck::registerMatchers(MatchFinder *Finder) { } template -static void reportDiagnostic(DiagnosticBuilder D, const T *Node, SourceRange SR, - bool DefaultConstruction) { +static void reportDiagnostic(const DiagnosticBuilder &D, const T *Node, + SourceRange SR, bool DefaultConstruction) { const char *Replacement = " give_me_a_name"; // If this is a default ctor we have to remove the parens or we'll introduce a diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h index 1d41b578c77ae..6d76e07078f3b 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h +++ 
b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h @@ -93,7 +93,7 @@ struct DenseMapInfo< "TOMBSTONE"}; } - static unsigned getHashValue(ClassDefId Val) { + static unsigned getHashValue(const ClassDefId &Val) { assert(Val != getEmptyKey() && "Cannot hash the empty key!"); assert(Val != getTombstoneKey() && "Cannot hash the tombstone key!"); diff --git a/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp index 129b8a9a30a59..ecd8e19b8b2c6 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp @@ -22,11 +22,11 @@ namespace { class RestrictedIncludesPPCallbacks : public portability::RestrictedIncludesPPCallbacks { public: - explicit RestrictedIncludesPPCallbacks( - RestrictSystemLibcHeadersCheck &Check, const SourceManager &SM, - const SmallString<128> CompilerIncudeDir) + explicit RestrictedIncludesPPCallbacks(RestrictSystemLibcHeadersCheck &Check, + const SourceManager &SM, + SmallString<128> CompilerIncudeDir) : portability::RestrictedIncludesPPCallbacks(Check, SM), - CompilerIncudeDir(CompilerIncudeDir) {} + CompilerIncudeDir(std::move(CompilerIncudeDir)) {} void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, @@ -61,7 +61,7 @@ void RestrictSystemLibcHeadersCheck::registerPPCallbacks( StringRef(PP->getHeaderSearchInfo().getHeaderSearchOpts().ResourceDir); llvm::sys::path::append(CompilerIncudeDir, "include"); PP->addPPCallbacks(std::make_unique( - *this, SM, CompilerIncudeDir)); + *this, SM, std::move(CompilerIncudeDir))); } } // namespace clang::tidy::llvm_libc diff --git a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp index c8ae41c42064a..ea8405364df4c 100644 --- 
a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp @@ -1147,16 +1147,18 @@ void RedundantExpressionCheck::checkArithmeticExpr( } } -static bool exprEvaluatesToZero(BinaryOperatorKind Opcode, APSInt Value) { +static bool exprEvaluatesToZero(BinaryOperatorKind Opcode, + const APSInt &Value) { return (Opcode == BO_And || Opcode == BO_AndAssign) && Value == 0; } static bool exprEvaluatesToBitwiseNegatedZero(BinaryOperatorKind Opcode, - APSInt Value) { + const APSInt &Value) { return (Opcode == BO_Or || Opcode == BO_OrAssign) && ~Value == 0; } -static bool exprEvaluatesToSymbolic(BinaryOperatorKind Opcode, APSInt Value) { +static bool exprEvaluatesToSymbolic(BinaryOperatorKind Opcode, + const APSInt &Value) { return ((Opcode == BO_Or || Opcode == BO_OrAssign) && Value == 0) || ((Opcode == BO_And || Opcode == BO_AndAssign) && ~Value == 0); } diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 30e2d4fe0a372..65c17223bae92 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -92,7 +92,7 @@ static StatementMatcher incrementVarMatcher() { } static StatementMatcher -arrayConditionMatcher(internal::Matcher LimitExpr) { +arrayConditionMatcher(const internal::Matcher &LimitExpr) { return binaryOperator( anyOf(allOf(hasOperatorName("<"), hasLHS(integerComparisonMatcher()), hasRHS(LimitExpr)), diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp index 47ff9ffd3f7b7..0315728f851d1 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStdNumbersCheck.cpp @@ -81,16 +81,17 @@ AST_MATCHER_P(clang::Expr, anyOfExhaustive, std::vector>, // literals. 
struct MatchBuilder { auto - ignoreParenAndArithmeticCasting(const Matcher Matcher) const { + ignoreParenAndArithmeticCasting(const Matcher &Matcher) const { return expr(hasType(qualType(isArithmetic())), ignoringParenCasts(Matcher)); } - auto ignoreParenAndFloatingCasting(const Matcher Matcher) const { + auto + ignoreParenAndFloatingCasting(const Matcher &Matcher) const { return expr(hasType(qualType(isFloating())), ignoringParenCasts(Matcher)); } auto matchMathCall(const StringRef FunctionName, - const Matcher ArgumentMatcher) const { + const Matcher &ArgumentMatcher) const { auto HasAnyPrecisionName = hasAnyName( FunctionName, (FunctionName + "l").str(), (FunctionName + "f").str()); // Support long double(l) and float(f). @@ -100,7 +101,7 @@ struct MatchBuilder { hasArgument(0, ArgumentMatcher)))); } - auto matchSqrt(const Matcher ArgumentMatcher) const { + auto matchSqrt(const Matcher &ArgumentMatcher) const { return matchMathCall("sqrt", ArgumentMatcher); } @@ -148,7 +149,7 @@ struct MatchBuilder { return expr(anyOf(Int, Float, Dref)); } - auto match1Div(const Matcher Match) const { + auto match1Div(const Matcher &Match) const { return binaryOperator(hasOperatorName("/"), hasLHS(matchValue(1)), hasRHS(Match)); } diff --git a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp index 99ade046305c1..22dc0683ac348 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStdPrintCheck.cpp @@ -70,8 +70,8 @@ void UseStdPrintCheck::registerPPCallbacks(const SourceManager &SM, this->PP = PP; } -static clang::ast_matchers::StatementMatcher -unusedReturnValue(clang::ast_matchers::StatementMatcher MatchedCallExpr) { +static clang::ast_matchers::StatementMatcher unusedReturnValue( + const clang::ast_matchers::StatementMatcher &MatchedCallExpr) { auto UnusedInCompoundStmt = compoundStmt(forEach(MatchedCallExpr), // The checker can't currently 
differentiate between the diff --git a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h index 0dd640224f011..d66149aceae91 100644 --- a/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h +++ b/clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.h @@ -23,7 +23,7 @@ namespace clang::tidy::portability { class RestrictSystemIncludesCheck : public ClangTidyCheck { public: RestrictSystemIncludesCheck(StringRef Name, ClangTidyContext *Context, - std::string DefaultAllowedIncludes = "*") + StringRef DefaultAllowedIncludes = "*") : ClangTidyCheck(Name, Context), AllowedIncludes(Options.get("Includes", DefaultAllowedIncludes)), AllowedIncludesGlobList(AllowedIncludes) {} @@ -36,7 +36,7 @@ class RestrictSystemIncludesCheck : public ClangTidyCheck { } private: - std::string AllowedIncludes; + StringRef AllowedIncludes; GlobList AllowedIncludesGlobList; }; diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp index a3405524553d4..efcf13d63b4ff 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp @@ -47,9 +47,8 @@ void ContainerContainsCheck::registerMatchers(MatchFinder *Finder) { const auto StringNpos = anyOf(declRefExpr(to(varDecl(hasName("npos")))), memberExpr(member(hasName("npos")))); - auto AddSimpleMatcher = [&](auto Matcher) { - Finder->addMatcher( - traverse(TK_IgnoreUnlessSpelledInSource, std::move(Matcher)), this); + auto AddSimpleMatcher = [&](const auto &Matcher) { + Finder->addMatcher(traverse(TK_IgnoreUnlessSpelledInSource, Matcher), this); }; // Find membership tests which use `count()`. 
diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp index f5e4bf020bd68..bc6bd164e24f8 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp @@ -466,10 +466,9 @@ createOptionsProvider(llvm::IntrusiveRefCntPtr FS) { } static llvm::IntrusiveRefCntPtr -getVfsFromFile(const std::string &OverlayFile, - llvm::IntrusiveRefCntPtr BaseFS) { +getVfsFromFile(const std::string &OverlayFile, vfs::FileSystem &BaseFS) { llvm::ErrorOr> Buffer = - BaseFS->getBufferForFile(OverlayFile); + BaseFS.getBufferForFile(OverlayFile); if (!Buffer) { llvm::errs() << "Can't load virtual filesystem overlay file '" << OverlayFile << "': " << Buffer.getError().message() @@ -585,7 +584,7 @@ static llvm::IntrusiveRefCntPtr createBaseFS() { if (!VfsOverlay.empty()) { IntrusiveRefCntPtr VfsFromFile = - getVfsFromFile(VfsOverlay, BaseFS); + getVfsFromFile(VfsOverlay, *BaseFS); if (!VfsFromFile) return nullptr; BaseFS->pushOverlay(std::move(VfsFromFile)); diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp index aacb4e33ea570..d0659ad94b86a 100644 --- a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp +++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp @@ -141,7 +141,7 @@ BraceInsertionHints getBraceInsertionsHints(const Stmt *const S, // StartLoc points at the location of the opening brace to be inserted. 
SourceLocation EndLoc; - std::string ClosingInsertion; + StringRef ClosingInsertion; if (EndLocHint.isValid()) { EndLoc = EndLocHint; ClosingInsertion = "} "; diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h index 879c84d108d7b..2b2d71f3cf7b2 100644 --- a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h +++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h @@ -39,7 +39,7 @@ struct BraceInsertionHints { /// Constructor for a hint offering fix-its for brace insertion. Both /// positions must be valid. BraceInsertionHints(SourceLocation OpeningBracePos, - SourceLocation ClosingBracePos, std::string ClosingBrace) + SourceLocation ClosingBracePos, StringRef ClosingBrace) : DiagnosticPos(OpeningBracePos), OpeningBracePos(OpeningBracePos), ClosingBracePos(ClosingBracePos), ClosingBrace(ClosingBrace) { assert(offersFixIts()); @@ -64,7 +64,7 @@ struct BraceInsertionHints { private: SourceLocation OpeningBracePos; SourceLocation ClosingBracePos; - std::string ClosingBrace; + StringRef ClosingBrace; }; /// Create fix-it hints for braces that wrap the given statement when applied. diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp index 8ead26407ee5d..c774f54b1da5a 100644 --- a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp +++ b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp @@ -192,7 +192,7 @@ static bool isFunctionPointerConvertible(QualType From, QualType To) { // // The function should only be called in C++ mode. 
static bool isQualificationConvertiblePointer(QualType From, QualType To, - LangOptions LangOpts) { + const LangOptions &LangOpts) { // [N4659 7.5 (1)] // A cv-decomposition of a type T is a sequence of cv_i and P_i such that T is diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp index d7dad624f5e7e..b58e716f7103f 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp @@ -66,7 +66,7 @@ TransformerClangTidyCheck::TransformerClangTidyCheck(StringRef Name, // we would be accessing `getLangOpts` and `Options` before the underlying // `ClangTidyCheck` instance was properly initialized. TransformerClangTidyCheck::TransformerClangTidyCheck( - std::function>( + llvm::function_ref>( const LangOptions &, const OptionsView &)> MakeRule, StringRef Name, ClangTidyContext *Context) diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h index e77f84b0cdc10..da8606f993b6e 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h @@ -48,8 +48,9 @@ class TransformerClangTidyCheck : public ClangTidyCheck { /// /// See \c setRule for constraints on the rule. 
TransformerClangTidyCheck( - std::function>( - const LangOptions &, const OptionsView &)> + llvm::function_ref< + std::optional>( + const LangOptions &, const OptionsView &)> MakeRule, StringRef Name, ClangTidyContext *Context); diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp index 8b7019b56a50e..09adbf1155e62 100644 --- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp @@ -55,7 +55,7 @@ AST_MATCHER(Expr, hasSideEffects) { } // namespace static auto -makeExprMatcher(ast_matchers::internal::Matcher ArgumentMatcher, +makeExprMatcher(const ast_matchers::internal::Matcher &ArgumentMatcher, ArrayRef MethodNames, ArrayRef FreeNames) { return expr(