llvm · RolandF77 · Jun 9, 2025 · May 23, 2025 · May 23, 2025 · May 27, 2025
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9664,7 +9664,25 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     }
   }
 
-  if (!BVNIsConstantSplat || SplatBitSize > 32) {
+  bool IsSplat64 = false;
+  uint64_t SplatBits = 0;
+  int32_t SextVal = 0;
+  if (BVNIsConstantSplat) {
+    if (SplatBitSize <= 32) {
+      SplatBits = APSplatBits.getZExtValue();
+      SextVal = SignExtend32(SplatBits, SplatBitSize);
+    } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
+      int64_t Splat64Val = APSplatBits.getSExtValue();
+      SplatBits = (uint64_t)Splat64Val;
+      SextVal = (int32_t)SplatBits;
+      bool P9Vector = Subtarget.hasP9Vector();
+      int32_t Hi = P9Vector ? 127 : 15;
+      int32_t Lo = P9Vector ? -128 : -16;
+      IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
+    }
+  }
+
+  if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
     unsigned NewOpcode = PPCISD::LD_SPLAT;
 
     // Handle load-and-splat patterns as we have instructions that will do this
@@ -9750,7 +9768,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     return SDValue();
   }
 
-  uint64_t SplatBits = APSplatBits.getZExtValue();
   uint64_t SplatUndef = APSplatUndef.getZExtValue();
   unsigned SplatSize = SplatBitSize / 8;
 
@@ -9785,13 +9802,37 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                   dl);
 
   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
-  int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
-  if (SextVal >= -16 && SextVal <= 15)
-    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
-                                  dl);
+  // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
+  if (SextVal >= -16 && SextVal <= 15) {
+    unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
+    SDValue Res =
+        getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
+    if (SplatSize != 8)
+      return Res;
+    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
+  }
 
   // Two instruction sequences.
 
+  if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
+    SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
+    SmallVector<SDValue, 16> Ops(16, C);
+    SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
+    assert((SplatSize == 2 || SplatSize == 4 || SplatSize == 8) &&
+           "Unexpected type for vector constant.");
+    unsigned IID;
+    if (SplatSize == 2) {
+      IID = Intrinsic::ppc_altivec_vupklsb;
+    } else if (SplatSize == 4) {
+      IID = Intrinsic::ppc_altivec_vextsb2w;
+    } else { // SplatSize == 8
+      IID = Intrinsic::ppc_altivec_vextsb2d;
+    }
+    SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl);
+    return DAG.getBitcast(Op->getValueType(0), Extend);
+  }
+  assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
+
   // If this value is in the range [-32,30] and is even, use:
   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
   // If this value is in the range [17,31] and is odd, use:

diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -3713,30 +3713,26 @@ entry:
 define <2 x i64> @spltConst1ll() {
 ; P9BE-LABEL: spltConst1ll:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    addis r3, r2, .LCPI65_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI65_0@toc@l
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    vspltisw v2, 1
+; P9BE-NEXT:    vupklsw v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltConst1ll:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    addis r3, r2, .LCPI65_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI65_0@toc@l
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    vspltisw v2, 1
+; P9LE-NEXT:    vupklsw v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltConst1ll:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r3, r2, .LCPI65_0@toc@ha
-; P8BE-NEXT:    addi r3, r3, .LCPI65_0@toc@l
-; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    vspltisw v2, 1
+; P8BE-NEXT:    vupklsw v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltConst1ll:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r3, r2, .LCPI65_0@toc@ha
-; P8LE-NEXT:    addi r3, r3, .LCPI65_0@toc@l
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    vspltisw v2, 1
+; P8LE-NEXT:    vupklsw v2, v2
 ; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 1, i64 1>
@@ -4173,30 +4169,26 @@ entry:
 define <2 x i64> @spltCnstConvftoll() {
 ; P9BE-LABEL: spltCnstConvftoll:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    addis r3, r2, .LCPI78_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI78_0@toc@l
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    vupklsw v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltCnstConvftoll:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    addis r3, r2, .LCPI78_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI78_0@toc@l
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    vupklsw v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltCnstConvftoll:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r3, r2, .LCPI78_0@toc@ha
-; P8BE-NEXT:    addi r3, r3, .LCPI78_0@toc@l
-; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    vupklsw v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltCnstConvftoll:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r3, r2, .LCPI78_0@toc@ha
-; P8LE-NEXT:    addi r3, r3, .LCPI78_0@toc@l
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    vupklsw v2, v2
 ; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
@@ -4526,30 +4518,26 @@ entry:
 define <2 x i64> @spltCnstConvdtoll() {
 ; P9BE-LABEL: spltCnstConvdtoll:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    addis r3, r2, .LCPI87_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI87_0@toc@l
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    vupklsw v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltCnstConvdtoll:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    addis r3, r2, .LCPI87_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI87_0@toc@l
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    vupklsw v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltCnstConvdtoll:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r3, r2, .LCPI87_0@toc@ha
-; P8BE-NEXT:    addi r3, r3, .LCPI87_0@toc@l
-; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    vupklsw v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltCnstConvdtoll:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r3, r2, .LCPI87_0@toc@ha
-; P8LE-NEXT:    addi r3, r3, .LCPI87_0@toc@l
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    vupklsw v2, v2
 ; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
@@ -4879,30 +4867,26 @@ entry:
 define <2 x i64> @spltConst1ull() {
 ; P9BE-LABEL: spltConst1ull:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    addis r3, r2, .LCPI97_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI97_0@toc@l
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    vspltisw v2, 1
+; P9BE-NEXT:    vupklsw v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltConst1ull:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    addis r3, r2, .LCPI97_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI97_0@toc@l
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    vspltisw v2, 1
+; P9LE-NEXT:    vupklsw v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltConst1ull:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r3, r2, .LCPI97_0@toc@ha
-; P8BE-NEXT:    addi r3, r3, .LCPI97_0@toc@l
-; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    vspltisw v2, 1
+; P8BE-NEXT:    vupklsw v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltConst1ull:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r3, r2, .LCPI97_0@toc@ha
-; P8LE-NEXT:    addi r3, r3, .LCPI97_0@toc@l
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    vspltisw v2, 1
+; P8LE-NEXT:    vupklsw v2, v2
 ; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 1, i64 1>
@@ -5339,30 +5323,26 @@ entry:
 define <2 x i64> @spltCnstConvftoull() {
 ; P9BE-LABEL: spltCnstConvftoull:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    addis r3, r2, .LCPI110_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI110_0@toc@l
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    vupklsw v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltCnstConvftoull:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    addis r3, r2, .LCPI110_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI110_0@toc@l
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    vupklsw v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltCnstConvftoull:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r3, r2, .LCPI110_0@toc@ha
-; P8BE-NEXT:    addi r3, r3, .LCPI110_0@toc@l
-; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    vupklsw v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltCnstConvftoull:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r3, r2, .LCPI110_0@toc@ha
-; P8LE-NEXT:    addi r3, r3, .LCPI110_0@toc@l
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    vupklsw v2, v2
 ; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>
@@ -5692,30 +5672,26 @@ entry:
 define <2 x i64> @spltCnstConvdtoull() {
 ; P9BE-LABEL: spltCnstConvdtoull:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    addis r3, r2, .LCPI119_0@toc@ha
-; P9BE-NEXT:    addi r3, r3, .LCPI119_0@toc@l
-; P9BE-NEXT:    lxv v2, 0(r3)
+; P9BE-NEXT:    vspltisw v2, 4
+; P9BE-NEXT:    vupklsw v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: spltCnstConvdtoull:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    addis r3, r2, .LCPI119_0@toc@ha
-; P9LE-NEXT:    addi r3, r3, .LCPI119_0@toc@l
-; P9LE-NEXT:    lxv v2, 0(r3)
+; P9LE-NEXT:    vspltisw v2, 4
+; P9LE-NEXT:    vupklsw v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: spltCnstConvdtoull:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    addis r3, r2, .LCPI119_0@toc@ha
-; P8BE-NEXT:    addi r3, r3, .LCPI119_0@toc@l
-; P8BE-NEXT:    lxvd2x v2, 0, r3
+; P8BE-NEXT:    vspltisw v2, 4
+; P8BE-NEXT:    vupklsw v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: spltCnstConvdtoull:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r3, r2, .LCPI119_0@toc@ha
-; P8LE-NEXT:    addi r3, r3, .LCPI119_0@toc@l
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    vspltisw v2, 4
+; P8LE-NEXT:    vupklsw v2, v2
 ; P8LE-NEXT:    blr
 entry:
   ret <2 x i64> <i64 4, i64 4>

diff --git a/llvm/test/CodeGen/PowerPC/mul-const-vector.ll b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
@@ -271,8 +271,7 @@ define <2 x i64> @test1_v2i64(<2 x i64> %a) {
         ret <2 x i64> %tmp.1
 }
 ; CHECK-LABEL: test1_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
 ; CHECK-NOT: vmul
 ; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
 
@@ -282,8 +281,7 @@ define <2 x i64> @test2_v2i64(<2 x i64> %a) {
 }
 
 ; CHECK-LABEL: test2_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
 ; CHECK-NOT: vmul
 ; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
 ; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]]
@@ -294,8 +292,7 @@ define <2 x i64> @test3_v2i64(<2 x i64> %a) {
 }
 
 ; CHECK-LABEL: test3_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
 ; CHECK-NOT: vmul
 ; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
 ; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
@@ -308,8 +305,7 @@ define <2 x i64> @test4_v2i64(<2 x i64> %a) {
 }
 
 ; CHECK-LABEL: test4_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
 ; CHECK-NOT: vmul
 ; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
 ; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
@@ -322,8 +318,7 @@ define <2 x i64> @test5_v2i64(<2 x i64> %a) {
 }
 
 ; CHECK-LABEL: test5_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
 ; CHECK-NOT: vmul
 ; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
 ; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]]
@@ -337,8 +332,7 @@ define <2 x i64> @test6_v2i64(<2 x i64> %a) {
 }
 
 ; CHECK-LABEL: test6_v2i64:
-; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
+; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
 ; CHECK-NOT: vmul
 ; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
 ; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]]

diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -105,9 +105,8 @@ define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
 ;
 ; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
 ; CHECK-NOPREFIX:       # %bb.0: # %entry
-; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-NOPREFIX-NEXT:    lxv vs34, 0(r3)
+; CHECK-NOPREFIX-NEXT:    vspltisw v2, -16
+; CHECK-NOPREFIX-NEXT:    vupklsw v2, v2
 ; CHECK-NOPREFIX-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testDoubleToDoubleNaNFail: