-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[PowerPC] extend smaller splats into bigger splats (with fix) #142194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
3d6a32b
87641f5
835c042
3186a56
3fc0b9a
613fdac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9664,7 +9664,25 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, | |
} | ||
} | ||
|
||
if (!BVNIsConstantSplat || SplatBitSize > 32) { | ||
bool IsSplat64 = false; | ||
uint64_t SplatBits = 0; | ||
int32_t SextVal = 0; | ||
if (BVNIsConstantSplat) { | ||
if (SplatBitSize <= 32) { | ||
SplatBits = APSplatBits.getZExtValue(); | ||
SextVal = SignExtend32(SplatBits, SplatBitSize); | ||
} else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) { | ||
int64_t Splat64Val = APSplatBits.getSExtValue(); | ||
SplatBits = (uint64_t)Splat64Val; | ||
SextVal = (int32_t)SplatBits; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a C-style cast; suggest changing it to
since the SplatBits is not used , we can change to and we can hoist There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SplatBits is used below, it is an existing variable moved earlier. I think the rest is addressed. |
||
bool P9Vector = Subtarget.hasP9Vector(); | ||
int32_t Hi = P9Vector ? 127 : 15; | ||
int32_t Lo = P9Vector ? -128 : -16; | ||
IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi; | ||
} | ||
} | ||
|
||
if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) { | ||
unsigned NewOpcode = PPCISD::LD_SPLAT; | ||
|
||
// Handle load-and-splat patterns as we have instructions that will do this | ||
|
@@ -9750,7 +9768,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, | |
return SDValue(); | ||
} | ||
|
||
uint64_t SplatBits = APSplatBits.getZExtValue(); | ||
uint64_t SplatUndef = APSplatUndef.getZExtValue(); | ||
unsigned SplatSize = SplatBitSize / 8; | ||
|
||
|
@@ -9785,13 +9802,37 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, | |
dl); | ||
|
||
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. | ||
int32_t SextVal = SignExtend32(SplatBits, SplatBitSize); | ||
if (SextVal >= -16 && SextVal <= 15) | ||
return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG, | ||
dl); | ||
// Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15]. | ||
if (SextVal >= -16 && SextVal <= 15) { | ||
unsigned UseSize = SplatSize == 8 ? 4 : SplatSize; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggest change to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. SplatSize may be 1, 2, 4, or 8, not just 4 or 8. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it be reasonable to put that comment about the SplatSize in a comment here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added a comment. |
||
SDValue Res = | ||
getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl); | ||
if (SplatSize != 8) | ||
return Res; | ||
return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl); | ||
} | ||
|
||
// Two instruction sequences. | ||
|
||
if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) { | ||
SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32); | ||
SmallVector<SDValue, 16> Ops(16, C); | ||
SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops); | ||
assert((SplatSize == 2 || SplatSize == 4 || SplatSize == 8) && | ||
"Unexpected type for vector constant."); | ||
unsigned IID; | ||
if (SplatSize == 2) { | ||
IID = Intrinsic::ppc_altivec_vupklsb; | ||
} else if (SplatSize == 4) { | ||
IID = Intrinsic::ppc_altivec_vextsb2w; | ||
} else { // SplatSize == 8 | ||
IID = Intrinsic::ppc_altivec_vextsb2d; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines 9821~9830: prefer changing this to a switch statement.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl); | ||
return DAG.getBitcast(Op->getValueType(0), Extend); | ||
} | ||
assert(!IsSplat64 && "Unhandled 64-bit splat pattern"); | ||
|
||
// If this value is in the range [-32,30] and is even, use: | ||
// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) | ||
// If this value is in the range [17,31] and is odd, use: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3713,30 +3713,26 @@ entry: | |
define <2 x i64> @spltConst1ll() { | ||
; P9BE-LABEL: spltConst1ll: | ||
; P9BE: # %bb.0: # %entry | ||
; P9BE-NEXT: addis r3, r2, .LCPI65_0@toc@ha | ||
; P9BE-NEXT: addi r3, r3, .LCPI65_0@toc@l | ||
; P9BE-NEXT: lxv v2, 0(r3) | ||
; P9BE-NEXT: vspltisw v2, 1 | ||
; P9BE-NEXT: vupklsw v2, v2 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am curious why we cannot implement
to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a P10 instruction. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So do we want to check if the target is P10 and use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like there already is code to exploit P10 instructions at line 9630 above, so P10 will not get here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To be clearer, we already generate xxspltidp or xxsplti32dx for P10. We can't always use xxspltidp since it is a floating-point instruction. |
||
; P9BE-NEXT: blr | ||
; | ||
; P9LE-LABEL: spltConst1ll: | ||
; P9LE: # %bb.0: # %entry | ||
; P9LE-NEXT: addis r3, r2, .LCPI65_0@toc@ha | ||
; P9LE-NEXT: addi r3, r3, .LCPI65_0@toc@l | ||
; P9LE-NEXT: lxv v2, 0(r3) | ||
; P9LE-NEXT: vspltisw v2, 1 | ||
; P9LE-NEXT: vupklsw v2, v2 | ||
; P9LE-NEXT: blr | ||
; | ||
; P8BE-LABEL: spltConst1ll: | ||
; P8BE: # %bb.0: # %entry | ||
; P8BE-NEXT: addis r3, r2, .LCPI65_0@toc@ha | ||
; P8BE-NEXT: addi r3, r3, .LCPI65_0@toc@l | ||
; P8BE-NEXT: lxvd2x v2, 0, r3 | ||
; P8BE-NEXT: vspltisw v2, 1 | ||
; P8BE-NEXT: vupklsw v2, v2 | ||
; P8BE-NEXT: blr | ||
; | ||
; P8LE-LABEL: spltConst1ll: | ||
; P8LE: # %bb.0: # %entry | ||
; P8LE-NEXT: addis r3, r2, .LCPI65_0@toc@ha | ||
; P8LE-NEXT: addi r3, r3, .LCPI65_0@toc@l | ||
; P8LE-NEXT: lxvd2x v2, 0, r3 | ||
; P8LE-NEXT: vspltisw v2, 1 | ||
; P8LE-NEXT: vupklsw v2, v2 | ||
; P8LE-NEXT: blr | ||
entry: | ||
ret <2 x i64> <i64 1, i64 1> | ||
|
@@ -4173,30 +4169,26 @@ entry: | |
define <2 x i64> @spltCnstConvftoll() { | ||
; P9BE-LABEL: spltCnstConvftoll: | ||
; P9BE: # %bb.0: # %entry | ||
; P9BE-NEXT: addis r3, r2, .LCPI78_0@toc@ha | ||
; P9BE-NEXT: addi r3, r3, .LCPI78_0@toc@l | ||
; P9BE-NEXT: lxv v2, 0(r3) | ||
; P9BE-NEXT: vspltisw v2, 4 | ||
; P9BE-NEXT: vupklsw v2, v2 | ||
; P9BE-NEXT: blr | ||
; | ||
; P9LE-LABEL: spltCnstConvftoll: | ||
; P9LE: # %bb.0: # %entry | ||
; P9LE-NEXT: addis r3, r2, .LCPI78_0@toc@ha | ||
; P9LE-NEXT: addi r3, r3, .LCPI78_0@toc@l | ||
; P9LE-NEXT: lxv v2, 0(r3) | ||
; P9LE-NEXT: vspltisw v2, 4 | ||
; P9LE-NEXT: vupklsw v2, v2 | ||
; P9LE-NEXT: blr | ||
; | ||
; P8BE-LABEL: spltCnstConvftoll: | ||
; P8BE: # %bb.0: # %entry | ||
; P8BE-NEXT: addis r3, r2, .LCPI78_0@toc@ha | ||
; P8BE-NEXT: addi r3, r3, .LCPI78_0@toc@l | ||
; P8BE-NEXT: lxvd2x v2, 0, r3 | ||
; P8BE-NEXT: vspltisw v2, 4 | ||
; P8BE-NEXT: vupklsw v2, v2 | ||
; P8BE-NEXT: blr | ||
; | ||
; P8LE-LABEL: spltCnstConvftoll: | ||
; P8LE: # %bb.0: # %entry | ||
; P8LE-NEXT: addis r3, r2, .LCPI78_0@toc@ha | ||
; P8LE-NEXT: addi r3, r3, .LCPI78_0@toc@l | ||
; P8LE-NEXT: lxvd2x v2, 0, r3 | ||
; P8LE-NEXT: vspltisw v2, 4 | ||
; P8LE-NEXT: vupklsw v2, v2 | ||
; P8LE-NEXT: blr | ||
entry: | ||
ret <2 x i64> <i64 4, i64 4> | ||
|
@@ -4526,30 +4518,26 @@ entry: | |
define <2 x i64> @spltCnstConvdtoll() { | ||
; P9BE-LABEL: spltCnstConvdtoll: | ||
; P9BE: # %bb.0: # %entry | ||
; P9BE-NEXT: addis r3, r2, .LCPI87_0@toc@ha | ||
; P9BE-NEXT: addi r3, r3, .LCPI87_0@toc@l | ||
; P9BE-NEXT: lxv v2, 0(r3) | ||
; P9BE-NEXT: vspltisw v2, 4 | ||
; P9BE-NEXT: vupklsw v2, v2 | ||
; P9BE-NEXT: blr | ||
; | ||
; P9LE-LABEL: spltCnstConvdtoll: | ||
; P9LE: # %bb.0: # %entry | ||
; P9LE-NEXT: addis r3, r2, .LCPI87_0@toc@ha | ||
; P9LE-NEXT: addi r3, r3, .LCPI87_0@toc@l | ||
; P9LE-NEXT: lxv v2, 0(r3) | ||
; P9LE-NEXT: vspltisw v2, 4 | ||
; P9LE-NEXT: vupklsw v2, v2 | ||
; P9LE-NEXT: blr | ||
; | ||
; P8BE-LABEL: spltCnstConvdtoll: | ||
; P8BE: # %bb.0: # %entry | ||
; P8BE-NEXT: addis r3, r2, .LCPI87_0@toc@ha | ||
; P8BE-NEXT: addi r3, r3, .LCPI87_0@toc@l | ||
; P8BE-NEXT: lxvd2x v2, 0, r3 | ||
; P8BE-NEXT: vspltisw v2, 4 | ||
; P8BE-NEXT: vupklsw v2, v2 | ||
; P8BE-NEXT: blr | ||
; | ||
; P8LE-LABEL: spltCnstConvdtoll: | ||
; P8LE: # %bb.0: # %entry | ||
; P8LE-NEXT: addis r3, r2, .LCPI87_0@toc@ha | ||
; P8LE-NEXT: addi r3, r3, .LCPI87_0@toc@l | ||
; P8LE-NEXT: lxvd2x v2, 0, r3 | ||
; P8LE-NEXT: vspltisw v2, 4 | ||
; P8LE-NEXT: vupklsw v2, v2 | ||
; P8LE-NEXT: blr | ||
entry: | ||
ret <2 x i64> <i64 4, i64 4> | ||
|
@@ -4879,30 +4867,26 @@ entry: | |
define <2 x i64> @spltConst1ull() { | ||
; P9BE-LABEL: spltConst1ull: | ||
; P9BE: # %bb.0: # %entry | ||
; P9BE-NEXT: addis r3, r2, .LCPI97_0@toc@ha | ||
; P9BE-NEXT: addi r3, r3, .LCPI97_0@toc@l | ||
; P9BE-NEXT: lxv v2, 0(r3) | ||
; P9BE-NEXT: vspltisw v2, 1 | ||
; P9BE-NEXT: vupklsw v2, v2 | ||
; P9BE-NEXT: blr | ||
; | ||
; P9LE-LABEL: spltConst1ull: | ||
; P9LE: # %bb.0: # %entry | ||
; P9LE-NEXT: addis r3, r2, .LCPI97_0@toc@ha | ||
; P9LE-NEXT: addi r3, r3, .LCPI97_0@toc@l | ||
; P9LE-NEXT: lxv v2, 0(r3) | ||
; P9LE-NEXT: vspltisw v2, 1 | ||
; P9LE-NEXT: vupklsw v2, v2 | ||
; P9LE-NEXT: blr | ||
; | ||
; P8BE-LABEL: spltConst1ull: | ||
; P8BE: # %bb.0: # %entry | ||
; P8BE-NEXT: addis r3, r2, .LCPI97_0@toc@ha | ||
; P8BE-NEXT: addi r3, r3, .LCPI97_0@toc@l | ||
; P8BE-NEXT: lxvd2x v2, 0, r3 | ||
; P8BE-NEXT: vspltisw v2, 1 | ||
; P8BE-NEXT: vupklsw v2, v2 | ||
; P8BE-NEXT: blr | ||
; | ||
; P8LE-LABEL: spltConst1ull: | ||
; P8LE: # %bb.0: # %entry | ||
; P8LE-NEXT: addis r3, r2, .LCPI97_0@toc@ha | ||
; P8LE-NEXT: addi r3, r3, .LCPI97_0@toc@l | ||
; P8LE-NEXT: lxvd2x v2, 0, r3 | ||
; P8LE-NEXT: vspltisw v2, 1 | ||
; P8LE-NEXT: vupklsw v2, v2 | ||
; P8LE-NEXT: blr | ||
entry: | ||
ret <2 x i64> <i64 1, i64 1> | ||
|
@@ -5339,30 +5323,26 @@ entry: | |
define <2 x i64> @spltCnstConvftoull() { | ||
; P9BE-LABEL: spltCnstConvftoull: | ||
; P9BE: # %bb.0: # %entry | ||
; P9BE-NEXT: addis r3, r2, .LCPI110_0@toc@ha | ||
; P9BE-NEXT: addi r3, r3, .LCPI110_0@toc@l | ||
; P9BE-NEXT: lxv v2, 0(r3) | ||
; P9BE-NEXT: vspltisw v2, 4 | ||
; P9BE-NEXT: vupklsw v2, v2 | ||
; P9BE-NEXT: blr | ||
; | ||
; P9LE-LABEL: spltCnstConvftoull: | ||
; P9LE: # %bb.0: # %entry | ||
; P9LE-NEXT: addis r3, r2, .LCPI110_0@toc@ha | ||
; P9LE-NEXT: addi r3, r3, .LCPI110_0@toc@l | ||
; P9LE-NEXT: lxv v2, 0(r3) | ||
; P9LE-NEXT: vspltisw v2, 4 | ||
; P9LE-NEXT: vupklsw v2, v2 | ||
; P9LE-NEXT: blr | ||
; | ||
; P8BE-LABEL: spltCnstConvftoull: | ||
; P8BE: # %bb.0: # %entry | ||
; P8BE-NEXT: addis r3, r2, .LCPI110_0@toc@ha | ||
; P8BE-NEXT: addi r3, r3, .LCPI110_0@toc@l | ||
; P8BE-NEXT: lxvd2x v2, 0, r3 | ||
; P8BE-NEXT: vspltisw v2, 4 | ||
; P8BE-NEXT: vupklsw v2, v2 | ||
; P8BE-NEXT: blr | ||
; | ||
; P8LE-LABEL: spltCnstConvftoull: | ||
; P8LE: # %bb.0: # %entry | ||
; P8LE-NEXT: addis r3, r2, .LCPI110_0@toc@ha | ||
; P8LE-NEXT: addi r3, r3, .LCPI110_0@toc@l | ||
; P8LE-NEXT: lxvd2x v2, 0, r3 | ||
; P8LE-NEXT: vspltisw v2, 4 | ||
; P8LE-NEXT: vupklsw v2, v2 | ||
; P8LE-NEXT: blr | ||
entry: | ||
ret <2 x i64> <i64 4, i64 4> | ||
|
@@ -5692,30 +5672,26 @@ entry: | |
define <2 x i64> @spltCnstConvdtoull() { | ||
; P9BE-LABEL: spltCnstConvdtoull: | ||
; P9BE: # %bb.0: # %entry | ||
; P9BE-NEXT: addis r3, r2, .LCPI119_0@toc@ha | ||
; P9BE-NEXT: addi r3, r3, .LCPI119_0@toc@l | ||
; P9BE-NEXT: lxv v2, 0(r3) | ||
; P9BE-NEXT: vspltisw v2, 4 | ||
; P9BE-NEXT: vupklsw v2, v2 | ||
; P9BE-NEXT: blr | ||
; | ||
; P9LE-LABEL: spltCnstConvdtoull: | ||
; P9LE: # %bb.0: # %entry | ||
; P9LE-NEXT: addis r3, r2, .LCPI119_0@toc@ha | ||
; P9LE-NEXT: addi r3, r3, .LCPI119_0@toc@l | ||
; P9LE-NEXT: lxv v2, 0(r3) | ||
; P9LE-NEXT: vspltisw v2, 4 | ||
; P9LE-NEXT: vupklsw v2, v2 | ||
; P9LE-NEXT: blr | ||
; | ||
; P8BE-LABEL: spltCnstConvdtoull: | ||
; P8BE: # %bb.0: # %entry | ||
; P8BE-NEXT: addis r3, r2, .LCPI119_0@toc@ha | ||
; P8BE-NEXT: addi r3, r3, .LCPI119_0@toc@l | ||
; P8BE-NEXT: lxvd2x v2, 0, r3 | ||
; P8BE-NEXT: vspltisw v2, 4 | ||
; P8BE-NEXT: vupklsw v2, v2 | ||
; P8BE-NEXT: blr | ||
; | ||
; P8LE-LABEL: spltCnstConvdtoull: | ||
; P8LE: # %bb.0: # %entry | ||
; P8LE-NEXT: addis r3, r2, .LCPI119_0@toc@ha | ||
; P8LE-NEXT: addi r3, r3, .LCPI119_0@toc@l | ||
; P8LE-NEXT: lxvd2x v2, 0, r3 | ||
; P8LE-NEXT: vspltisw v2, 4 | ||
; P8LE-NEXT: vupklsw v2, v2 | ||
; P8LE-NEXT: blr | ||
entry: | ||
ret <2 x i64> <i64 4, i64 4> | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: since `SplatBits` is only used in `if (BVNIsConstantSplat && SplatBitSize <= 64) {`, we can put the `uint64_t SplatBits = 0;` inside the `if (BVNIsConstantSplat && SplatBitSize <= 64) {`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SplatBits is used in existing code, much lower down, and that would hide the declaration in a lower scope.