Skip to content

Commit 0fcff69

Browse files
committed
[InstCombine] try to narrow shifted bswap-of-zext (2nd try)
The first attempt at this missed a validity check. This version additionally checks that the bit width of the narrow source type is a multiple of 16, because a bswap cannot be created for a type with an odd number of bytes.

Original commit message:

This is the IR counterpart to 370ebc9, which provided a bswap narrowing fix for issue llvm#53867. Here we can be more general (although I'm not sure yet what would happen for illegal types in codegen - too rare to worry about?):
https://alive2.llvm.org/ce/z/3-CPfo

This will be more effective if we have moved the shift after the bswap as proposed in D122010, but it is independent of that patch.

Differential Revision: https://reviews.llvm.org/D122166
1 parent 87f3ebd commit 0fcff69

File tree

2 files changed

+34
-15
lines changed

2 files changed

+34
-15
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,6 +1173,22 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
11731173
MulC->logBase2() == ShAmtC)
11741174
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
11751175

1176+
// Try to narrow a bswap:
1177+
// (bswap (zext X)) >> C --> zext (bswap X >> C')
1178+
// In the case where the shift amount equals the bitwidth difference, the
1179+
// shift is eliminated.
1180+
if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::bswap>(
1181+
m_OneUse(m_ZExt(m_Value(X))))))) {
1182+
// TODO: If the shift amount is less than the number of bits added by the zext, we could narrow the bswap and shift left instead.
1183+
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
1184+
unsigned WidthDiff = BitWidth - SrcWidth;
1185+
if (SrcWidth % 16 == 0 && ShAmtC >= WidthDiff) {
1186+
Value *NarrowSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
1187+
Value *NewShift = Builder.CreateLShr(NarrowSwap, ShAmtC - WidthDiff);
1188+
return new ZExtInst(NewShift, Ty);
1189+
}
1190+
}
1191+
11761192
// If the shifted-out value is known-zero, then this is an exact shift.
11771193
if (!I.isExact() &&
11781194
MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmtC), 0, &I)) {

llvm/test/Transforms/InstCombine/lshr.ll

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -831,9 +831,8 @@ define i1 @icmp_sge(i32 %x, i32 %y) {
831831

832832
define i32 @narrow_bswap(i16 %x) {
833833
; CHECK-LABEL: @narrow_bswap(
834-
; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32
835-
; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.bswap.i32(i32 [[Z]])
836-
; CHECK-NEXT: [[S:%.*]] = lshr exact i32 [[B]], 16
834+
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
835+
; CHECK-NEXT: [[S:%.*]] = zext i16 [[TMP1]] to i32
837836
; CHECK-NEXT: ret i32 [[S]]
838837
;
839838
%z = zext i16 %x to i32
@@ -844,9 +843,8 @@ define i32 @narrow_bswap(i16 %x) {
844843

845844
define i128 @narrow_bswap_extra_wide(i16 %x) {
846845
; CHECK-LABEL: @narrow_bswap_extra_wide(
847-
; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i128
848-
; CHECK-NEXT: [[B:%.*]] = call i128 @llvm.bswap.i128(i128 [[Z]])
849-
; CHECK-NEXT: [[S:%.*]] = lshr exact i128 [[B]], 112
846+
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]])
847+
; CHECK-NEXT: [[S:%.*]] = zext i16 [[TMP1]] to i128
850848
; CHECK-NEXT: ret i128 [[S]]
851849
;
852850
%z = zext i16 %x to i128
@@ -855,6 +853,8 @@ define i128 @narrow_bswap_extra_wide(i16 %x) {
855853
ret i128 %s
856854
}
857855

856+
; TODO: The bswap can be narrowed followed by shl.
857+
858858
define i32 @narrow_bswap_undershift(i16 %x) {
859859
; CHECK-LABEL: @narrow_bswap_undershift(
860860
; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32
@@ -870,9 +870,8 @@ define i32 @narrow_bswap_undershift(i16 %x) {
870870

871871
define <2 x i64> @narrow_bswap_splat(<2 x i16> %x) {
872872
; CHECK-LABEL: @narrow_bswap_splat(
873-
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i64>
874-
; CHECK-NEXT: [[B:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[Z]])
875-
; CHECK-NEXT: [[S:%.*]] = lshr exact <2 x i64> [[B]], <i64 48, i64 48>
873+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[X:%.*]])
874+
; CHECK-NEXT: [[S:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i64>
876875
; CHECK-NEXT: ret <2 x i64> [[S]]
877876
;
878877
%z = zext <2 x i16> %x to <2 x i64>
@@ -881,6 +880,8 @@ define <2 x i64> @narrow_bswap_splat(<2 x i16> %x) {
881880
ret <2 x i64> %s
882881
}
883882

883+
; TODO: poison/undef in the shift amount is ok to propagate.
884+
884885
define <2 x i64> @narrow_bswap_splat_poison_elt(<2 x i16> %x) {
885886
; CHECK-LABEL: @narrow_bswap_splat_poison_elt(
886887
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i64>
@@ -896,9 +897,9 @@ define <2 x i64> @narrow_bswap_splat_poison_elt(<2 x i16> %x) {
896897

897898
define <2 x i64> @narrow_bswap_overshift(<2 x i32> %x) {
898899
; CHECK-LABEL: @narrow_bswap_overshift(
899-
; CHECK-NEXT: [[Z:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
900-
; CHECK-NEXT: [[B:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[Z]])
901-
; CHECK-NEXT: [[S:%.*]] = lshr <2 x i64> [[B]], <i64 48, i64 48>
900+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
901+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], <i32 16, i32 16>
902+
; CHECK-NEXT: [[S:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
902903
; CHECK-NEXT: ret <2 x i64> [[S]]
903904
;
904905
%z = zext <2 x i32> %x to <2 x i64>
@@ -909,9 +910,9 @@ define <2 x i64> @narrow_bswap_overshift(<2 x i32> %x) {
909910

910911
define i128 @narrow_bswap_overshift2(i96 %x) {
911912
; CHECK-LABEL: @narrow_bswap_overshift2(
912-
; CHECK-NEXT: [[Z:%.*]] = zext i96 [[X:%.*]] to i128
913-
; CHECK-NEXT: [[B:%.*]] = call i128 @llvm.bswap.i128(i128 [[Z]])
914-
; CHECK-NEXT: [[S:%.*]] = lshr i128 [[B]], 61
913+
; CHECK-NEXT: [[TMP1:%.*]] = call i96 @llvm.bswap.i96(i96 [[X:%.*]])
914+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i96 [[TMP1]], 29
915+
; CHECK-NEXT: [[S:%.*]] = zext i96 [[TMP2]] to i128
915916
; CHECK-NEXT: ret i128 [[S]]
916917
;
917918
%z = zext i96 %x to i128
@@ -920,6 +921,8 @@ define i128 @narrow_bswap_overshift2(i96 %x) {
920921
ret i128 %s
921922
}
922923

924+
; negative test - can't make a bswap with an odd number of bytes
925+
923926
define i32 @not_narrow_bswap(i24 %x) {
924927
; CHECK-LABEL: @not_narrow_bswap(
925928
; CHECK-NEXT: [[Z:%.*]] = zext i24 [[X:%.*]] to i32

0 commit comments

Comments
 (0)