Skip to content

Commit 403926a

Browse files
committed
[WebAssembly] Skip implied bitmask operation in LowerShift
This patch skips redundant explicit masks of the shift count, since that masking is already implied by the wasm shift instructions. Differential Revision: https://reviews.llvm.org/D144619
1 parent af2969f commit 403926a

File tree

2 files changed

+81
-25
lines changed

2 files changed

+81
-25
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2287,10 +2287,43 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
22872287
// Only manually lower vector shifts
22882288
assert(Op.getSimpleValueType().isVector());
22892289

2290-
auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
2290+
uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2291+
auto ShiftVal = Op.getOperand(1);
2292+
2293+
// Try to skip bitmask operation since it is implied inside shift instruction
2294+
auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2295+
if (MaskOp.getOpcode() != ISD::AND)
2296+
return MaskOp;
2297+
SDValue LHS = MaskOp.getOperand(0);
2298+
SDValue RHS = MaskOp.getOperand(1);
2299+
if (MaskOp.getValueType().isVector()) {
2300+
APInt MaskVal;
2301+
if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2302+
std::swap(LHS, RHS);
2303+
2304+
if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2305+
MaskVal == MaskBits)
2306+
MaskOp = LHS;
2307+
} else {
2308+
if (!isa<ConstantSDNode>(RHS.getNode()))
2309+
std::swap(LHS, RHS);
2310+
2311+
auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2312+
if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2313+
MaskOp = LHS;
2314+
}
2315+
2316+
return MaskOp;
2317+
};
2318+
2319+
// Skip vector and operation
2320+
ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2321+
ShiftVal = DAG.getSplatValue(ShiftVal);
22912322
if (!ShiftVal)
22922323
return unrollVectorShift(Op, DAG);
22932324

2325+
// Skip scalar and operation
2326+
ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
22942327
// Use anyext because none of the high bits can affect the shift
22952328
ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
22962329

llvm/test/CodeGen/WebAssembly/masked-shifts.ll

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,6 @@ define <16 x i8> @shl_v16i8_late(<16 x i8> %v, i8 %x) {
106106
; CHECK-NEXT: # %bb.0:
107107
; CHECK-NEXT: local.get 0
108108
; CHECK-NEXT: local.get 1
109-
; CHECK-NEXT: i8x16.splat
110-
; CHECK-NEXT: v128.const 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
111-
; CHECK-NEXT: v128.and
112-
; CHECK-NEXT: i8x16.extract_lane_u 0
113109
; CHECK-NEXT: i8x16.shl
114110
; CHECK-NEXT: # fallthrough-return
115111
%t = insertelement <16 x i8> undef, i8 %x, i32 0
@@ -145,10 +141,6 @@ define <16 x i8> @ashr_v16i8_late(<16 x i8> %v, i8 %x) {
145141
; CHECK-NEXT: # %bb.0:
146142
; CHECK-NEXT: local.get 0
147143
; CHECK-NEXT: local.get 1
148-
; CHECK-NEXT: i8x16.splat
149-
; CHECK-NEXT: v128.const 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
150-
; CHECK-NEXT: v128.and
151-
; CHECK-NEXT: i8x16.extract_lane_u 0
152144
; CHECK-NEXT: i8x16.shr_s
153145
; CHECK-NEXT: # fallthrough-return
154146
%t = insertelement <16 x i8> undef, i8 %x, i32 0
@@ -184,10 +176,6 @@ define <16 x i8> @lshr_v16i8_late(<16 x i8> %v, i8 %x) {
184176
; CHECK-NEXT: # %bb.0:
185177
; CHECK-NEXT: local.get 0
186178
; CHECK-NEXT: local.get 1
187-
; CHECK-NEXT: i8x16.splat
188-
; CHECK-NEXT: v128.const 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
189-
; CHECK-NEXT: v128.and
190-
; CHECK-NEXT: i8x16.extract_lane_u 0
191179
; CHECK-NEXT: i8x16.shr_u
192180
; CHECK-NEXT: # fallthrough-return
193181
%t = insertelement <16 x i8> undef, i8 %x, i32 0
@@ -222,10 +210,6 @@ define <8 x i16> @shl_v8i16_late(<8 x i16> %v, i16 %x) {
222210
; CHECK-NEXT: # %bb.0:
223211
; CHECK-NEXT: local.get 0
224212
; CHECK-NEXT: local.get 1
225-
; CHECK-NEXT: i16x8.splat
226-
; CHECK-NEXT: v128.const 15, 15, 15, 15, 15, 15, 15, 15
227-
; CHECK-NEXT: v128.and
228-
; CHECK-NEXT: i16x8.extract_lane_u 0
229213
; CHECK-NEXT: i16x8.shl
230214
; CHECK-NEXT: # fallthrough-return
231215
%t = insertelement <8 x i16> undef, i16 %x, i32 0
@@ -259,10 +243,6 @@ define <8 x i16> @ashr_v8i16_late(<8 x i16> %v, i16 %x) {
259243
; CHECK-NEXT: # %bb.0:
260244
; CHECK-NEXT: local.get 0
261245
; CHECK-NEXT: local.get 1
262-
; CHECK-NEXT: i16x8.splat
263-
; CHECK-NEXT: v128.const 15, 15, 15, 15, 15, 15, 15, 15
264-
; CHECK-NEXT: v128.and
265-
; CHECK-NEXT: i16x8.extract_lane_u 0
266246
; CHECK-NEXT: i16x8.shr_s
267247
; CHECK-NEXT: # fallthrough-return
268248
%t = insertelement <8 x i16> undef, i16 %x, i32 0
@@ -296,10 +276,6 @@ define <8 x i16> @lshr_v8i16_late(<8 x i16> %v, i16 %x) {
296276
; CHECK-NEXT: # %bb.0:
297277
; CHECK-NEXT: local.get 0
298278
; CHECK-NEXT: local.get 1
299-
; CHECK-NEXT: i16x8.splat
300-
; CHECK-NEXT: v128.const 15, 15, 15, 15, 15, 15, 15, 15
301-
; CHECK-NEXT: v128.and
302-
; CHECK-NEXT: i16x8.extract_lane_u 0
303279
; CHECK-NEXT: i16x8.shr_u
304280
; CHECK-NEXT: # fallthrough-return
305281
%t = insertelement <8 x i16> undef, i16 %x, i32 0
@@ -519,6 +495,22 @@ define <2 x i64> @shl_v2i64_i32(<2 x i64> %v, i32 %x) {
519495
ret <2 x i64> %a
520496
}
521497

498+
define <2 x i64> @shl_v2i64_i32_late(<2 x i64> %v, i32 %x) {
499+
; CHECK-LABEL: shl_v2i64_i32_late:
500+
; CHECK: .functype shl_v2i64_i32_late (v128, i32) -> (v128)
501+
; CHECK-NEXT: # %bb.0:
502+
; CHECK-NEXT: local.get 0
503+
; CHECK-NEXT: local.get 1
504+
; CHECK-NEXT: i64x2.shl
505+
; CHECK-NEXT: # fallthrough-return
506+
%z = zext i32 %x to i64
507+
%t = insertelement <2 x i64> undef, i64 %z, i32 0
508+
%s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
509+
%m = and <2 x i64> %s, <i64 63, i64 63>
510+
%a = shl <2 x i64> %v, %m
511+
ret <2 x i64> %a
512+
}
513+
522514
define <2 x i64> @ashr_v2i64_i32(<2 x i64> %v, i32 %x) {
523515
; CHECK-LABEL: ashr_v2i64_i32:
524516
; CHECK: .functype ashr_v2i64_i32 (v128, i32) -> (v128)
@@ -535,6 +527,22 @@ define <2 x i64> @ashr_v2i64_i32(<2 x i64> %v, i32 %x) {
535527
ret <2 x i64> %a
536528
}
537529

530+
define <2 x i64> @ashr_v2i64_i32_late(<2 x i64> %v, i32 %x) {
531+
; CHECK-LABEL: ashr_v2i64_i32_late:
532+
; CHECK: .functype ashr_v2i64_i32_late (v128, i32) -> (v128)
533+
; CHECK-NEXT: # %bb.0:
534+
; CHECK-NEXT: local.get 0
535+
; CHECK-NEXT: local.get 1
536+
; CHECK-NEXT: i64x2.shr_s
537+
; CHECK-NEXT: # fallthrough-return
538+
%z = zext i32 %x to i64
539+
%t = insertelement <2 x i64> undef, i64 %z, i32 0
540+
%s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
541+
%m = and <2 x i64> %s, <i64 63, i64 63>
542+
%a = ashr <2 x i64> %v, %m
543+
ret <2 x i64> %a
544+
}
545+
538546
define <2 x i64> @lshr_v2i64_i32(<2 x i64> %v, i32 %x) {
539547
; CHECK-LABEL: lshr_v2i64_i32:
540548
; CHECK: .functype lshr_v2i64_i32 (v128, i32) -> (v128)
@@ -551,3 +559,18 @@ define <2 x i64> @lshr_v2i64_i32(<2 x i64> %v, i32 %x) {
551559
ret <2 x i64> %a
552560
}
553561

562+
define <2 x i64> @lshr_v2i64_i32_late(<2 x i64> %v, i32 %x) {
563+
; CHECK-LABEL: lshr_v2i64_i32_late:
564+
; CHECK: .functype lshr_v2i64_i32_late (v128, i32) -> (v128)
565+
; CHECK-NEXT: # %bb.0:
566+
; CHECK-NEXT: local.get 0
567+
; CHECK-NEXT: local.get 1
568+
; CHECK-NEXT: i64x2.shr_u
569+
; CHECK-NEXT: # fallthrough-return
570+
%z = zext i32 %x to i64
571+
%t = insertelement <2 x i64> undef, i64 %z, i32 0
572+
%s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
573+
%m = and <2 x i64> %s, <i64 63, i64 63>
574+
%a = lshr <2 x i64> %v, %m
575+
ret <2 x i64> %a
576+
}

0 commit comments

Comments
 (0)