Skip to content

Commit e9d7f96

Browse files
committed
[WebAssembly] Add more combine patterns for vector shifts
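After the change in D144169, codegen generates redundant instructions such as `and` and `wrap` for vector shifts. This commit fixes that by adding patterns that fold an `and` with 63 on an i32 shift amount into the i64x2 shift instructions. Differential Revision: https://reviews.llvm.org/D144360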
1 parent c339f9e commit e9d7f96

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,12 @@ def : Pat<(wasm_shr_s (v4i32 V128:$lhs), (and I32:$rhs, 31)),
971971
def : Pat<(wasm_shr_u (v4i32 V128:$lhs), (and I32:$rhs, 31)),
972972
(SHR_U_I32x4 V128:$lhs, I32:$rhs)>;
973973

974+
def : Pat<(wasm_shl (v2i64 V128:$lhs), (and I32:$rhs, 63)),
975+
(SHL_I64x2 V128:$lhs, I32:$rhs)>;
976+
def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (and I32:$rhs, 63)),
977+
(SHR_S_I64x2 V128:$lhs, I32:$rhs)>;
978+
def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (and I32:$rhs, 63)),
979+
(SHR_U_I64x2 V128:$lhs, I32:$rhs)>;
974980
def : Pat<(wasm_shl (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
975981
(SHL_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
976982
def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),

llvm/test/CodeGen/WebAssembly/masked-shifts.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,3 +502,52 @@ define <2 x i64> @lshr_v2i64_late(<2 x i64> %v, i64 %x) {
502502
%a = lshr <2 x i64> %v, %m
503503
ret <2 x i64> %a
504504
}
505+
506+
define <2 x i64> @shl_v2i64_i32(<2 x i64> %v, i32 %x) {
507+
; CHECK-LABEL: shl_v2i64_i32:
508+
; CHECK: .functype shl_v2i64_i32 (v128, i32) -> (v128)
509+
; CHECK-NEXT: # %bb.0:
510+
; CHECK-NEXT: local.get 0
511+
; CHECK-NEXT: local.get 1
512+
; CHECK-NEXT: i64x2.shl
513+
; CHECK-NEXT: # fallthrough-return
514+
%z = and i32 %x, 63
515+
%m = zext i32 %z to i64
516+
%t = insertelement <2 x i64> undef, i64 %m, i32 0
517+
%s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
518+
%a = shl <2 x i64> %v, %s
519+
ret <2 x i64> %a
520+
}
521+
522+
define <2 x i64> @ashr_v2i64_i32(<2 x i64> %v, i32 %x) {
523+
; CHECK-LABEL: ashr_v2i64_i32:
524+
; CHECK: .functype ashr_v2i64_i32 (v128, i32) -> (v128)
525+
; CHECK-NEXT: # %bb.0:
526+
; CHECK-NEXT: local.get 0
527+
; CHECK-NEXT: local.get 1
528+
; CHECK-NEXT: i64x2.shr_s
529+
; CHECK-NEXT: # fallthrough-return
530+
%z = and i32 %x, 63
531+
%m = zext i32 %z to i64
532+
%t = insertelement <2 x i64> undef, i64 %m, i32 0
533+
%s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
534+
%a = ashr <2 x i64> %v, %s
535+
ret <2 x i64> %a
536+
}
537+
538+
define <2 x i64> @lshr_v2i64_i32(<2 x i64> %v, i32 %x) {
539+
; CHECK-LABEL: lshr_v2i64_i32:
540+
; CHECK: .functype lshr_v2i64_i32 (v128, i32) -> (v128)
541+
; CHECK-NEXT: # %bb.0:
542+
; CHECK-NEXT: local.get 0
543+
; CHECK-NEXT: local.get 1
544+
; CHECK-NEXT: i64x2.shr_u
545+
; CHECK-NEXT: # fallthrough-return
546+
%z = and i32 %x, 63
547+
%m = zext i32 %z to i64
548+
%t = insertelement <2 x i64> undef, i64 %m, i32 0
549+
%s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
550+
%a = lshr <2 x i64> %v, %s
551+
ret <2 x i64> %a
552+
}
553+

0 commit comments

Comments
 (0)