Skip to content

Commit f253bb6

Browse files
committed
[WebAssembly] Fix simd bit shift intrinsics codegen
According to github.com/WebAssembly/simd/blob/main/proposals/simd/SIMD.md, the shift count of the SIMD bit shift instructions is taken modulo the lane width. This patch makes the shift intrinsics mask the shift count accordingly. Fixes PR#60655. Differential Revision: https://reviews.llvm.org/D144169
1 parent e073de8 commit f253bb6

File tree

2 files changed

+63
-51
lines changed

2 files changed

+63
-51
lines changed

clang/lib/Headers/wasm_simd128.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -961,17 +961,17 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) {
961961

962962
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a,
963963
uint32_t __b) {
964-
return (v128_t)((__i8x16)__a << __b);
964+
return (v128_t)((__i8x16)__a << (__b & 0x7));
965965
}
966966

967967
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a,
968968
uint32_t __b) {
969-
return (v128_t)((__i8x16)__a >> __b);
969+
return (v128_t)((__i8x16)__a >> (__b & 0x7));
970970
}
971971

972972
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a,
973973
uint32_t __b) {
974-
return (v128_t)((__u8x16)__a >> __b);
974+
return (v128_t)((__u8x16)__a >> (__b & 0x7));
975975
}
976976

977977
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t __a,
@@ -1047,17 +1047,17 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) {
10471047

10481048
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t __a,
10491049
uint32_t __b) {
1050-
return (v128_t)((__i16x8)__a << __b);
1050+
return (v128_t)((__i16x8)__a << (__b & 0xF));
10511051
}
10521052

10531053
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t __a,
10541054
uint32_t __b) {
1055-
return (v128_t)((__i16x8)__a >> __b);
1055+
return (v128_t)((__i16x8)__a >> (__b & 0xF));
10561056
}
10571057

10581058
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t __a,
10591059
uint32_t __b) {
1060-
return (v128_t)((__u16x8)__a >> __b);
1060+
return (v128_t)((__u16x8)__a >> (__b & 0xF));
10611061
}
10621062

10631063
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t __a,
@@ -1138,17 +1138,17 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) {
11381138

11391139
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t __a,
11401140
uint32_t __b) {
1141-
return (v128_t)((__i32x4)__a << __b);
1141+
return (v128_t)((__i32x4)__a << (__b & 0x1F));
11421142
}
11431143

11441144
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t __a,
11451145
uint32_t __b) {
1146-
return (v128_t)((__i32x4)__a >> __b);
1146+
return (v128_t)((__i32x4)__a >> (__b & 0x1F));
11471147
}
11481148

11491149
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t __a,
11501150
uint32_t __b) {
1151-
return (v128_t)((__u32x4)__a >> __b);
1151+
return (v128_t)((__u32x4)__a >> (__b & 0x1F));
11521152
}
11531153

11541154
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t __a,
@@ -1209,17 +1209,17 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) {
12091209

12101210
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t __a,
12111211
uint32_t __b) {
1212-
return (v128_t)((__i64x2)__a << (int64_t)__b);
1212+
return (v128_t)((__i64x2)__a << ((int64_t)__b & 0x3F));
12131213
}
12141214

12151215
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t __a,
12161216
uint32_t __b) {
1217-
return (v128_t)((__i64x2)__a >> (int64_t)__b);
1217+
return (v128_t)((__i64x2)__a >> ((int64_t)__b & 0x3F));
12181218
}
12191219

12201220
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t __a,
12211221
uint32_t __b) {
1222-
return (v128_t)((__u64x2)__a >> (int64_t)__b);
1222+
return (v128_t)((__u64x2)__a >> ((int64_t)__b & 0x3F));
12231223
}
12241224

12251225
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t __a,

clang/test/Headers/wasm.c

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,11 +1584,12 @@ v128_t test_i8x16_popcnt(v128_t a) {
15841584
// CHECK-NEXT: entry:
15851585
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
15861586
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1587-
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
1588-
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
1587+
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
1588+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> undef, i8 [[TMP2]], i64 0
1589+
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
15891590
// CHECK-NEXT: [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
1590-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
1591-
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
1591+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
1592+
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
15921593
//
15931594
v128_t test_i8x16_shl(v128_t a, uint32_t b) {
15941595
return wasm_i8x16_shl(a, b);
@@ -1598,11 +1599,12 @@ v128_t test_i8x16_shl(v128_t a, uint32_t b) {
15981599
// CHECK-NEXT: entry:
15991600
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
16001601
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1601-
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
1602-
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
1602+
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
1603+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> undef, i8 [[TMP2]], i64 0
1604+
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
16031605
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1604-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1605-
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
1606+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1607+
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
16061608
//
16071609
v128_t test_i8x16_shr(v128_t a, uint32_t b) {
16081610
return wasm_i8x16_shr(a, b);
@@ -1612,11 +1614,12 @@ v128_t test_i8x16_shr(v128_t a, uint32_t b) {
16121614
// CHECK-NEXT: entry:
16131615
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
16141616
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1615-
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
1616-
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
1617+
// CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
1618+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> undef, i8 [[TMP2]], i64 0
1619+
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
16171620
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1618-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1619-
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
1621+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1622+
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
16201623
//
16211624
v128_t test_u8x16_shr(v128_t a, uint32_t b) {
16221625
return wasm_u8x16_shr(a, b);
@@ -1801,11 +1804,12 @@ uint32_t test_i16x8_bitmask(v128_t a) {
18011804
// CHECK-NEXT: entry:
18021805
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
18031806
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1804-
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
1805-
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
1807+
// CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
1808+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
1809+
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
18061810
// CHECK-NEXT: [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
1807-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
1808-
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
1811+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
1812+
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
18091813
//
18101814
v128_t test_i16x8_shl(v128_t a, uint32_t b) {
18111815
return wasm_i16x8_shl(a, b);
@@ -1815,11 +1819,12 @@ v128_t test_i16x8_shl(v128_t a, uint32_t b) {
18151819
// CHECK-NEXT: entry:
18161820
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
18171821
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1818-
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
1819-
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
1822+
// CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
1823+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
1824+
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
18201825
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1821-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1822-
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
1826+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1827+
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
18231828
//
18241829
v128_t test_i16x8_shr(v128_t a, uint32_t b) {
18251830
return wasm_i16x8_shr(a, b);
@@ -1829,11 +1834,12 @@ v128_t test_i16x8_shr(v128_t a, uint32_t b) {
18291834
// CHECK-NEXT: entry:
18301835
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
18311836
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1832-
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
1833-
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
1837+
// CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
1838+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
1839+
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
18341840
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1835-
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1836-
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
1841+
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1842+
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
18371843
//
18381844
v128_t test_u16x8_shr(v128_t a, uint32_t b) {
18391845
return wasm_u16x8_shr(a, b);
@@ -2022,7 +2028,8 @@ uint32_t test_i32x4_bitmask(v128_t a) {
20222028

20232029
// CHECK-LABEL: @test_i32x4_shl(
20242030
// CHECK-NEXT: entry:
2025-
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
2031+
// CHECK-NEXT: [[REM_I:%.*]] = and i32 [[B:%.*]], 31
2032+
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
20262033
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
20272034
// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
20282035
// CHECK-NEXT: ret <4 x i32> [[SHL_I]]
@@ -2033,7 +2040,8 @@ v128_t test_i32x4_shl(v128_t a, uint32_t b) {
20332040

20342041
// CHECK-LABEL: @test_i32x4_shr(
20352042
// CHECK-NEXT: entry:
2036-
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
2043+
// CHECK-NEXT: [[REM_I:%.*]] = and i32 [[B:%.*]], 31
2044+
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
20372045
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
20382046
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
20392047
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
@@ -2044,7 +2052,8 @@ v128_t test_i32x4_shr(v128_t a, uint32_t b) {
20442052

20452053
// CHECK-LABEL: @test_u32x4_shr(
20462054
// CHECK-NEXT: entry:
2047-
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
2055+
// CHECK-NEXT: [[REM_I:%.*]] = and i32 [[B:%.*]], 31
2056+
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
20482057
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
20492058
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
20502059
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
@@ -2173,12 +2182,13 @@ uint32_t test_i64x2_bitmask(v128_t a) {
21732182
// CHECK-LABEL: @test_i64x2_shl(
21742183
// CHECK-NEXT: entry:
21752184
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2176-
// CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
2177-
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
2185+
// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
2186+
// CHECK-NEXT: [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
2187+
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
21782188
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
21792189
// CHECK-NEXT: [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2180-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
2181-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2190+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
2191+
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
21822192
//
21832193
v128_t test_i64x2_shl(v128_t a, uint32_t b) {
21842194
return wasm_i64x2_shl(a, b);
@@ -2187,12 +2197,13 @@ v128_t test_i64x2_shl(v128_t a, uint32_t b) {
21872197
// CHECK-LABEL: @test_i64x2_shr(
21882198
// CHECK-NEXT: entry:
21892199
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2190-
// CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
2191-
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
2200+
// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
2201+
// CHECK-NEXT: [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
2202+
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
21922203
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
21932204
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2194-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2195-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2205+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2206+
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
21962207
//
21972208
v128_t test_i64x2_shr(v128_t a, uint32_t b) {
21982209
return wasm_i64x2_shr(a, b);
@@ -2201,12 +2212,13 @@ v128_t test_i64x2_shr(v128_t a, uint32_t b) {
22012212
// CHECK-LABEL: @test_u64x2_shr(
22022213
// CHECK-NEXT: entry:
22032214
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2204-
// CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
2205-
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
2215+
// CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
2216+
// CHECK-NEXT: [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
2217+
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
22062218
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
22072219
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2208-
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2209-
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
2220+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2221+
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
22102222
//
22112223
v128_t test_u64x2_shr(v128_t a, uint32_t b) {
22122224
return wasm_u64x2_shr(a, b);

0 commit comments

Comments
 (0)