@@ -1584,11 +1584,12 @@ v128_t test_i8x16_popcnt(v128_t a) {
1584
1584
// CHECK-NEXT: entry:
1585
1585
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1586
1586
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1587
- // CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
1588
- // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
1587
+ // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
1588
+ // CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> undef, i8 [[TMP2]], i64 0
1589
+ // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
1589
1590
// CHECK-NEXT: [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
1590
- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
1591
- // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1591
+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
1592
+ // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1592
1593
//
1593
1594
v128_t test_i8x16_shl (v128_t a , uint32_t b ) {
1594
1595
return wasm_i8x16_shl (a , b );
@@ -1598,11 +1599,12 @@ v128_t test_i8x16_shl(v128_t a, uint32_t b) {
1598
1599
// CHECK-NEXT: entry:
1599
1600
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1600
1601
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1601
- // CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
1602
- // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
1602
+ // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
1603
+ // CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> undef, i8 [[TMP2]], i64 0
1604
+ // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
1603
1605
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1604
- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1605
- // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1606
+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1607
+ // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1606
1608
//
1607
1609
v128_t test_i8x16_shr (v128_t a , uint32_t b ) {
1608
1610
return wasm_i8x16_shr (a , b );
@@ -1612,11 +1614,12 @@ v128_t test_i8x16_shr(v128_t a, uint32_t b) {
1612
1614
// CHECK-NEXT: entry:
1613
1615
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
1614
1616
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
1615
- // CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
1616
- // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
1617
+ // CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
1618
+ // CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> undef, i8 [[TMP2]], i64 0
1619
+ // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
1617
1620
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
1618
- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1619
- // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1621
+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
1622
+ // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1620
1623
//
1621
1624
v128_t test_u8x16_shr (v128_t a , uint32_t b ) {
1622
1625
return wasm_u8x16_shr (a , b );
@@ -1801,11 +1804,12 @@ uint32_t test_i16x8_bitmask(v128_t a) {
1801
1804
// CHECK-NEXT: entry:
1802
1805
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1803
1806
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1804
- // CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
1805
- // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
1807
+ // CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
1808
+ // CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
1809
+ // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
1806
1810
// CHECK-NEXT: [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
1807
- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
1808
- // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1811
+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
1812
+ // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1809
1813
//
1810
1814
v128_t test_i16x8_shl (v128_t a , uint32_t b ) {
1811
1815
return wasm_i16x8_shl (a , b );
@@ -1815,11 +1819,12 @@ v128_t test_i16x8_shl(v128_t a, uint32_t b) {
1815
1819
// CHECK-NEXT: entry:
1816
1820
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1817
1821
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1818
- // CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
1819
- // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
1822
+ // CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
1823
+ // CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
1824
+ // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
1820
1825
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1821
- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1822
- // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1826
+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1827
+ // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1823
1828
//
1824
1829
v128_t test_i16x8_shr (v128_t a , uint32_t b ) {
1825
1830
return wasm_i16x8_shr (a , b );
@@ -1829,11 +1834,12 @@ v128_t test_i16x8_shr(v128_t a, uint32_t b) {
1829
1834
// CHECK-NEXT: entry:
1830
1835
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1831
1836
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
1832
- // CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
1833
- // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
1837
+ // CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
1838
+ // CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i64 0
1839
+ // CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
1834
1840
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
1835
- // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1836
- // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1841
+ // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
1842
+ // CHECK-NEXT: ret <4 x i32> [[TMP4]]
1837
1843
//
1838
1844
v128_t test_u16x8_shr (v128_t a , uint32_t b ) {
1839
1845
return wasm_u16x8_shr (a , b );
@@ -2022,7 +2028,8 @@ uint32_t test_i32x4_bitmask(v128_t a) {
2022
2028
2023
2029
// CHECK-LABEL: @test_i32x4_shl(
2024
2030
// CHECK-NEXT: entry:
2025
- // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
2031
+ // CHECK-NEXT: [[REM_I:%.*]] = and i32 [[B:%.*]], 31
2032
+ // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
2026
2033
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
2027
2034
// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
2028
2035
// CHECK-NEXT: ret <4 x i32> [[SHL_I]]
@@ -2033,7 +2040,8 @@ v128_t test_i32x4_shl(v128_t a, uint32_t b) {
2033
2040
2034
2041
// CHECK-LABEL: @test_i32x4_shr(
2035
2042
// CHECK-NEXT: entry:
2036
- // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
2043
+ // CHECK-NEXT: [[REM_I:%.*]] = and i32 [[B:%.*]], 31
2044
+ // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
2037
2045
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
2038
2046
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
2039
2047
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
@@ -2044,7 +2052,8 @@ v128_t test_i32x4_shr(v128_t a, uint32_t b) {
2044
2052
2045
2053
// CHECK-LABEL: @test_u32x4_shr(
2046
2054
// CHECK-NEXT: entry:
2047
- // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
2055
+ // CHECK-NEXT: [[REM_I:%.*]] = and i32 [[B:%.*]], 31
2056
+ // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[REM_I]], i64 0
2048
2057
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
2049
2058
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
2050
2059
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
@@ -2173,12 +2182,13 @@ uint32_t test_i64x2_bitmask(v128_t a) {
2173
2182
// CHECK-LABEL: @test_i64x2_shl(
2174
2183
// CHECK-NEXT: entry:
2175
2184
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2176
- // CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
2177
- // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
2185
+ // CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
2186
+ // CHECK-NEXT: [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
2187
+ // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
2178
2188
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
2179
2189
// CHECK-NEXT: [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2180
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
2181
- // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2190
+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
2191
+ // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2182
2192
//
2183
2193
v128_t test_i64x2_shl (v128_t a , uint32_t b ) {
2184
2194
return wasm_i64x2_shl (a , b );
@@ -2187,12 +2197,13 @@ v128_t test_i64x2_shl(v128_t a, uint32_t b) {
2187
2197
// CHECK-LABEL: @test_i64x2_shr(
2188
2198
// CHECK-NEXT: entry:
2189
2199
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2190
- // CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
2191
- // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
2200
+ // CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
2201
+ // CHECK-NEXT: [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
2202
+ // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
2192
2203
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
2193
2204
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2194
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2195
- // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2205
+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2206
+ // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2196
2207
//
2197
2208
v128_t test_i64x2_shr (v128_t a , uint32_t b ) {
2198
2209
return wasm_i64x2_shr (a , b );
@@ -2201,12 +2212,13 @@ v128_t test_i64x2_shr(v128_t a, uint32_t b) {
2201
2212
// CHECK-LABEL: @test_u64x2_shr(
2202
2213
// CHECK-NEXT: entry:
2203
2214
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
2204
- // CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
2205
- // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
2215
+ // CHECK-NEXT: [[TMP1:%.*]] = and i32 [[B:%.*]], 63
2216
+ // CHECK-NEXT: [[REM_I:%.*]] = zext i32 [[TMP1]] to i64
2217
+ // CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[REM_I]], i64 0
2206
2218
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
2207
2219
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
2208
- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2209
- // CHECK-NEXT: ret <4 x i32> [[TMP1]]
2220
+ // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
2221
+ // CHECK-NEXT: ret <4 x i32> [[TMP2]]
2210
2222
//
2211
2223
v128_t test_u64x2_shr (v128_t a , uint32_t b ) {
2212
2224
return wasm_u64x2_shr (a , b );
0 commit comments