@@ -667,30 +667,30 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
667
667
; CHECK-NEXT: add x10, x3, #12
668
668
; CHECK-NEXT: bic v1.8h, #255, lsl #8
669
669
; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
670
- ; CHECK-NEXT: ldr s3 , [x0, #12]
671
- ; CHECK-NEXT: ldp s2, s7 , [x0 , #4]
670
+ ; CHECK-NEXT: ldr s4 , [x0, #12]
671
+ ; CHECK-NEXT: ldp s5, s2 , [x2 , #4]
672
672
; CHECK-NEXT: ldr s6, [x2, #12]
673
- ; CHECK-NEXT: ldp s5, s4 , [x2 , #4]
674
- ; CHECK-NEXT: ld1 { v3 .s }[1], [x11]
673
+ ; CHECK-NEXT: ldp s3, s7 , [x0 , #4]
674
+ ; CHECK-NEXT: ld1 { v4 .s }[1], [x11]
675
675
; CHECK-NEXT: ld1 { v6.s }[1], [x10]
676
- ; CHECK-NEXT: ld1 { v2.s }[1], [x9]
677
- ; CHECK-NEXT: ld1 { v4.s }[1], [x8]
676
+ ; CHECK-NEXT: ld1 { v2.s }[1], [x8]
678
677
; CHECK-NEXT: ld1 { v5.s }[1], [x3]
679
678
; CHECK-NEXT: add x8, x1, #8
679
+ ; CHECK-NEXT: ld1 { v3.s }[1], [x9]
680
680
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
681
- ; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
682
- ; CHECK-NEXT: ushll v4 .8h, v4 .8b, #0
683
- ; CHECK-NEXT: uaddl v3 .8h, v5.8b, v6.8b
681
+ ; CHECK-NEXT: ushll v2.8h, v2.8b, #0
682
+ ; CHECK-NEXT: uaddl v3 .8h, v3 .8b, v4.8b
683
+ ; CHECK-NEXT: uaddl v4 .8h, v5.8b, v6.8b
684
684
; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
685
- ; CHECK-NEXT: uaddw2 v4.8h, v4.8h, v0.16b
686
- ; CHECK-NEXT: ushll v0.4s, v2.4h, #3
687
- ; CHECK-NEXT: ushll v5.4s, v3.4h, #3
685
+ ; CHECK-NEXT: uaddw2 v2.8h, v2.8h, v0.16b
686
+ ; CHECK-NEXT: ushll v0.4s, v3.4h, #3
687
+ ; CHECK-NEXT: ushll v5.4s, v4.4h, #3
688
+ ; CHECK-NEXT: ushll2 v4.4s, v4.8h, #3
688
689
; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3
689
- ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
690
690
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
691
- ; CHECK-NEXT: uaddw2 v1.4s, v2 .4s, v1.8h
692
- ; CHECK-NEXT: uaddw2 v3.4s, v3 .4s, v4 .8h
693
- ; CHECK-NEXT: uaddw v2.4s, v5.4s, v4 .4h
691
+ ; CHECK-NEXT: uaddw2 v1.4s, v3 .4s, v1.8h
692
+ ; CHECK-NEXT: uaddw2 v3.4s, v4 .4s, v2 .8h
693
+ ; CHECK-NEXT: uaddw v2.4s, v5.4s, v2 .4h
694
694
; CHECK-NEXT: ret
695
695
%lp1 = load <4 x i8 >, ptr %p
696
696
store <4 x i8 > %lp1 , ptr %z
@@ -1073,24 +1073,24 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
1073
1073
; CHECK-NEXT: ld1 { v6.s }[1], [x10]
1074
1074
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
1075
1075
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
1076
- ; CHECK-NEXT: uaddl v16 .8h, v2.8b, v3.8b
1077
- ; CHECK-NEXT: uaddl v3 .8h, v1.8b, v6.8b
1078
- ; CHECK-NEXT: uaddl v2 .8h, v4.8b, v5.8b
1076
+ ; CHECK-NEXT: uaddl v2 .8h, v2.8b, v3.8b
1077
+ ; CHECK-NEXT: uaddl v1 .8h, v1.8b, v6.8b
1078
+ ; CHECK-NEXT: uaddl v3 .8h, v4.8b, v5.8b
1079
1079
; CHECK-NEXT: uaddl v4.8h, v0.8b, v7.8b
1080
- ; CHECK-NEXT: ushll v0.4s, v16.4h, #3
1081
- ; CHECK-NEXT: ushll2 v1.4s, v16.8h, #3
1082
- ; CHECK-NEXT: ushll2 v18.4s, v16.8h, #0
1083
- ; CHECK-NEXT: ushll v6.4s, v2.4h, #3
1084
- ; CHECK-NEXT: ushll2 v7.4s, v2.8h, #3
1085
- ; CHECK-NEXT: ushll2 v5.4s, v2.8h, #0
1080
+ ; CHECK-NEXT: ushll2 v0.4s, v2.8h, #0
1081
+ ; CHECK-NEXT: ushll v5.4s, v2.4h, #3
1082
+ ; CHECK-NEXT: ushll2 v16.4s, v2.8h, #3
1083
+ ; CHECK-NEXT: ushll v6.4s, v3.4h, #3
1084
+ ; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3
1086
1085
; CHECK-NEXT: ushll v17.4s, v2.4h, #0
1087
- ; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
1088
- ; CHECK-NEXT: uaddw v0.4s, v0.4s, v3.4h
1086
+ ; CHECK-NEXT: ushll2 v18.4s, v3.8h, #0
1087
+ ; CHECK-NEXT: ushll v19.4s, v3.4h, #0
1088
+ ; CHECK-NEXT: stp q17, q0, [x4]
1089
+ ; CHECK-NEXT: uaddw v0.4s, v5.4s, v1.4h
1090
+ ; CHECK-NEXT: uaddw2 v1.4s, v16.4s, v1.8h
1089
1091
; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v4.8h
1090
1092
; CHECK-NEXT: uaddw v2.4s, v6.4s, v4.4h
1091
- ; CHECK-NEXT: ushll v4.4s, v16.4h, #0
1092
- ; CHECK-NEXT: stp q17, q5, [x4, #32]
1093
- ; CHECK-NEXT: stp q4, q18, [x4]
1093
+ ; CHECK-NEXT: stp q19, q18, [x4, #32]
1094
1094
; CHECK-NEXT: ret
1095
1095
%lp1 = load <4 x i8 >, ptr %p
1096
1096
%p2 = getelementptr i8 , ptr %p , i32 4
@@ -1176,19 +1176,20 @@ define <16 x i32> @extrause_shl(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
1176
1176
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
1177
1177
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
1178
1178
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
1179
+ ; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b
1179
1180
; CHECK-NEXT: uaddl v3.8h, v4.8b, v5.8b
1180
- ; CHECK-NEXT: uaddl v4 .8h, v1 .8b, v6 .8b
1181
- ; CHECK-NEXT: ushll v5 .4s, v2.4h, #3
1182
- ; CHECK-NEXT: ushll2 v6 .4s, v2.8h, #3
1183
- ; CHECK-NEXT: uaddl v2.8h, v0.8b, v7.8b
1184
- ; CHECK-NEXT: ushll v7.4s, v3.4h , #3
1185
- ; CHECK-NEXT: ushll2 v16 .4s, v3.8h, #3
1186
- ; CHECK-NEXT: uaddw2 v1.4s, v6 .4s, v4 .8h
1187
- ; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h
1188
- ; CHECK-NEXT: stp q5 , q6, [x4]
1189
- ; CHECK-NEXT: uaddw2 v3.4s, v16 .4s, v2 .8h
1190
- ; CHECK-NEXT: uaddw v2.4s, v7 .4s, v2 .4h
1191
- ; CHECK-NEXT: stp q7, q16, [x4, #32 ]
1181
+ ; CHECK-NEXT: uaddl v5 .8h, v0 .8b, v7 .8b
1182
+ ; CHECK-NEXT: ushll v4 .4s, v2.4h, #3
1183
+ ; CHECK-NEXT: ushll2 v2 .4s, v2.8h, #3
1184
+ ; CHECK-NEXT: ushll v6.4s, v3.4h, #3
1185
+ ; CHECK-NEXT: ushll2 v7.4s, v3.8h , #3
1186
+ ; CHECK-NEXT: uaddw v0 .4s, v4.4s, v1.4h
1187
+ ; CHECK-NEXT: uaddw2 v1.4s, v2 .4s, v1 .8h
1188
+ ; CHECK-NEXT: str q4, [x4]
1189
+ ; CHECK-NEXT: stp q2 , q6, [x4, #16 ]
1190
+ ; CHECK-NEXT: uaddw2 v3.4s, v7 .4s, v5 .8h
1191
+ ; CHECK-NEXT: uaddw v2.4s, v6 .4s, v5 .4h
1192
+ ; CHECK-NEXT: str q7, [x4, #48 ]
1192
1193
; CHECK-NEXT: ret
1193
1194
%lp1 = load <4 x i8 >, ptr %p
1194
1195
%p2 = getelementptr i8 , ptr %p , i32 4
0 commit comments