@@ -917,20 +917,11 @@ define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
917
917
;
918
918
; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_8to64:
919
919
; CHECK-NEWLOWERING-I8MM: // %bb.0:
920
- ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v3.8h, v2.8b, #0
921
- ; CHECK-NEWLOWERING-I8MM-NEXT: ushll2 v2.8h, v2.16b, #0
922
- ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v4.4s, v3.4h, #0
923
- ; CHECK-NEWLOWERING-I8MM-NEXT: ushll v5.4s, v2.4h, #0
924
- ; CHECK-NEWLOWERING-I8MM-NEXT: ushll2 v3.4s, v3.8h, #0
925
- ; CHECK-NEWLOWERING-I8MM-NEXT: ushll2 v2.4s, v2.8h, #0
926
- ; CHECK-NEWLOWERING-I8MM-NEXT: uaddw v1.2d, v1.2d, v5.2s
920
+ ; CHECK-NEWLOWERING-I8MM-NEXT: movi v3.16b, #1
921
+ ; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
922
+ ; CHECK-NEWLOWERING-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
927
923
; CHECK-NEWLOWERING-I8MM-NEXT: uaddw v0.2d, v0.2d, v4.2s
928
- ; CHECK-NEWLOWERING-I8MM-NEXT: uaddw2 v1.2d, v1.2d, v5.4s
929
924
; CHECK-NEWLOWERING-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
930
- ; CHECK-NEWLOWERING-I8MM-NEXT: uaddw v1.2d, v1.2d, v2.2s
931
- ; CHECK-NEWLOWERING-I8MM-NEXT: uaddw v0.2d, v0.2d, v3.2s
932
- ; CHECK-NEWLOWERING-I8MM-NEXT: uaddw2 v1.2d, v1.2d, v2.4s
933
- ; CHECK-NEWLOWERING-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v3.4s
934
925
; CHECK-NEWLOWERING-I8MM-NEXT: ret
935
926
%a.wide = zext <16 x i8 > %a to <16 x i64 >
936
927
%partial.reduce = tail call <4 x i64 > @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64 (<4 x i64 > %acc , <16 x i64 > %a.wide )
@@ -967,20 +958,11 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
967
958
;
968
959
; CHECK-NEWLOWERING-I8MM-LABEL: sdot_no_bin_op_8to64:
969
960
; CHECK-NEWLOWERING-I8MM: // %bb.0:
970
- ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v3.8h, v2.8b, #0
971
- ; CHECK-NEWLOWERING-I8MM-NEXT: sshll2 v2.8h, v2.16b, #0
972
- ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v4.4s, v3.4h, #0
973
- ; CHECK-NEWLOWERING-I8MM-NEXT: sshll v5.4s, v2.4h, #0
974
- ; CHECK-NEWLOWERING-I8MM-NEXT: sshll2 v3.4s, v3.8h, #0
975
- ; CHECK-NEWLOWERING-I8MM-NEXT: sshll2 v2.4s, v2.8h, #0
976
- ; CHECK-NEWLOWERING-I8MM-NEXT: saddw v1.2d, v1.2d, v5.2s
961
+ ; CHECK-NEWLOWERING-I8MM-NEXT: movi v3.16b, #1
962
+ ; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
963
+ ; CHECK-NEWLOWERING-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
977
964
; CHECK-NEWLOWERING-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
978
- ; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v1.2d, v1.2d, v5.4s
979
965
; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s
980
- ; CHECK-NEWLOWERING-I8MM-NEXT: saddw v1.2d, v1.2d, v2.2s
981
- ; CHECK-NEWLOWERING-I8MM-NEXT: saddw v0.2d, v0.2d, v3.2s
982
- ; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v1.2d, v1.2d, v2.4s
983
- ; CHECK-NEWLOWERING-I8MM-NEXT: saddw2 v0.2d, v0.2d, v3.4s
984
966
; CHECK-NEWLOWERING-I8MM-NEXT: ret
985
967
%a.wide = sext <16 x i8 > %a to <16 x i64 >
986
968
%partial.reduce = tail call <4 x i64 > @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64 (<4 x i64 > %acc , <16 x i64 > %a.wide )
0 commit comments