@@ -561,31 +561,34 @@ define <vscale x 4 x i64> @udot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
561
561
; CHECK-NEXT: add z1.d, z1.d, z3.d
562
562
; CHECK-NEXT: ret
563
563
;
564
- ; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_8to64:
565
- ; CHECK-NEWLOWERING: // %bb.0:
566
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z2.b
567
- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
568
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z3.h
569
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z3.h
570
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z2.h
571
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
572
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z4.s
573
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z3.s
574
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z5.s
575
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z3.s
576
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z2.s
577
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
578
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
579
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
580
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z24.d
581
- ; CHECK-NEWLOWERING-NEXT: add z5.d, z5.d, z25.d
582
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
583
- ; CHECK-NEWLOWERING-NEXT: add z3.d, z7.d, z6.d
584
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z5.d
585
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
586
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
587
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z4.d
588
- ; CHECK-NEWLOWERING-NEXT: ret
564
+ ; CHECK-NEWLOWERING-SVE-LABEL: udot_no_bin_op_8to64:
565
+ ; CHECK-NEWLOWERING-SVE: // %bb.0:
566
+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v3.2d, #0000000000000000
567
+ ; CHECK-NEWLOWERING-SVE-NEXT: mov z4.b, #1 // =0x1
568
+ ; CHECK-NEWLOWERING-SVE-NEXT: udot z3.s, z2.b, z4.b
569
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z3.s
570
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z3.d, z3.s
571
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
572
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
573
+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
574
+ ;
575
+ ; CHECK-NEWLOWERING-SVE2-LABEL: udot_no_bin_op_8to64:
576
+ ; CHECK-NEWLOWERING-SVE2: // %bb.0:
577
+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v3.2d, #0000000000000000
578
+ ; CHECK-NEWLOWERING-SVE2-NEXT: mov z4.b, #1 // =0x1
579
+ ; CHECK-NEWLOWERING-SVE2-NEXT: udot z3.s, z2.b, z4.b
580
+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z3.s
581
+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z3.s
582
+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
583
+ ;
584
+ ; CHECK-NEWLOWERING-SME-LABEL: udot_no_bin_op_8to64:
585
+ ; CHECK-NEWLOWERING-SME: // %bb.0:
586
+ ; CHECK-NEWLOWERING-SME-NEXT: mov z3.b, #1 // =0x1
587
+ ; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
588
+ ; CHECK-NEWLOWERING-SME-NEXT: udot z4.s, z2.b, z3.b
589
+ ; CHECK-NEWLOWERING-SME-NEXT: uaddwb z0.d, z0.d, z4.s
590
+ ; CHECK-NEWLOWERING-SME-NEXT: uaddwt z0.d, z0.d, z4.s
591
+ ; CHECK-NEWLOWERING-SME-NEXT: ret
589
592
%a.ext = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
590
593
%partial.reduce = tail call <vscale x 4 x i64 > @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64 (<vscale x 4 x i64 > %acc , <vscale x 16 x i64 > %a.ext )
591
594
ret <vscale x 4 x i64 > %partial.reduce
@@ -603,31 +606,34 @@ define <vscale x 4 x i64> @sdot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
603
606
; CHECK-NEXT: add z1.d, z1.d, z3.d
604
607
; CHECK-NEXT: ret
605
608
;
606
- ; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_8to64:
607
- ; CHECK-NEWLOWERING: // %bb.0:
608
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z2.b
609
- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
610
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z3.h
611
- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z3.h
612
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z2.h
613
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
614
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z4.s
615
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z3.s
616
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z5.s
617
- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.d, z3.s
618
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z2.s
619
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
620
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
621
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
622
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z24.d
623
- ; CHECK-NEWLOWERING-NEXT: add z5.d, z5.d, z25.d
624
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
625
- ; CHECK-NEWLOWERING-NEXT: add z3.d, z7.d, z6.d
626
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z5.d
627
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
628
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
629
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z4.d
630
- ; CHECK-NEWLOWERING-NEXT: ret
609
+ ; CHECK-NEWLOWERING-SVE-LABEL: sdot_no_bin_op_8to64:
610
+ ; CHECK-NEWLOWERING-SVE: // %bb.0:
611
+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v3.2d, #0000000000000000
612
+ ; CHECK-NEWLOWERING-SVE-NEXT: mov z4.b, #1 // =0x1
613
+ ; CHECK-NEWLOWERING-SVE-NEXT: sdot z3.s, z2.b, z4.b
614
+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z3.s
615
+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z3.s
616
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
617
+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
618
+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
619
+ ;
620
+ ; CHECK-NEWLOWERING-SVE2-LABEL: sdot_no_bin_op_8to64:
621
+ ; CHECK-NEWLOWERING-SVE2: // %bb.0:
622
+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v3.2d, #0000000000000000
623
+ ; CHECK-NEWLOWERING-SVE2-NEXT: mov z4.b, #1 // =0x1
624
+ ; CHECK-NEWLOWERING-SVE2-NEXT: sdot z3.s, z2.b, z4.b
625
+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z3.s
626
+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z3.s
627
+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
628
+ ;
629
+ ; CHECK-NEWLOWERING-SME-LABEL: sdot_no_bin_op_8to64:
630
+ ; CHECK-NEWLOWERING-SME: // %bb.0:
631
+ ; CHECK-NEWLOWERING-SME-NEXT: mov z3.b, #1 // =0x1
632
+ ; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
633
+ ; CHECK-NEWLOWERING-SME-NEXT: sdot z4.s, z2.b, z3.b
634
+ ; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
635
+ ; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
636
+ ; CHECK-NEWLOWERING-SME-NEXT: ret
631
637
%a.ext = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
632
638
%partial.reduce = tail call <vscale x 4 x i64 > @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64 (<vscale x 4 x i64 > %acc , <vscale x 16 x i64 > %a.ext )
633
639
ret <vscale x 4 x i64 > %partial.reduce
@@ -647,18 +653,34 @@ define <vscale x 4 x i32> @not_udot(<vscale x 4 x i32> %acc, <vscale x 8 x i8> %
647
653
; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
648
654
; CHECK-NEXT: ret
649
655
;
650
- ; CHECK-NEWLOWERING-LABEL: not_udot:
651
- ; CHECK-NEWLOWERING: // %bb.0: // %entry
652
- ; CHECK-NEWLOWERING-NEXT: and z1.h, z1.h, #0xff
653
- ; CHECK-NEWLOWERING-NEXT: and z2.h, z2.h, #0xff
654
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
655
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z1.h
656
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z2.h
657
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
658
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
659
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z3.s, z4.s
660
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
661
- ; CHECK-NEWLOWERING-NEXT: ret
656
+ ; CHECK-NEWLOWERING-SVE-LABEL: not_udot:
657
+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
658
+ ; CHECK-NEWLOWERING-SVE-NEXT: and z1.h, z1.h, #0xff
659
+ ; CHECK-NEWLOWERING-SVE-NEXT: and z2.h, z2.h, #0xff
660
+ ; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.s
661
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.s, z1.h
662
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.s, z2.h
663
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.s, z1.h
664
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.s, z2.h
665
+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.s, p0/m, z3.s, z4.s
666
+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.s, p0/m, z1.s, z2.s
667
+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
668
+ ;
669
+ ; CHECK-NEWLOWERING-SVE2-LABEL: not_udot:
670
+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
671
+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z2.h, z2.h, #0xff
672
+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z1.h, z1.h, #0xff
673
+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalb z0.s, z1.h, z2.h
674
+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalt z0.s, z1.h, z2.h
675
+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
676
+ ;
677
+ ; CHECK-NEWLOWERING-SME-LABEL: not_udot:
678
+ ; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
679
+ ; CHECK-NEWLOWERING-SME-NEXT: and z2.h, z2.h, #0xff
680
+ ; CHECK-NEWLOWERING-SME-NEXT: and z1.h, z1.h, #0xff
681
+ ; CHECK-NEWLOWERING-SME-NEXT: umlalb z0.s, z1.h, z2.h
682
+ ; CHECK-NEWLOWERING-SME-NEXT: umlalt z0.s, z1.h, z2.h
683
+ ; CHECK-NEWLOWERING-SME-NEXT: ret
662
684
entry:
663
685
%a.wide = zext <vscale x 8 x i8 > %a to <vscale x 8 x i32 >
664
686
%b.wide = zext <vscale x 8 x i8 > %b to <vscale x 8 x i32 >
@@ -681,18 +703,34 @@ define <vscale x 2 x i64> @not_udot_wide(<vscale x 2 x i64> %acc, <vscale x 4 x
681
703
; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
682
704
; CHECK-NEXT: ret
683
705
;
684
- ; CHECK-NEWLOWERING-LABEL: not_udot_wide:
685
- ; CHECK-NEWLOWERING: // %bb.0: // %entry
686
- ; CHECK-NEWLOWERING-NEXT: and z1.s, z1.s, #0xffff
687
- ; CHECK-NEWLOWERING-NEXT: and z2.s, z2.s, #0xffff
688
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
689
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z1.s
690
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z2.s
691
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.d, z1.s
692
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
693
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
694
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
695
- ; CHECK-NEWLOWERING-NEXT: ret
706
+ ; CHECK-NEWLOWERING-SVE-LABEL: not_udot_wide:
707
+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
708
+ ; CHECK-NEWLOWERING-SVE-NEXT: and z1.s, z1.s, #0xffff
709
+ ; CHECK-NEWLOWERING-SVE-NEXT: and z2.s, z2.s, #0xffff
710
+ ; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.d
711
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.d, z1.s
712
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.d, z2.s
713
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.d, z1.s
714
+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.d, z2.s
715
+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.d, p0/m, z3.d, z4.d
716
+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.d, p0/m, z1.d, z2.d
717
+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
718
+ ;
719
+ ; CHECK-NEWLOWERING-SVE2-LABEL: not_udot_wide:
720
+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
721
+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z2.s, z2.s, #0xffff
722
+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z1.s, z1.s, #0xffff
723
+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalb z0.d, z1.s, z2.s
724
+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalt z0.d, z1.s, z2.s
725
+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
726
+ ;
727
+ ; CHECK-NEWLOWERING-SME-LABEL: not_udot_wide:
728
+ ; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
729
+ ; CHECK-NEWLOWERING-SME-NEXT: and z2.s, z2.s, #0xffff
730
+ ; CHECK-NEWLOWERING-SME-NEXT: and z1.s, z1.s, #0xffff
731
+ ; CHECK-NEWLOWERING-SME-NEXT: umlalb z0.d, z1.s, z2.s
732
+ ; CHECK-NEWLOWERING-SME-NEXT: umlalt z0.d, z1.s, z2.s
733
+ ; CHECK-NEWLOWERING-SME-NEXT: ret
696
734
entry:
697
735
%a.wide = zext <vscale x 4 x i16 > %a to <vscale x 4 x i64 >
698
736
%b.wide = zext <vscale x 4 x i16 > %b to <vscale x 4 x i64 >
0 commit comments