@@ -918,6 +918,34 @@ define <8 x double> @combine_vpermi2var_vpermt2var_8f64_as_vperm2(<8 x double> %
918
918
ret <8 x double > %res1
919
919
}
920
920
921
; Codegen test: a two-source variable permute (vpermi2var.pd.512) whose index
; vector is a constant with lane 0 overwritten by the runtime argument %a2,
; followed by a shufflevector applied to the permute result.
; Both sets of expected output below contain a vpermi2pd followed by a
; separate vpermpd of zmm2[2,3,0,1,6,7,4,5].
; NOTE(review): the "_as_permpd" suffix presumably refers to that trailing
; vpermpd - confirm against the change that introduced this test.
+ define <8 x double > @combine_vpermi2var_8f64_as_permpd (<8 x double > %x0 , <8 x double > %x1 , i64 %a2 ) {
922
+ ; X86-LABEL: combine_vpermi2var_8f64_as_permpd:
923
+ ; X86: # %bb.0:
924
+ ; X86-NEXT: movl $2, %eax
925
+ ; X86-NEXT: vmovd %eax, %xmm2
926
+ ; X86-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
927
+ ; X86-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
928
+ ; X86-NEXT: vinserti128 $1, {{\.LCPI.*}}, %ymm2, %ymm2
929
+ ; X86-NEXT: vinserti64x4 $1, {{\.LCPI.*}}, %zmm2, %zmm2
930
+ ; X86-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
931
+ ; X86-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5]
932
+ ; X86-NEXT: retl
933
+ ;
934
+ ; X64-LABEL: combine_vpermi2var_8f64_as_permpd:
935
+ ; X64: # %bb.0:
936
+ ; X64-NEXT: vmovdqa {{.*#+}} xmm2 = <u,2,1,3,4,6,5,7>
937
+ ; X64-NEXT: vpinsrq $0, %rdi, %xmm2, %xmm2
938
+ ; X64-NEXT: vmovdqa64 {{.*#+}} zmm3 = <u,2,1,3,4,6,5,7>
939
+ ; X64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
940
+ ; X64-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
941
+ ; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5]
942
+ ; X64-NEXT: retq
943
; Index vector: constant <0,2,1,3,4,6,5,7> with element 0 replaced by %a2,
; so the permute indices are not fully constant.
+ %res0 = insertelement <8 x i64 > <i64 0 , i64 2 , i64 1 , i64 3 , i64 4 , i64 6 , i64 5 , i64 7 >, i64 %a2 , i32 0
944
; Permute %x0 and %x1 using %res0 as the index operand; the last operand is
; i8 -1 (every bit of the 8-bit mask operand set).
+ %res1 = call <8 x double > @llvm.x86.avx512.mask.vpermi2var.pd.512 (<8 x double > %x0 , <8 x i64 > %res0 , <8 x double > %x1 , i8 -1 )
945
; Constant shuffle of the permute result; mask element 2 is undef, and the
; expected vpermpd above prints that lane as index 0.
+ %res2 = shufflevector <8 x double > %res1 , <8 x double > undef , <8 x i32 > <i32 2 , i32 3 , i32 undef , i32 1 , i32 6 , i32 7 , i32 4 , i32 5 >
946
+ ret <8 x double > %res2
947
+ }
948
+
921
949
define <16 x i32 > @combine_vpermi2var_vpermt2var_16i32_as_vpermd (<16 x i32 > %x0 , <16 x i32 > %x1 ) {
922
950
; CHECK-LABEL: combine_vpermi2var_vpermt2var_16i32_as_vpermd:
923
951
; CHECK: # %bb.0:
0 commit comments