-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86][AVX10.2] Use 's_' for saturate-convert intrinsics #131592
Conversation
- Add '_' after cvt[t]s intrinsics when 's' is for saturation; - Add 's_' for all ipcvt[t] intrinsics since they are all saturation ones; This is to solve potential confusion since 's' before a type usually represents for scalar. Synced with GCC folks and they will change in the same way.
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) Changes
This is to solve potential confusion since 's' before a type usually represents for scalar. Synced with GCC folks and they will change in the same way. Patch is 234.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131592.diff 15 Files Affected:
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index 516ccc68672d6..429faa930ecf8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_bf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
@@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_hf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 5970ab0331444..012a6282b5b18 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,20 +20,21 @@
__min_vector_width__(512)))
// 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> Long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> ULong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
(const int)(__R)))
// 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 7f41deb5212c5..b58e3db8956d6 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -14,286 +14,286 @@
#ifndef __AVX10_2_512SATCVTINTRIN_H
#define __AVX10_2_512SATCVTINTRIN_H
-#define _mm512_ipcvtbf16_epi8(A) \
+#define _mm512_ipcvts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtbf16_epu8(A) \
+#define _mm512_ipcvts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epi8(A) \
+#define _mm512_ipcvtts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epu8(A) \
+#define _mm512_ipcvtts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtph_epi8(A) \
+#define _mm512_ipcvts_ph_epi8(A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_mask_ipcvtph_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_ph_epi8(W, U, A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
(__v32hu)(W), (__mmask32)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_maskz_ipcvtph_epi8(U, A) \
+#define _mm512_maskz_ipcvts_ph_epi8(U, A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
(__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_ipcvt_roundph_epi8(A, R) \
+#define _mm512_ipcvts_roundph_epi8(A, R) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
...
[truncated]
|
@llvm/pr-subscribers-clang Author: Phoebe Wang (phoebewang) Changes
This is to solve potential confusion since 's' before a type usually represents for scalar. Synced with GCC folks and they will change in the same way. Patch is 234.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131592.diff 15 Files Affected:
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index 516ccc68672d6..429faa930ecf8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_bf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
@@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_hf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 5970ab0331444..012a6282b5b18 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,20 +20,21 @@
__min_vector_width__(512)))
// 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> Long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> ULong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
(const int)(__R)))
// 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 7f41deb5212c5..b58e3db8956d6 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -14,286 +14,286 @@
#ifndef __AVX10_2_512SATCVTINTRIN_H
#define __AVX10_2_512SATCVTINTRIN_H
-#define _mm512_ipcvtbf16_epi8(A) \
+#define _mm512_ipcvts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtbf16_epu8(A) \
+#define _mm512_ipcvts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epi8(A) \
+#define _mm512_ipcvtts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epu8(A) \
+#define _mm512_ipcvtts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtph_epi8(A) \
+#define _mm512_ipcvts_ph_epi8(A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_mask_ipcvtph_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_ph_epi8(W, U, A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
(__v32hu)(W), (__mmask32)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_maskz_ipcvtph_epi8(U, A) \
+#define _mm512_maskz_ipcvts_ph_epi8(U, A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
(__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_ipcvt_roundph_epi8(A, R) \
+#define _mm512_ipcvts_roundph_epi8(A, R) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
...
[truncated]
|
Here is GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/678227.html |
Went through the patch with my eyes, I'm not sure how useful it was but wasn't able to spot anything.
I haven't found any changes regarding |
No idea. @Rejur @jianghc724 |
That is missing since it is not in satcvt, GCC will need to add that. |
I have a question, do we need to modify |
Good question! Discussed with @jianghc724, we prefer to |
should be cvts_biasph_hf8 :) |
Good catch! Proves moving the s ahead is wisdom :) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/18087 Here is the relevant piece of the build log for the reference
|
- Add '_' after cvt[t]s intrinsics when 's' is for saturation; - Add 's_' for all ipcvt[t] intrinsics since they are all saturation ones; - Move 's' after 'cvt' and add '_' after it for prior `biass` intrinsics; This is to solve potential confusion since 's' before a type usually represents for scalar. Synced with GCC folks and they will change in the same way.
biass
intrinsics;This is to solve potential confusion since 's' before a type usually represents for scalar.
Synced with GCC folks and they will change in the same way.