Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86][AVX10.2] Use 's_' for saturate-convert intrinsics #131592

Merged
merged 2 commits into from
Mar 21, 2025

Conversation

phoebewang
Copy link
Contributor

@phoebewang phoebewang commented Mar 17, 2025

  • Add '_' after cvt[t]s intrinsics when 's' is for saturation;
  • Add 's_' for all ipcvt[t] intrinsics since they are all saturation ones;
  • Move 's' after 'cvt' and add '_' after it for prior biass intrinsics;

This avoids potential confusion, since an 's' before a type usually stands for scalar.

Synced with GCC folks and they will change in the same way.

- Add '_' after cvt[t]s intrinsics when 's' is for saturation;
- Add 's_' for all ipcvt[t] intrinsics since they are all saturation
  ones;

This avoids potential confusion, since an 's' before a type usually
stands for scalar.

Synced with GCC folks and they will change in the same way.
@phoebewang phoebewang marked this pull request as ready for review March 19, 2025 04:06
@phoebewang phoebewang requested a review from FreddyLeaf March 19, 2025 04:06
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Mar 19, 2025
@phoebewang phoebewang requested review from RKSimon and e-kud March 19, 2025 04:07
@llvmbot
Copy link
Member

llvmbot commented Mar 19, 2025

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes
  • Add '_' after cvt[t]s intrinsics when 's' is for saturation;
  • Add 's_' for all ipcvt[t] intrinsics since they are all saturation ones;

This avoids potential confusion, since an 's' before a type usually stands for scalar.

Synced with GCC folks and they will change in the same way.


Patch is 234.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131592.diff

15 Files Affected:

  • (modified) clang/lib/Headers/avx10_2_512convertintrin.h (+18-16)
  • (modified) clang/lib/Headers/avx10_2_512satcvtdsintrin.h (+28-24)
  • (modified) clang/lib/Headers/avx10_2_512satcvtintrin.h (+84-84)
  • (modified) clang/lib/Headers/avx10_2convertintrin.h (+32-32)
  • (modified) clang/lib/Headers/avx10_2satcvtdsintrin.h (+52-48)
  • (modified) clang/lib/Headers/avx10_2satcvtintrin.h (+136-136)
  • (modified) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+36-36)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (+96-96)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+180-180)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c (+48-48)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c (+37-37)
  • (modified) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+72-72)
  • (modified) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+288-288)
  • (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c (+84-84)
  • (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c (+72-72)
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index 516ccc68672d6..429faa930ecf8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
                                                   (__v32hf)(__B));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W);
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B),
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
       (__v64qi)(__m512i)_mm512_setzero_si512());
 }
 
@@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
                                                   (__v32hf)(__B));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W);
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B),
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
       (__v64qi)(__m512i)_mm512_setzero_si512());
 }
 
@@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_bf8(__m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
@@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_hf8(__m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 5970ab0331444..012a6282b5b18 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,20 +20,21 @@
                  __min_vector_width__(512)))
 
 // 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi32(__m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
       (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
       (const int)(__R)))
 
 // 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu32(__m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
       (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
 
 //  512 bit : Double -> Long
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi64(__m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
       (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
 
 // 512 bit : Double -> ULong
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu64(__m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
       (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
       (const int)(__R)))
 
 // 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
       (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
       (__mmask16)(__U), (const int)(__R)))
 
 // 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
       (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
       (__mmask16)(__U), (const int)(__R)))
 
 // 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
       (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
       (const int)(__R)))
 
 // 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
       (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 7f41deb5212c5..b58e3db8956d6 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -14,286 +14,286 @@
 #ifndef __AVX10_2_512SATCVTINTRIN_H
 #define __AVX10_2_512SATCVTINTRIN_H
 
-#define _mm512_ipcvtbf16_epi8(A)                                               \
+#define _mm512_ipcvts_bf16_epi8(A)                                             \
   ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvtbf16_epi8(W, U, A)                                    \
+#define _mm512_mask_ipcvts_bf16_epi8(W, U, A)                                  \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epi8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A),    \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvtbf16_epi8(U, A)                                      \
+#define _mm512_maskz_ipcvts_bf16_epi8(U, A)                                    \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epi8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A),    \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvtbf16_epu8(A)                                               \
+#define _mm512_ipcvts_bf16_epu8(A)                                             \
   ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvtbf16_epu8(W, U, A)                                    \
+#define _mm512_mask_ipcvts_bf16_epu8(W, U, A)                                  \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epu8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A),    \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvtbf16_epu8(U, A)                                      \
+#define _mm512_maskz_ipcvts_bf16_epu8(U, A)                                    \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epu8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A),    \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvttbf16_epi8(A)                                              \
+#define _mm512_ipcvtts_bf16_epi8(A)                                            \
   ((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvttbf16_epi8(W, U, A)                                   \
+#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A)                                 \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epi8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A),   \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvttbf16_epi8(U, A)                                     \
+#define _mm512_maskz_ipcvtts_bf16_epi8(U, A)                                   \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epi8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A),   \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvttbf16_epu8(A)                                              \
+#define _mm512_ipcvtts_bf16_epu8(A)                                            \
   ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvttbf16_epu8(W, U, A)                                   \
+#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A)                                 \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epu8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A),   \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvttbf16_epu8(U, A)                                     \
+#define _mm512_maskz_ipcvtts_bf16_epu8(U, A)                                   \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epu8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A),   \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvtph_epi8(A)                                                 \
+#define _mm512_ipcvts_ph_epi8(A)                                               \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_mask_ipcvtph_epi8(W, U, A)                                      \
+#define _mm512_mask_ipcvts_ph_epi8(W, U, A)                                    \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
                                               (__v32hu)(W), (__mmask32)(U),    \
                                               _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_maskz_ipcvtph_epi8(U, A)                                        \
+#define _mm512_maskz_ipcvts_ph_epi8(U, A)                                      \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
       (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
       _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_ipcvt_roundph_epi8(A, R)                                        \
+#define _mm512_ipcvts_roundph_epi8(A, R)                                       \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
                   ...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Mar 19, 2025

@llvm/pr-subscribers-clang

Author: Phoebe Wang (phoebewang)

Changes
  • Add '_' after cvt[t]s intrinsics when 's' is for saturation;
  • Add 's_' for all ipcvt[t] intrinsics since they are all saturation ones;

This avoids potential confusion, since an 's' before a type usually stands for scalar.

Synced with GCC folks and they will change in the same way.


Patch is 234.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131592.diff

15 Files Affected:

  • (modified) clang/lib/Headers/avx10_2_512convertintrin.h (+18-16)
  • (modified) clang/lib/Headers/avx10_2_512satcvtdsintrin.h (+28-24)
  • (modified) clang/lib/Headers/avx10_2_512satcvtintrin.h (+84-84)
  • (modified) clang/lib/Headers/avx10_2convertintrin.h (+32-32)
  • (modified) clang/lib/Headers/avx10_2satcvtdsintrin.h (+52-48)
  • (modified) clang/lib/Headers/avx10_2satcvtintrin.h (+136-136)
  • (modified) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+36-36)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (+96-96)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+180-180)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c (+48-48)
  • (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c (+37-37)
  • (modified) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+72-72)
  • (modified) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+288-288)
  • (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c (+84-84)
  • (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c (+72-72)
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index 516ccc68672d6..429faa930ecf8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
                                                   (__v32hf)(__B));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W);
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B),
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
       (__v64qi)(__m512i)_mm512_setzero_si512());
 }
 
@@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
                                                   (__v32hf)(__B));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W);
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
   return (__m512i)__builtin_ia32_selectb_512(
-      (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B),
+      (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
       (__v64qi)(__m512i)_mm512_setzero_si512());
 }
 
@@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_bf8(__m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
@@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_hf8(__m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
   return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
       (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
 }
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 5970ab0331444..012a6282b5b18 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,20 +20,21 @@
                  __min_vector_width__(512)))
 
 // 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi32(__m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
       (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
       (const int)(__R)))
 
 // 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu32(__m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
       (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
   return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
       (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
 
 //  512 bit : Double -> Long
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi64(__m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
       (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
 
 // 512 bit : Double -> ULong
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu64(__m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
       (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
   return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
       (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
       (const int)(__R)))
 
 // 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
       (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
       (__mmask16)(__U), (const int)(__R)))
 
 // 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
       (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
       (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
       (__mmask16)(__U), (const int)(__R)))
 
 // 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
       (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
@@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
       (const int)(__R)))
 
 // 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
       _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
       (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
   return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
       (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
       _MM_FROUND_CUR_DIRECTION));
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 7f41deb5212c5..b58e3db8956d6 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -14,286 +14,286 @@
 #ifndef __AVX10_2_512SATCVTINTRIN_H
 #define __AVX10_2_512SATCVTINTRIN_H
 
-#define _mm512_ipcvtbf16_epi8(A)                                               \
+#define _mm512_ipcvts_bf16_epi8(A)                                             \
   ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvtbf16_epi8(W, U, A)                                    \
+#define _mm512_mask_ipcvts_bf16_epi8(W, U, A)                                  \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epi8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A),    \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvtbf16_epi8(U, A)                                      \
+#define _mm512_maskz_ipcvts_bf16_epi8(U, A)                                    \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epi8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epi8(A),    \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvtbf16_epu8(A)                                               \
+#define _mm512_ipcvts_bf16_epu8(A)                                             \
   ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvtbf16_epu8(W, U, A)                                    \
+#define _mm512_mask_ipcvts_bf16_epu8(W, U, A)                                  \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epu8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A),    \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvtbf16_epu8(U, A)                                      \
+#define _mm512_maskz_ipcvts_bf16_epu8(U, A)                                    \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvtbf16_epu8(A),      \
+                                       (__v32hi)_mm512_ipcvts_bf16_epu8(A),    \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvttbf16_epi8(A)                                              \
+#define _mm512_ipcvtts_bf16_epi8(A)                                            \
   ((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvttbf16_epi8(W, U, A)                                   \
+#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A)                                 \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epi8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A),   \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvttbf16_epi8(U, A)                                     \
+#define _mm512_maskz_ipcvtts_bf16_epi8(U, A)                                   \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epi8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epi8(A),   \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvttbf16_epu8(A)                                              \
+#define _mm512_ipcvtts_bf16_epu8(A)                                            \
   ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
 
-#define _mm512_mask_ipcvttbf16_epu8(W, U, A)                                   \
+#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A)                                 \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epu8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A),   \
                                        (__v32hi)(__m512i)(W)))
 
-#define _mm512_maskz_ipcvttbf16_epu8(U, A)                                     \
+#define _mm512_maskz_ipcvtts_bf16_epu8(U, A)                                   \
   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U),                         \
-                                       (__v32hi)_mm512_ipcvttbf16_epu8(A),     \
+                                       (__v32hi)_mm512_ipcvtts_bf16_epu8(A),   \
                                        (__v32hi)_mm512_setzero_si512()))
 
-#define _mm512_ipcvtph_epi8(A)                                                 \
+#define _mm512_ipcvts_ph_epi8(A)                                               \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_mask_ipcvtph_epi8(W, U, A)                                      \
+#define _mm512_mask_ipcvts_ph_epi8(W, U, A)                                    \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
                                               (__v32hu)(W), (__mmask32)(U),    \
                                               _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_maskz_ipcvtph_epi8(U, A)                                        \
+#define _mm512_maskz_ipcvts_ph_epi8(U, A)                                      \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
       (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
       _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_ipcvt_roundph_epi8(A, R)                                        \
+#define _mm512_ipcvts_roundph_epi8(A, R)                                       \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
                   ...
[truncated]

@phoebewang
Copy link
Contributor Author

@e-kud
Copy link
Contributor

e-kud commented Mar 19, 2025

Went through the patch with my eyes, I'm not sure how useful it was but wasn't able to spot anything.

Here is GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/678227.html

I haven't found any changes regarding vcvt2ph2, nor any other similar threads. Have I missed it? Or will it be the next step?

@phoebewang
Copy link
Contributor Author

Went through the patch with my eyes, I'm not sure how useful it was but wasn't able to spot anything.

Here is GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/678227.html

I haven't found any changes regarding vcvt2ph2, nor any other similar threads. Have I missed it? Or will it be the next step?

No idea. @Rejur @jianghc724

@jianghc724
Copy link

Went through the patch with my eyes, I'm not sure how useful it was but wasn't able to spot anything.

Here is GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/678227.html

I haven't found any changes regarding vcvt2ph2, nor any other similar threads. Have I missed it? Or will it be the next step?

No idea. @Rejur @jianghc724

That is missing since it is not in satcvt, GCC will need to add that.

@Rejur
Copy link

Rejur commented Mar 20, 2025

Went through the patch with my eyes, I'm not sure how useful it was but wasn't able to spot anything.

Here is GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/678227.html

I haven't found any changes regarding vcvt2ph2, nor any other similar threads. Have I missed it? Or will it be the next step?

No idea. @Rejur @jianghc724

That is missing since it is not in satcvt, GCC will need to add that.

I have a question: do we need to change VCVTBIASPH2BF8S's intrinsic name to cvtbiass_ph_bf8?

@phoebewang
Copy link
Contributor Author

phoebewang commented Mar 20, 2025

Went through the patch with my eyes, I'm not sure how useful it was but wasn't able to spot anything.

Here is GCC patch: https://gcc.gnu.org/pipermail/gcc-patches/2025-March/678227.html

I haven't found any changes regarding vcvt2ph2, nor any other similar threads. Have I missed it? Or will it be the next step?

No idea. @Rejur @jianghc724

That is missing since it is not in satcvt, GCC will need to add that.

I have a question: do we need to change VCVTBIASPH2BF8S's intrinsic name to cvtbiass_ph_bf8?

Good question! I discussed this with @jianghc724, and we prefer cvts_biasph_hf8.

@jianghc724
Copy link

should be cvts_biasph_hf8 :)

@phoebewang
Copy link
Contributor Author

should be cvts_biasph_hf8 :)

Good catch! It proves that moving the 's' ahead is wise :)

Copy link
Contributor

@e-kud e-kud left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@phoebewang phoebewang merged commit 19d2023 into llvm:main Mar 21, 2025
11 checks passed
@phoebewang phoebewang deleted the AVX10 branch March 21, 2025 03:00
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 21, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime running on omp-vega20-0 while building clang at step 7 "Add check check-offload".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/18087

Here is the relevant piece of the build log for the reference
Step 7 (Add check check-offload) failure: test (failure)
******************** TEST 'libomptarget :: amdgcn-amd-amdhsa :: offloading/gpupgo/pgo2.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-generate
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-generate
# note: command had no output on stdout or stderr
# RUN: at line 2
env LLVM_PROFILE_FILE=pgo2.c.llvm.profraw      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp 2>&1
# executed command: env LLVM_PROFILE_FILE=pgo2.c.llvm.profraw /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp
# note: command had no output on stdout or stderr
# RUN: at line 4
llvm-profdata show --all-functions --counts      pgo2.c.llvm.profraw | /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c      --check-prefix="LLVM-HOST"
# executed command: llvm-profdata show --all-functions --counts pgo2.c.llvm.profraw
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=LLVM-HOST
# note: command had no output on stdout or stderr
# RUN: at line 7
llvm-profdata show --all-functions --counts      amdgcn-amd-amdhsa.pgo2.c.llvm.profraw      | /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix="LLVM-DEVICE"
# executed command: llvm-profdata show --all-functions --counts amdgcn-amd-amdhsa.pgo2.c.llvm.profraw
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=LLVM-DEVICE
# note: command had no output on stdout or stderr
# RUN: at line 11
/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp    -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib  -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-instr-generate
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/clang -fopenmp -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test -I /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -L /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -nogpulib -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib -fopenmp-targets=amdgcn-amd-amdhsa /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c -o /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./lib/libomptarget.devicertl.a -fprofile-instr-generate
# note: command had no output on stdout or stderr
# RUN: at line 12
env LLVM_PROFILE_FILE=pgo2.c.clang.profraw      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp 2>&1
# executed command: env LLVM_PROFILE_FILE=pgo2.c.clang.profraw /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/runtimes/runtimes-bins/offload/test/amdgcn-amd-amdhsa/offloading/gpupgo/Output/pgo2.c.tmp
# note: command had no output on stdout or stderr
# RUN: at line 14
llvm-profdata show --all-functions --counts      pgo2.c.clang.profraw | /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c      --check-prefix="CLANG-HOST"
# executed command: llvm-profdata show --all-functions --counts pgo2.c.clang.profraw
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=CLANG-HOST
# note: command had no output on stdout or stderr
# RUN: at line 17
llvm-profdata show --all-functions --counts      amdgcn-amd-amdhsa.pgo2.c.clang.profraw |      /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix="CLANG-DEV"
# executed command: llvm-profdata show --all-functions --counts amdgcn-amd-amdhsa.pgo2.c.clang.profraw
# note: command had no output on stdout or stderr
# executed command: /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.build/./bin/FileCheck /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c --check-prefix=CLANG-DEV
# .---command stderr------------
# | /home/ompworker/bbot/openmp-offload-amdgpu-runtime/llvm.src/offload/test/offloading/gpupgo/pgo2.c:101:15: error: CLANG-DEV: expected string not found in input
# | // CLANG-DEV: Block counts: [11]
# |               ^
# | <stdin>:5:19: note: scanning from here
# |  Function count: 0
...

phoebewang added a commit to phoebewang/llvm-project that referenced this pull request Mar 27, 2025
- Add '_' after cvt[t]s intrinsics when 's' is for saturation;
- Add 's_' for all ipcvt[t] intrinsics since they are all saturation
ones;
- Move 's' after 'cvt' and add '_' after it for prior `biass`
intrinsics;

This is to avoid potential confusion, since an 's' before a type usually
stands for scalar.

Synced with GCC folks and they will change in the same way.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants