Add missing intrinsics to cuda headers #143664

vitor1001 · 2025-06-11T08:25:40Z

LLVM prevents the sm_32_intrinsics.hpp header from being included with a #define SM_32_INTRINSICS_HPP. It also provides drop-in replacements of the functions defined in the CUDA header.

One issue is that some intrinsics were added after the replacement was written, and thus have no replacement, breaking code that calls them (Raft is one example).

This CL backport the code from sm_32_intrinsics.hpp for the missing intrinsics.

github-actions · 2025-06-11T08:25:58Z

Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

llvmbot · 2025-06-11T08:26:36Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: None (vitor1001)

Changes

LLVM prevents the sm_32_intrinsics.hpp header from being included with a #define SM_32_INTRINSICS_HPP. It also provides drop-in replacements of the functions defined in the CUDA header.

One issue is that some intrinsics were added after the replacement was written, and thus have no replacement, breaking code that calls them (Raft is one example).

This CL backport the code from sm_32_intrinsics.hpp for the missing intrinsics.

Full diff: https://github.com/llvm/llvm-project/pull/143664.diff

1 Files Affected:

(modified) clang/lib/Headers/__clang_cuda_intrinsics.h (+434)

diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index 8b230af6f6647..96f4f18d99128 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -479,6 +479,440 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
   return ret;
 }
 
+inline __device__ char __ldcg(const char *ptr) {
+  unsigned int ret;
+  asm("ld.global.cg.s8 %0, [%1];" : "=r"(ret) : "l"(ptr));
+  return (char)ret;
+}
+inline __device__ signed char __ldcg(const signed char *ptr) {
+  unsigned int ret;
+  asm("ld.global.cg.s8 %0, [%1];" : "=r"(ret) : "l"(ptr));
+  return (signed char)ret;
+}
+inline __device__ short __ldcg(const short *ptr) {
+  unsigned short ret;
+  asm("ld.global.cg.s16 %0, [%1];" : "=h"(ret) : "l"(ptr));
+  return (short)ret;
+}
+inline __device__ int __ldcg(const int *ptr) {
+  unsigned int ret;
+  asm("ld.global.cg.s32 %0, [%1];" : "=r"(ret) : "l"(ptr));
+  return (int)ret;
+}
+inline __device__ long long __ldcg(const long long *ptr) {
+  unsigned long long ret;
+  asm("ld.global.cg.s64 %0, [%1];" : "=l"(ret) : "l"(ptr));
+  return (long long)ret;
+}
+inline __device__ char2 __ldcg(const char2 *ptr) {
+  char2 ret;
+  int2 tmp;
+  asm("ld.global.cg.v2.s8 {%0,%1}, [%2];"
+      : "=r"(tmp.x), "=r"(tmp.y)
+      : "l"(ptr));
+  ret.x = (char)tmp.x;
+  ret.y = (char)tmp.y;
+  return ret;
+}
+inline __device__ char4 __ldcg(const char4 *ptr) {
+  char4 ret;
+  int4 tmp;
+  asm("ld.global.cg.v4.s8 {%0,%1,%2,%3}, [%4];"
+      : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w)
+      : "l"(ptr));
+  ret.x = (char)tmp.x;
+  ret.y = (char)tmp.y;
+  ret.z = (char)tmp.z;
+  ret.w = (char)tmp.w;
+  return ret;
+}
+inline __device__ short2 __ldcg(const short2 *ptr) {
+  short2 ret;
+  asm("ld.global.cg.v2.s16 {%0,%1}, [%2];"
+      : "=h"(ret.x), "=h"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ short4 __ldcg(const short4 *ptr) {
+  short4 ret;
+  asm("ld.global.cg.v4.s16 {%0,%1,%2,%3}, [%4];"
+      : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ int2 __ldcg(const int2 *ptr) {
+  int2 ret;
+  asm("ld.global.cg.v2.s32 {%0,%1}, [%2];"
+      : "=r"(ret.x), "=r"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ int4 __ldcg(const int4 *ptr) {
+  int4 ret;
+  asm("ld.global.cg.v4.s32 {%0,%1,%2,%3}, [%4];"
+      : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ longlong2 __ldcg(const longlong2 *ptr) {
+  longlong2 ret;
+  asm("ld.global.cg.v2.s64 {%0,%1}, [%2];"
+      : "=l"(ret.x), "=l"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+
+inline __device__ unsigned char __ldcg(const unsigned char *ptr) {
+  unsigned int ret;
+  asm("ld.global.cg.u8 %0, [%1];" : "=r"(ret) : "l"(ptr));
+  return (unsigned char)ret;
+}
+inline __device__ unsigned short __ldcg(const unsigned short *ptr) {
+  unsigned short ret;
+  asm("ld.global.cg.u16 %0, [%1];" : "=h"(ret) : "l"(ptr));
+  return ret;
+}
+inline __device__ unsigned int __ldcg(const unsigned int *ptr) {
+  unsigned int ret;
+  asm("ld.global.cg.u32 %0, [%1];" : "=r"(ret) : "l"(ptr));
+  return ret;
+}
+inline __device__ unsigned long long __ldcg(const unsigned long long *ptr) {
+  unsigned long long ret;
+  asm("ld.global.cg.u64 %0, [%1];" : "=l"(ret) : "l"(ptr));
+  return ret;
+}
+inline __device__ uchar2 __ldcg(const uchar2 *ptr) {
+  uchar2 ret;
+  uint2 tmp;
+  asm("ld.global.cg.v2.u8 {%0,%1}, [%2];"
+      : "=r"(tmp.x), "=r"(tmp.y)
+      : "l"(ptr));
+  ret.x = (unsigned char)tmp.x;
+  ret.y = (unsigned char)tmp.y;
+  return ret;
+}
+inline __device__ uchar4 __ldcg(const uchar4 *ptr) {
+  uchar4 ret;
+  uint4 tmp;
+  asm("ld.global.cg.v4.u8 {%0,%1,%2,%3}, [%4];"
+      : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w)
+      : "l"(ptr));
+  ret.x = (unsigned char)tmp.x;
+  ret.y = (unsigned char)tmp.y;
+  ret.z = (unsigned char)tmp.z;
+  ret.w = (unsigned char)tmp.w;
+  return ret;
+}
+inline __device__ ushort2 __ldcg(const ushort2 *ptr) {
+  ushort2 ret;
+  asm("ld.global.cg.v2.u16 {%0,%1}, [%2];"
+      : "=h"(ret.x), "=h"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ ushort4 __ldcg(const ushort4 *ptr) {
+  ushort4 ret;
+  asm("ld.global.cg.v4.u16 {%0,%1,%2,%3}, [%4];"
+      : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ uint2 __ldcg(const uint2 *ptr) {
+  uint2 ret;
+  asm("ld.global.cg.v2.u32 {%0,%1}, [%2];"
+      : "=r"(ret.x), "=r"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ uint4 __ldcg(const uint4 *ptr) {
+  uint4 ret;
+  asm("ld.global.cg.v4.u32 {%0,%1,%2,%3}, [%4];"
+      : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ ulonglong2 __ldcg(const ulonglong2 *ptr) {
+  ulonglong2 ret;
+  asm("ld.global.cg.v2.u64 {%0,%1}, [%2];"
+      : "=l"(ret.x), "=l"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+
+inline __device__ float __ldcg(const float *ptr) {
+  float ret;
+  asm("ld.global.cg.f32 %0, [%1];" : "=f"(ret) : "l"(ptr));
+  return ret;
+}
+inline __device__ double __ldcg(const double *ptr) {
+  double ret;
+  asm("ld.global.cg.f64 %0, [%1];" : "=d"(ret) : "l"(ptr));
+  return ret;
+}
+inline __device__ float2 __ldcg(const float2 *ptr) {
+  float2 ret;
+  asm("ld.global.cg.v2.f32 {%0,%1}, [%2];"
+      : "=f"(ret.x), "=f"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ float4 __ldcg(const float4 *ptr) {
+  float4 ret;
+  asm("ld.global.cg.v4.f32 {%0,%1,%2,%3}, [%4];"
+      : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w)
+      : "l"(ptr));
+  return ret;
+}
+inline __device__ double2 __ldcg(const double2 *ptr) {
+  double2 ret;
+  asm("ld.global.cg.v2.f64 {%0,%1}, [%2];"
+      : "=d"(ret.x), "=d"(ret.y)
+      : "l"(ptr));
+  return ret;
+}
+
+inline __device__ unsigned char __ldcv(const unsigned char *ptr) {
+  unsigned int ret;
+  asm("ld.global.cv.u8 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory");
+  return (unsigned char)ret;
+}
+inline __device__ unsigned short __ldcv(const unsigned short *ptr) {
+  unsigned short ret;
+  asm("ld.global.cv.u16 %0, [%1];" : "=h"(ret) : "l"(ptr) : "memory");
+  return ret;
+}
+inline __device__ unsigned int __ldcv(const unsigned int *ptr) {
+  unsigned int ret;
+  asm("ld.global.cv.u32 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory");
+  return ret;
+}
+inline __device__ unsigned long long __ldcv(const unsigned long long *ptr) {
+  unsigned long long ret;
+  asm("ld.global.cv.u64 %0, [%1];" : "=l"(ret) : "l"(ptr) : "memory");
+  return ret;
+}
+inline __device__ uchar2 __ldcv(const uchar2 *ptr) {
+  uchar2 ret;
+  uint2 tmp;
+  asm("ld.global.cv.v2.u8 {%0,%1}, [%2];"
+      : "=r"(tmp.x), "=r"(tmp.y)
+      : "l"(ptr)
+      : "memory");
+  ret.x = (unsigned char)tmp.x;
+  ret.y = (unsigned char)tmp.y;
+  return ret;
+}
+inline __device__ uchar4 __ldcv(const uchar4 *ptr) {
+  uchar4 ret;
+  uint4 tmp;
+  asm("ld.global.cv.v4.u8 {%0,%1,%2,%3}, [%4];"
+      : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w)
+      : "l"(ptr)
+      : "memory");
+  ret.x = (unsigned char)tmp.x;
+  ret.y = (unsigned char)tmp.y;
+  ret.z = (unsigned char)tmp.z;
+  ret.w = (unsigned char)tmp.w;
+  return ret;
+}
+inline __device__ ushort2 __ldcv(const ushort2 *ptr) {
+  ushort2 ret;
+  asm("ld.global.cv.v2.u16 {%0,%1}, [%2];"
+      : "=h"(ret.x), "=h"(ret.y)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ ushort4 __ldcv(const ushort4 *ptr) {
+  ushort4 ret;
+  asm("ld.global.cv.v4.u16 {%0,%1,%2,%3}, [%4];"
+      : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ uint2 __ldcv(const uint2 *ptr) {
+  uint2 ret;
+  asm("ld.global.cv.v2.u32 {%0,%1}, [%2];"
+      : "=r"(ret.x), "=r"(ret.y)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ uint4 __ldcv(const uint4 *ptr) {
+  uint4 ret;
+  asm("ld.global.cv.v4.u32 {%0,%1,%2,%3}, [%4];"
+      : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ ulonglong2 __ldcv(const ulonglong2 *ptr) {
+  ulonglong2 ret;
+  asm("ld.global.cv.v2.u64 {%0,%1}, [%2];"
+      : "=l"(ret.x), "=l"(ret.y)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ float __ldcv(const float *ptr) {
+  float ret;
+  asm("ld.global.cv.f32 %0, [%1];" : "=f"(ret) : "l"(ptr) : "memory");
+  return ret;
+}
+inline __device__ double __ldcv(const double *ptr) {
+  double ret;
+  asm("ld.global.cv.f64 %0, [%1];" : "=d"(ret) : "l"(ptr) : "memory");
+  return ret;
+}
+inline __device__ float2 __ldcv(const float2 *ptr) {
+  float2 ret;
+  asm("ld.global.cv.v2.f32 {%0,%1}, [%2];"
+      : "=f"(ret.x), "=f"(ret.y)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ float4 __ldcv(const float4 *ptr) {
+  float4 ret;
+  asm("ld.global.cv.v4.f32 {%0,%1,%2,%3}, [%4];"
+      : "=f"(ret.x), "=f"(ret.y), "=f"(ret.z), "=f"(ret.w)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+inline __device__ double2 __ldcv(const double2 *ptr) {
+  double2 ret;
+  asm("ld.global.cv.v2.f64 {%0,%1}, [%2];"
+      : "=d"(ret.x), "=d"(ret.y)
+      : "l"(ptr)
+      : "memory");
+  return ret;
+}
+
+inline __device__ void __stwt(char *ptr, char value) {
+  asm("st.global.wt.s8 [%0], %1;" ::"l"(ptr), "r"((int)value) : "memory");
+}
+inline __device__ void __stwt(signed char *ptr, signed char value) {
+  asm("st.global.wt.s8 [%0], %1;" ::"l"(ptr), "r"((int)value) : "memory");
+}
+inline __device__ void __stwt(short *ptr, short value) {
+  asm("st.global.wt.s16 [%0], %1;" ::"l"(ptr), "h"(value) : "memory");
+}
+inline __device__ void __stwt(int *ptr, int value) {
+  asm("st.global.wt.s32 [%0], %1;" ::"l"(ptr), "r"(value) : "memory");
+}
+inline __device__ void __stwt(long long *ptr, long long value) {
+  asm("st.global.wt.s64 [%0], %1;" ::"l"(ptr), "l"(value) : "memory");
+}
+inline __device__ void __stwt(char2 *ptr, char2 value) {
+  const int x = value.x, y = value.y;
+  asm("st.global.wt.v2.s8 [%0], {%1,%2};" ::"l"(ptr), "r"(x), "r"(y)
+      : "memory");
+}
+inline __device__ void __stwt(char4 *ptr, char4 value) {
+  const int x = value.x, y = value.y, z = value.z, w = value.w;
+  asm("st.global.wt.v4.s8 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "r"(x), "r"(y),
+      "r"(z), "r"(w)
+      : "memory");
+}
+inline __device__ void __stwt(short2 *ptr, short2 value) {
+  asm("st.global.wt.v2.s16 [%0], {%1,%2};" ::"l"(ptr), "h"(value.x),
+      "h"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(short4 *ptr, short4 value) {
+  asm("st.global.wt.v4.s16 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "h"(value.x),
+      "h"(value.y), "h"(value.z), "h"(value.w)
+      : "memory");
+}
+inline __device__ void __stwt(int2 *ptr, int2 value) {
+  asm("st.global.wt.v2.s32 [%0], {%1,%2};" ::"l"(ptr), "r"(value.x),
+      "r"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(int4 *ptr, int4 value) {
+  asm("st.global.wt.v4.s32 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "r"(value.x),
+      "r"(value.y), "r"(value.z), "r"(value.w)
+      : "memory");
+}
+inline __device__ void __stwt(longlong2 *ptr, longlong2 value) {
+  asm("st.global.wt.v2.s64 [%0], {%1,%2};" ::"l"(ptr), "l"(value.x),
+      "l"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(unsigned char *ptr, unsigned char value) {
+  asm("st.global.wt.u8 [%0], %1;" ::"l"(ptr), "r"((int)value) : "memory");
+}
+inline __device__ void __stwt(unsigned short *ptr, unsigned short value) {
+  asm("st.global.wt.u16 [%0], %1;" ::"l"(ptr), "h"(value) : "memory");
+}
+inline __device__ void __stwt(unsigned int *ptr, unsigned int value) {
+  asm("st.global.wt.u32 [%0], %1;" ::"l"(ptr), "r"(value) : "memory");
+}
+inline __device__ void __stwt(unsigned long long *ptr,
+                              unsigned long long value) {
+  asm("st.global.wt.u64 [%0], %1;" ::"l"(ptr), "l"(value) : "memory");
+}
+inline __device__ void __stwt(uchar2 *ptr, uchar2 value) {
+  const int x = value.x, y = value.y;
+  asm("st.global.wt.v2.u8 [%0], {%1,%2};" ::"l"(ptr), "r"(x), "r"(y)
+      : "memory");
+}
+inline __device__ void __stwt(uchar4 *ptr, uchar4 value) {
+  const int x = value.x, y = value.y, z = value.z, w = value.w;
+  asm("st.global.wt.v4.u8 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "r"(x), "r"(y),
+      "r"(z), "r"(w)
+      : "memory");
+}
+inline __device__ void __stwt(ushort2 *ptr, ushort2 value) {
+  asm("st.global.wt.v2.u16 [%0], {%1,%2};" ::"l"(ptr), "h"(value.x),
+      "h"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(ushort4 *ptr, ushort4 value) {
+  asm("st.global.wt.v4.u16 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "h"(value.x),
+      "h"(value.y), "h"(value.z), "h"(value.w)
+      : "memory");
+}
+inline __device__ void __stwt(uint2 *ptr, uint2 value) {
+  asm("st.global.wt.v2.u32 [%0], {%1,%2};" ::"l"(ptr), "r"(value.x),
+      "r"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(uint4 *ptr, uint4 value) {
+  asm("st.global.wt.v4.u32 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "r"(value.x),
+      "r"(value.y), "r"(value.z), "r"(value.w)
+      : "memory");
+}
+inline __device__ void __stwt(ulonglong2 *ptr, ulonglong2 value) {
+  asm("st.global.wt.v2.u64 [%0], {%1,%2};" ::"l"(ptr), "l"(value.x),
+      "l"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(float *ptr, float value) {
+  asm("st.global.wt.f32 [%0], %1;" ::"l"(ptr), "f"(value) : "memory");
+}
+inline __device__ void __stwt(double *ptr, double value) {
+  asm("st.global.wt.f64 [%0], %1;" ::"l"(ptr), "d"(value) : "memory");
+}
+inline __device__ void __stwt(float2 *ptr, float2 value) {
+  asm("st.global.wt.v2.f32 [%0], {%1,%2};" ::"l"(ptr), "f"(value.x),
+      "f"(value.y)
+      : "memory");
+}
+inline __device__ void __stwt(float4 *ptr, float4 value) {
+  asm("st.global.wt.v4.f32 [%0], {%1,%2,%3,%4};" ::"l"(ptr), "f"(value.x),
+      "f"(value.y), "f"(value.z), "f"(value.w)
+      : "memory");
+}
+inline __device__ void __stwt(double2 *ptr, double2 value) {
+  asm("st.global.wt.v2.f64 [%0], {%1,%2};" ::"l"(ptr), "d"(value.x),
+      "d"(value.y)
+      : "memory");
+}
+
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
 
 #if CUDA_VERSION >= 11000

rnk · 2025-06-11T21:12:06Z

Erich, is there a good point-of-contact at nvidia who can review CUDA intrinsic header changes in the future?

erichkeane · 2025-06-11T21:49:31Z

Erich, is there a good point-of-contact at nvidia who can review CUDA intrinsic header changes in the future?

For the most part, we don't have ANYTHING to do with CUDA in clang as far as I know. I also don't know much about CUDA, but I'll ping an internal slack channel to see if there is anyone who can help.

github-actions · 2025-06-12T18:10:37Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Artem-B

Nice. I like this approach better. There are few more things to polish up, but it looks good overall.

clang/lib/Headers/__clang_cuda_intrinsics.h

vitor1001 · 2025-06-13T08:16:27Z

Thanks, good suggestions, all done.

clang/lib/Headers/__clang_cuda_intrinsics.h

Artem-B

LGTM with one last nit.

vitor1001 · 2025-06-15T14:29:36Z

LGTM with one last nit.

Thanks, all temporary variables (tmp, ptr and value) are now prefixed with __.

rnk

@erichkeane, thanks for looking into it, no worries if nobody gets back. I think we should go ahead and land this in the mean time.

LLVM prevents the sm_32_intrinsics.hpp header from being included with a #define __SM_32_INTRINSICS_HPP__. It also provides drop-in replacements of the functions defined in the CUDA header. One issue is that some intrinsics were added after the replacement was written, and thus have no replacement, breaking code that calls them (Raft is one example). This CL backport the code from sm_32_intrinsics.hpp for the missing intrinsics.

github-actions · 2025-06-17T17:53:13Z

@vitor1001 Congratulations on having your first Pull Request (PR) merged into the LLVM Project!

Your changes will be combined with recent changes from other authors, then tested by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR.

Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues.

How to do this, and the rest of the post-merge process, is covered in detail here.

If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again.

If you don't get any reports, no action is required from you. Your changes are working as expected, well done!

vitor1001 marked this pull request as ready for review June 11, 2025 08:26

llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Jun 11, 2025

Artem-B self-requested a review June 11, 2025 17:52

Artem-B approved these changes Jun 11, 2025

View reviewed changes

rnk requested a review from erichkeane June 11, 2025 21:11

vitor1001 force-pushed the main branch 2 times, most recently from 78ac958 to f841892 Compare June 12, 2025 15:47

Artem-B requested changes Jun 12, 2025

View reviewed changes

clang/lib/Headers/__clang_cuda_intrinsics.h Outdated Show resolved Hide resolved

clang/lib/Headers/__clang_cuda_intrinsics.h Outdated Show resolved Hide resolved

vitor1001 force-pushed the main branch from f841892 to 079672a Compare June 13, 2025 08:15

vitor1001 requested a review from Artem-B June 13, 2025 08:17

Artem-B reviewed Jun 13, 2025

View reviewed changes

clang/lib/Headers/__clang_cuda_intrinsics.h Outdated Show resolved Hide resolved

Artem-B approved these changes Jun 13, 2025

View reviewed changes

vitor1001 force-pushed the main branch from 079672a to dd356e9 Compare June 15, 2025 14:28

rnk approved these changes Jun 16, 2025

View reviewed changes

vitor1001 force-pushed the main branch from dd356e9 to 539e694 Compare June 17, 2025 06:28

Artem-B merged commit b876b3f into llvm:main Jun 17, 2025
7 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Add missing intrinsics to cuda headers #143664

Add missing intrinsics to cuda headers #143664

vitor1001 commented Jun 11, 2025

Uh oh!

github-actions bot commented Jun 11, 2025

Uh oh!

llvmbot commented Jun 11, 2025 •

edited

Loading

Uh oh!

rnk commented Jun 11, 2025

Uh oh!

erichkeane commented Jun 11, 2025

Uh oh!

github-actions bot commented Jun 12, 2025 •

edited

Loading

Uh oh!

Artem-B left a comment

Uh oh!

Uh oh!

Uh oh!

vitor1001 commented Jun 13, 2025

Uh oh!

Uh oh!

Artem-B left a comment

Uh oh!

vitor1001 commented Jun 15, 2025

Uh oh!

rnk left a comment

Uh oh!

Uh oh!

github-actions bot commented Jun 17, 2025

Uh oh!

Uh oh!

Add missing intrinsics to cuda headers #143664

Add missing intrinsics to cuda headers #143664

Conversation

vitor1001 commented Jun 11, 2025

Uh oh!

github-actions bot commented Jun 11, 2025

Uh oh!

llvmbot commented Jun 11, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

rnk commented Jun 11, 2025

Uh oh!

erichkeane commented Jun 11, 2025

Uh oh!

github-actions bot commented Jun 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Artem-B left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

vitor1001 commented Jun 13, 2025

Uh oh!

Uh oh!

Artem-B left a comment

Choose a reason for hiding this comment

Uh oh!

vitor1001 commented Jun 15, 2025

Uh oh!

rnk left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

github-actions bot commented Jun 17, 2025

Uh oh!

Uh oh!

llvmbot commented Jun 11, 2025 •

edited

Loading

github-actions bot commented Jun 12, 2025 •

edited

Loading