Skip to content

Commit

Permalink
[AMDGPU][Clang] Skip adding noundef attribute to AMDGPU HIP device fu…
Browse files Browse the repository at this point in the history
…nctions

Change-Id: I9c9769ab4b5352dd43c37b42bd24835a03c623e1
  • Loading branch information
skc7 authored and amd-aakash committed Aug 27, 2022
1 parent ce57820 commit accf36c
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 18 deletions.
10 changes: 7 additions & 3 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2327,8 +2327,13 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
getLangOpts().Sanitize.has(SanitizerKind::Memory) ||
getLangOpts().Sanitize.has(SanitizerKind::Return);

// Enable noundef attribute based on codegen options and
// skip adding the attribute to HIP device functions.
bool EnableNoundefAttrs = CodeGenOpts.EnableNoundefAttrs &&
!(getLangOpts().HIP && getLangOpts().CUDAIsDevice);

// Determine if the return type could be partially undef
if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) {
if (EnableNoundefAttrs && HasStrictReturn) {
if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
RetAttrs.addAttribute(llvm::Attribute::NoUndef);
Expand Down Expand Up @@ -2462,8 +2467,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
}

// Decide whether the argument we're handling could be partially undef
if (CodeGenOpts.EnableNoundefAttrs &&
DetermineNoUndef(ParamType, getTypes(), DL, AI)) {
if (EnableNoundefAttrs && DetermineNoUndef(ParamType, getTypes(), DL, AI)) {
Attrs.addAttribute(llvm::Attribute::NoUndef);
}

Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenCUDA/builtins-amdgcn.cu
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ __device__ void func(float *x);
// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @_Z4funcPf(float* noundef [[TMP5]]) #[[ATTR8:[0-9]+]]
// CHECK-NEXT: call void @_Z4funcPf(float* [[TMP5]]) #[[ATTR8:[0-9]+]]
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenCUDA/lambda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
// DEV-LABEL: define{{.*}} amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_
// DEV: call void @_ZZ12test_resolvevENKUlvE_clEv
// DEV-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
// DEV: call noundef i32 @_Z10overloadedIiET_v
// DEV-LABEL: define linkonce_odr noundef i32 @_Z10overloadedIiET_v
// DEV: call i32 @_Z10overloadedIiET_v
// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
// DEV: ret i32 1

__device__ int a;
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenCUDA/unnamed-types.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ __device__ float d1(float x) {
}

// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
// DEVICE: define internal noundef float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
template <typename F>
__global__ void k0(float *p, F f) {
p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
}

// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
// DEVICE: define internal noundef float @_ZZ2f1PfENKUlfE_clEf(
// DEVICE: define internal noundef float @_ZZ2f1PfENKUlffE_clEff(
// DEVICE: define internal noundef float @_ZZ2f1PfENKUlfE0_clEf(
// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
template <typename F0, typename F1, typename F2>
__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
Expand Down
10 changes: 5 additions & 5 deletions clang/test/CodeGenHIP/hipspv-addr-spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,30 @@ __device__ struct foo_t {
// Check literals are placed in address space 1 (CrossWorkGroup/__global).
// CHECK: @.str ={{.*}} unnamed_addr addrspace(1) constant

// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z3barPi(i32 addrspace(4)*
// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z3barPi(i32 addrspace(4)*
__device__ int* bar(int *x) {
return x;
}

// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z5baz_dv()
// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z5baz_dv()
__device__ int* baz_d() {
// CHECK: ret i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @d to i32 addrspace(4)*
return &d;
}

// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z5baz_cv()
// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z5baz_cv()
__device__ int* baz_c() {
// CHECK: ret i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @c to i32 addrspace(4)*
return &c;
}

// CHECK: define{{.*}} spir_func noundef i32 addrspace(4)* @_Z5baz_sv()
// CHECK: define{{.*}} spir_func i32 addrspace(4)* @_Z5baz_sv()
__device__ int* baz_s() {
// CHECK: ret i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @s to i32 addrspace(4)*
return &s;
}

// CHECK: define{{.*}} spir_func noundef i8 addrspace(4)* @_Z3quzv()
// CHECK: define{{.*}} spir_func i8 addrspace(4)* @_Z3quzv()
__device__ const char* quz() {
return "abc";
}
4 changes: 2 additions & 2 deletions clang/test/CodeGenHIP/maybe_undef-attr-verify.hip
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
// CHECK-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(5)* [[TMP2:%.*]] to i32*
// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = freeze i32 [[TMP5:%.*]]
// CHECK-NEXT: %call = call noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP6:%.*]], i32 noundef 64, i32 noundef 0) #4
// CHECK-NEXT: %call = call i32 @_Z11__shfl_synciii(i32 [[TMP6:%.*]], i32 64, i32 0) #4
// CHECK-NEXT: store i32 %call, i32* [[TMP4:%.*]], align 4
// CHECK-NEXT: ret void

// CHECK: define linkonce_odr noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]])
// CHECK: define linkonce_odr i32 @_Z11__shfl_synciii(i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]])

#define __global__ __attribute__((global))
#define __device__ __attribute__((device))
Expand Down
38 changes: 38 additions & 0 deletions clang/test/CodeGenHIP/noundef-attr-verify.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s

#define __global__ __attribute__((global))
#define __device__ __attribute__((device))
#define WARP_SIZE 64

static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;

__device__ static inline unsigned int __lane_id() {
return __builtin_amdgcn_mbcnt_hi(
-1, __builtin_amdgcn_mbcnt_lo(-1, 0));
}

__device__
inline
int __shfl(int var, int src_lane, int width = warpSize) {
int self = __lane_id();
int index = src_lane + (self & ~(width-1));
return __builtin_amdgcn_ds_bpermute(index<<2, var);
}

template <typename T>
static __device__
T __shfl_sync(unsigned mask, T val, int src_line, int width=WARP_SIZE)
{
return __shfl(val, src_line, width);
}

// CHECK-LABEL: @_Z13shufflekernelv(
// CHECK: call i32 @_ZL11__shfl_syncIiET_jS0_ii(i32 64, i32 %0, i32 0, i32 64)

__global__ void
shufflekernel()
{
int res, t;
res = __shfl_sync(WARP_SIZE, t, 0);
}
2 changes: 1 addition & 1 deletion clang/test/CodeGenHIP/unsafe-atomic-ops-gfx90a.hip
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#define __global__ __attribute__((global))
#define __device__ __attribute__((device))

// CHECK-LABEL: @_Z15unsafeAtomicAddPff(ptr noundef %addr, float noundef %value
// CHECK-LABEL: @_Z15unsafeAtomicAddPff(ptr %addr, float %value
__device__ inline float unsafeAtomicAdd(float* addr, float value) {
// CHECK: %[[ADDR_ADDR:.*]] = alloca ptr, align 8, addrspace(5)
// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ADDR_ADDR]] to ptr
Expand Down

0 comments on commit accf36c

Please sign in to comment.