Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 33 additions & 32 deletions amd/device-libs/ockl/src/image.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#define EII() __oclc_ISA_version != 9010

#define RATTR __attribute__((pure))
#define CRATTR __attribute__((pure, target("cube-insts")))
#define ERATTR __attribute__((pure, target("extended-image-insts")))
#define WATTR
#define GATTR __attribute__((const))
Expand Down Expand Up @@ -510,14 +511,14 @@ OCKL_MANGLE_T(image_load,3D)(TSHARP i, int4 c)
return my_image_load_3d_v4f32_i32(c.x, c.y, c.z, LOAD_TSHARP(i));
}

RATTR float4
OCKL_MANGLE_T(image_load,CM)(TSHARP i, int2 c, int f)
CRATTR float4
OCKL_MANGLE_T(image_load, CM)(TSHARP i, int2 c, int f)
{
return my_image_load_cube_v4f32_i32(c.x, c.y, f, LOAD_TSHARP(i));
}

RATTR float4
OCKL_MANGLE_T(image_load,CMa)(TSHARP i, int4 c, int f)
CRATTR float4
OCKL_MANGLE_T(image_load, CMa)(TSHARP i, int4 c, int f)
{
f = LS_ARRAY_FACE(c.z, f);
return my_image_load_cube_v4f32_i32(c.x, c.y, f, LOAD_TSHARP(i));
Expand Down Expand Up @@ -565,14 +566,14 @@ OCKL_MANGLE_T(image_load_lod,3D)(TSHARP i, int4 c, int l)
return my_image_load_mip_3d_v4f32_i32(c.x, c.y, c.z, l, LOAD_TSHARP(i));
}

RATTR float4
OCKL_MANGLE_T(image_load_lod,CM)(TSHARP i, int2 c, int f, int l)
CRATTR float4
OCKL_MANGLE_T(image_load_lod, CM)(TSHARP i, int2 c, int f, int l)
{
return my_image_load_mip_cube_v4f32_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
}

RATTR float4
OCKL_MANGLE_T(image_load_lod,CMa)(TSHARP i, int4 c, int f, int l)
CRATTR float4
OCKL_MANGLE_T(image_load_lod, CMa)(TSHARP i, int4 c, int f, int l)
{
f = LS_ARRAY_FACE(c.z, f);
return my_image_load_mip_cube_v4f32_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
Expand Down Expand Up @@ -614,14 +615,14 @@ OCKL_MANGLE_T(image_loadh,3D)(TSHARP i, int4 c)
return my_image_load_3d_v4f16_i32(c.x, c.y, c.z, LOAD_TSHARP(i));
}

RATTR half4
OCKL_MANGLE_T(image_loadh,CM)(TSHARP i, int2 c, int f)
CRATTR half4
OCKL_MANGLE_T(image_loadh, CM)(TSHARP i, int2 c, int f)
{
return my_image_load_cube_v4f16_i32(c.x, c.y, f, LOAD_TSHARP(i));
}

RATTR half4
OCKL_MANGLE_T(image_loadh,CMa)(TSHARP i, int4 c, int f)
CRATTR half4
OCKL_MANGLE_T(image_loadh, CMa)(TSHARP i, int4 c, int f)
{
f = LS_ARRAY_FACE(c.z, f);
return my_image_load_cube_v4f16_i32(c.x, c.y, f, LOAD_TSHARP(i));
Expand Down Expand Up @@ -657,14 +658,14 @@ OCKL_MANGLE_T(image_loadh_lod,3D)(TSHARP i, int4 c, int l)
return my_image_load_mip_3d_v4f16_i32(c.x, c.y, c.z, l, LOAD_TSHARP(i));
}

RATTR half4
OCKL_MANGLE_T(image_loadh_lod,CM)(TSHARP i, int2 c, int f, int l)
CRATTR half4
OCKL_MANGLE_T(image_loadh_lod, CM)(TSHARP i, int2 c, int f, int l)
{
return my_image_load_mip_cube_v4f16_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
}

RATTR half4
OCKL_MANGLE_T(image_loadh_lod,CMa)(TSHARP i, int4 c, int f, int l)
CRATTR half4
OCKL_MANGLE_T(image_loadh_lod, CMa)(TSHARP i, int4 c, int f, int l)
{
f = LS_ARRAY_FACE(c.z, f);
return my_image_load_mip_cube_v4f16_i32(c.x, c.y, f, l, LOAD_TSHARP(i));
Expand Down Expand Up @@ -950,8 +951,8 @@ OCKL_MANGLE_T(image_sample,3D)(TSHARP i, SSHARP s, float4 c)
return my_image_sample_3d_v4f32_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR float4
OCKL_MANGLE_T(image_sample,CM)(TSHARP i, SSHARP s, float4 c)
CRATTR float4
OCKL_MANGLE_T(image_sample, CM)(TSHARP i, SSHARP s, float4 c)
{
CUBE_PREP(c);
if (EII())
Expand All @@ -960,8 +961,8 @@ OCKL_MANGLE_T(image_sample,CM)(TSHARP i, SSHARP s, float4 c)
return my_image_sample_cube_v4f32_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR float4
OCKL_MANGLE_T(image_sample,CMa)(TSHARP i, SSHARP s, float4 c)
CRATTR float4
OCKL_MANGLE_T(image_sample, CMa)(TSHARP i, SSHARP s, float4 c)
{
CUBE_PREP(c);
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
Expand Down Expand Up @@ -1068,15 +1069,15 @@ OCKL_MANGLE_T(image_sample_lod,3D)(TSHARP i, SSHARP s, float4 c, float l)
return my_image_sample_l_3d_v4f32_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR float4
OCKL_MANGLE_T(image_sample_lod,CM)(TSHARP i, SSHARP s, float4 c, float l)
CRATTR float4
OCKL_MANGLE_T(image_sample_lod, CM)(TSHARP i, SSHARP s, float4 c, float l)
{
CUBE_PREP(c);
return my_image_sample_l_cube_v4f32_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR float4
OCKL_MANGLE_T(image_sample_lod,CMa)(TSHARP i, SSHARP s, float4 c, float l)
CRATTR float4
OCKL_MANGLE_T(image_sample_lod, CMa)(TSHARP i, SSHARP s, float4 c, float l)
{
CUBE_PREP(c);
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
Expand Down Expand Up @@ -1135,8 +1136,8 @@ OCKL_MANGLE_T(image_sampleh,3D)(TSHARP i, SSHARP s, float4 c)
return my_image_sample_3d_v4f16_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR half4
OCKL_MANGLE_T(image_sampleh,CM)(TSHARP i, SSHARP s, float4 c)
CRATTR half4
OCKL_MANGLE_T(image_sampleh, CM)(TSHARP i, SSHARP s, float4 c)
{
CUBE_PREP(c);
if (EII())
Expand All @@ -1145,8 +1146,8 @@ OCKL_MANGLE_T(image_sampleh,CM)(TSHARP i, SSHARP s, float4 c)
return my_image_sample_cube_v4f16_f32(c.x, c.y, c.z, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR half4
OCKL_MANGLE_T(image_sampleh,CMa)(TSHARP i, SSHARP s, float4 c)
CRATTR half4
OCKL_MANGLE_T(image_sampleh, CMa)(TSHARP i, SSHARP s, float4 c)
{
CUBE_PREP(c);
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
Expand Down Expand Up @@ -1225,15 +1226,15 @@ OCKL_MANGLE_T(image_sampleh_lod,3D)(TSHARP i, SSHARP s, float4 c, float l)
return my_image_sample_l_3d_v4f16_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR half4
OCKL_MANGLE_T(image_sampleh_lod,CM)(TSHARP i, SSHARP s, float4 c, float l)
CRATTR half4
OCKL_MANGLE_T(image_sampleh_lod, CM)(TSHARP i, SSHARP s, float4 c, float l)
{
CUBE_PREP(c);
return my_image_sample_l_cube_v4f16_f32(c.x, c.y, c.z, l, LOAD_TSHARP(i), LOAD_SSHARP(s));
}

RATTR half4
OCKL_MANGLE_T(image_sampleh_lod,CMa)(TSHARP i, SSHARP s, float4 c, float l)
CRATTR half4
OCKL_MANGLE_T(image_sampleh_lod, CMa)(TSHARP i, SSHARP s, float4 c, float l)
{
CUBE_PREP(c);
c.z = SAMPLE_ARRAY_FACE(c.w, c.z);
Expand Down
13 changes: 8 additions & 5 deletions amd/device-libs/ockl/src/media.cl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

#define CATTR __attribute__((const))
#define LCATTR __attribute__((const, target("lerp-inst")))
#define QCATTR __attribute__((const, target("qsad-insts")))
#define SCATTR __attribute__((const, target("sad-insts")))
#define AS_UCHAR4(X) __builtin_astype(X, uchar4)

CATTR uint
Expand Down Expand Up @@ -44,7 +47,7 @@ OCKL_MANGLE_U32(bytealign)(uint a, uint b, uint c)
return __builtin_amdgcn_alignbyte(a, b, c);
}

CATTR uint
LCATTR uint
OCKL_MANGLE_U32(lerp)(uint a, uint b, uint c)
{
return __builtin_amdgcn_lerp(a, b, c);
Expand Down Expand Up @@ -155,13 +158,13 @@ OCKL_MANGLE_U32(pack)(float4 a)
__builtin_amdgcn_cvt_pk_u8_f32(a.s0, 0, 0))));
}

CATTR ulong
QCATTR ulong
OCKL_MANGLE_U64(qsad)(ulong a, uint b, ulong c)
{
return __builtin_amdgcn_qsad_pk_u16_u8(a, b, c);
}

CATTR uint
SCATTR uint
OCKL_MANGLE_U32(sad)(uint a, uint b, uint c)
{
return __builtin_amdgcn_sad_u8(a, b, c);
Expand All @@ -174,13 +177,13 @@ OCKL_MANGLE_U32(sadd)(uint a, uint b, uint c)
return (a > b ? a : b) - (a < b ? a : b) + c;
}

CATTR uint
SCATTR uint
OCKL_MANGLE_U32(sadhi)(uint a, uint b, uint c)
{
return __builtin_amdgcn_sad_hi_u8(a, b, c);
}

CATTR uint
SCATTR uint
OCKL_MANGLE_U32(sadw)(uint a, uint b, uint c)
{
return __builtin_amdgcn_sad_u16(a, b, c);
Expand Down
22 changes: 11 additions & 11 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc")
TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst")
BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc")
BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc")
BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc")
BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc")
TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts")
TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts")
TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts")
TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts")
BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
Expand All @@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts")
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts")
TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts")
TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts")
TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/link-builtin-bitcode.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
// CHECK-LABEL: @attr_incompatible
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {

// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
// CHECK-NEXT: ret void
//
//.
// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
Expand Down
Loading