-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[NFC][AMDGPU] Add D16 test for multiple fptrunc image sample #141771
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Harrison Hao (harrisonGPU) ChangesAdd a test for image sample with multiple fptrunc to half in support of #141758 Patch is 35.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141771.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
index 30431ad724843..a3624986efbd9 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
@@ -1,8 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx700 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX7 %s
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX9 %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX10PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX11 %s
define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
@@ -16,6 +17,21 @@ define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4
; GFX81PLUS-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX81PLUS-NEXT: ret half [[TEX]]
;
+; GFX9-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: ret half [[TEX]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: ret half [[TEX]]
+;
+; GFX11-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: ret half [[TEX]]
+;
main_body:
%tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_half = fptrunc float %tex to half
@@ -40,6 +56,30 @@ define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX81PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_0]]
;
+; GFX9-LABEL: @image_sample_2d_v2f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: ret half [[ADDF_SUM_0]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v2f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_0]]
+;
+; GFX11-LABEL: @image_sample_2d_v2f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: ret half [[ADDF_SUM_0]]
+;
main_body:
%tex = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_2_half = fptrunc <2 x float> %tex to <2 x half>
@@ -71,6 +111,36 @@ define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX81PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_1]]
;
+; GFX9-LABEL: @image_sample_2d_v3f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX9-NEXT: ret half [[ADDF_SUM_1]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v3f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_1]]
+;
+; GFX11-LABEL: @image_sample_2d_v3f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX11-NEXT: ret half [[ADDF_SUM_1]]
+;
main_body:
%tex = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_3_half = fptrunc <3 x float> %tex to <3 x half>
@@ -108,6 +178,42 @@ define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX81PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]]
;
+; GFX9-LABEL: @image_sample_2d_v4f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v4f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_sample_2d_v4f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT: ret half [[ADDF_SUM_2]]
+;
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -121,6 +227,91 @@ main_body:
ret half %addf_sum.2
}
+define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v, ptr addrspace(7) %out) {
+; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX7-NEXT: main_body:
+; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
+; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX7-NEXT: ret void
+;
+; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX81PLUS-NEXT: main_body:
+; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX81PLUS-NEXT: ret void
+;
+; GFX9-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX9-NEXT: [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX9-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX9-NEXT: [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX9-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX9-NEXT: [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX9-NEXT: [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX9-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX9-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX9-NEXT: ret void
+;
+; GFX10PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX10PLUS-NEXT: [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX10PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX10PLUS-NEXT: [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX10PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX10PLUS-NEXT: [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX10PLUS-NEXT: [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX10PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX10PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX10PLUS-NEXT: ret void
+;
+; GFX11-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX11-NEXT: [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX11-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX11-NEXT: [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX11-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX11-NEXT: [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX11-NEXT: [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX11-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX11-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX11-NEXT: ret void
+;
+main_body:
+ %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ %e0 = extractelement <4 x float> %sample, i32 0
+ %h0 = fptrunc float %e0 to half
+ %e1 = extractelement <4 x float> %sample, i32 1
+ %h1 = fptrunc float %e1 to half
+ %e2 = extractelement <4 x float> %sample, i32 2
+ %h2 = fptrunc float %e2 to half
+ %mul = fmul half %h0, %h1
+ %res = fadd half %mul, %h2
+ store half %res, ptr addrspace(7) %out, align 2
+ ret void
+}
+
define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX7-LABEL: @image_gather4_2d_v4f32(
; GFX7-NEXT: main_body:
@@ -147,6 +338,42 @@ define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX81PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]]
;
+; GFX9-LABEL: @image_gather4_2d_v4f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_gather4_2d_v4f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_gather4_2d_v4f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT: ret half [[ADDF_SUM_2]]
+;
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16.v8i32.v4i32(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -169,6 +396,18 @@ define amdgpu_ps half @load_1d(i16 %s, <8 x i32> inreg %rsrc) {
; GFX81PLUS-LABEL: @load_1d(
; GFX81PLUS-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; GFX81PLUS-NEXT: ret half [[S_FLOAT]]
+;
+; GFX9-LABEL: @load_1d(
+; GFX9-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; GFX9-NEXT: ret half [[S_FLOAT]]
+;
+; GFX10PLUS-LABEL: @load_1d(
+; GFX10PLUS-NEXT: [[...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Harrison Hao (harrisonGPU) ChangesAdd a test for image sample with multiple fptrunc to half in support of #141758 Patch is 35.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141771.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
index 30431ad724843..a3624986efbd9 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-d16.ll
@@ -1,8 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx700 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX7 %s
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
-; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX9 %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX10PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX11 %s
define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
@@ -16,6 +17,21 @@ define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4
; GFX81PLUS-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
; GFX81PLUS-NEXT: ret half [[TEX]]
;
+; GFX9-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: ret half [[TEX]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: ret half [[TEX]]
+;
+; GFX11-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: ret half [[TEX]]
+;
main_body:
%tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_half = fptrunc float %tex to half
@@ -40,6 +56,30 @@ define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX81PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_0]]
;
+; GFX9-LABEL: @image_sample_2d_v2f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: ret half [[ADDF_SUM_0]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v2f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_0]]
+;
+; GFX11-LABEL: @image_sample_2d_v2f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32.v8i32.v4i32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: ret half [[ADDF_SUM_0]]
+;
main_body:
%tex = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_2_half = fptrunc <2 x float> %tex to <2 x half>
@@ -71,6 +111,36 @@ define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX81PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_1]]
;
+; GFX9-LABEL: @image_sample_2d_v3f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX9-NEXT: ret half [[ADDF_SUM_1]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v3f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_1]]
+;
+; GFX11-LABEL: @image_sample_2d_v3f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX11-NEXT: ret half [[ADDF_SUM_1]]
+;
main_body:
%tex = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_3_half = fptrunc <3 x float> %tex to <3 x half>
@@ -108,6 +178,42 @@ define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> in
; GFX81PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]]
;
+; GFX9-LABEL: @image_sample_2d_v4f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_sample_2d_v4f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_sample_2d_v4f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT: ret half [[ADDF_SUM_2]]
+;
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -121,6 +227,91 @@ main_body:
ret half %addf_sum.2
}
+define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v, ptr addrspace(7) %out) {
+; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX7-NEXT: main_body:
+; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
+; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX7-NEXT: ret void
+;
+; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX81PLUS-NEXT: main_body:
+; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
+; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
+; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
+; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
+; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
+; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX81PLUS-NEXT: ret void
+;
+; GFX9-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX9-NEXT: [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX9-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX9-NEXT: [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX9-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX9-NEXT: [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX9-NEXT: [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX9-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX9-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX9-NEXT: ret void
+;
+; GFX10PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX10PLUS-NEXT: [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX10PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX10PLUS-NEXT: [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX10PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX10PLUS-NEXT: [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX10PLUS-NEXT: [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX10PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX10PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX10PLUS-NEXT: ret void
+;
+; GFX11-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
+; GFX11-NEXT: [[HALF_EXT2:%.*]] = fptrunc float [[E0]] to half
+; GFX11-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
+; GFX11-NEXT: [[HALF_EXT1:%.*]] = fptrunc float [[E1]] to half
+; GFX11-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
+; GFX11-NEXT: [[HALF_EXT:%.*]] = fptrunc float [[E2]] to half
+; GFX11-NEXT: [[MUL:%.*]] = fmul half [[HALF_EXT2]], [[HALF_EXT1]]
+; GFX11-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF_EXT]]
+; GFX11-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
+; GFX11-NEXT: ret void
+;
+main_body:
+ %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ %e0 = extractelement <4 x float> %sample, i32 0
+ %h0 = fptrunc float %e0 to half
+ %e1 = extractelement <4 x float> %sample, i32 1
+ %h1 = fptrunc float %e1 to half
+ %e2 = extractelement <4 x float> %sample, i32 2
+ %h2 = fptrunc float %e2 to half
+ %mul = fmul half %h0, %h1
+ %res = fadd half %mul, %h2
+ store half %res, ptr addrspace(7) %out, align 2
+ ret void
+}
+
define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX7-LABEL: @image_gather4_2d_v4f32(
; GFX7-NEXT: main_body:
@@ -147,6 +338,42 @@ define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> i
; GFX81PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
; GFX81PLUS-NEXT: ret half [[ADDF_SUM_2]]
;
+; GFX9-LABEL: @image_gather4_2d_v4f32(
+; GFX9-NEXT: main_body:
+; GFX9-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX10PLUS-LABEL: @image_gather4_2d_v4f32(
+; GFX10PLUS-NEXT: main_body:
+; GFX10PLUS-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX10PLUS-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX10PLUS-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX10PLUS-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX10PLUS-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX10PLUS-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX10PLUS-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX10PLUS-NEXT: ret half [[ADDF_SUM_2]]
+;
+; GFX11-LABEL: @image_gather4_2d_v4f32(
+; GFX11-NEXT: main_body:
+; GFX11-NEXT: [[TEX:%.*]] = call <4 x half> @llvm.amdgcn.image.gather4.2d.v4f16.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX11-NEXT: [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX11-NEXT: [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX11-NEXT: [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX11-NEXT: [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX11-NEXT: [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX11-NEXT: [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX11-NEXT: [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX11-NEXT: ret half [[ADDF_SUM_2]]
+;
main_body:
%tex = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16.v8i32.v4i32(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_4_half = fptrunc <4 x float> %tex to <4 x half>
@@ -169,6 +396,18 @@ define amdgpu_ps half @load_1d(i16 %s, <8 x i32> inreg %rsrc) {
; GFX81PLUS-LABEL: @load_1d(
; GFX81PLUS-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
; GFX81PLUS-NEXT: ret half [[S_FLOAT]]
+;
+; GFX9-LABEL: @load_1d(
+; GFX9-NEXT: [[S_FLOAT:%.*]] = call half @llvm.amdgcn.image.load.1d.f16.i16.v8i32(i32 1, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; GFX9-NEXT: ret half [[S_FLOAT]]
+;
+; GFX10PLUS-LABEL: @load_1d(
+; GFX10PLUS-NEXT: [[...
[truncated]
|
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX10PLUS %s | ||
; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX11 %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All 4 of these support d16, so what's the point of testing all of them? These at least need to use a common check prefix since the output is identical to the 81PLUS case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed that using update_test_checks.py produces a warning about using same check prefixes. I understand your point!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have updated it. Thanks!
4a7fe89
to
6deaf23
Compare
6deaf23
to
62a2e1c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does "D16" mean?
|
It's the name of a bit on MIMG instructions |
Add a test for image sample with multiple fptrunc to half in support of #141758