|
3 | 3 | ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
|
4 | 4 | ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
|
5 | 5 | ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
|
| 6 | +; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s |
6 | 7 |
|
7 | 8 | define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
8 | 9 | ; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
|
@@ -121,6 +122,49 @@ main_body:
|
121 | 122 | ret half %addf_sum.2
|
122 | 123 | }
|
123 | 124 |
|
; Test: one image.sample.lz.2d result feeding several fptrunc-to-half users.
; The input IR requests a <4 x float> sample with a leading i32 operand of 15,
; but only extracts elements 0, 1 and 2. Each extracted float is truncated to
; half, the halves are combined as (h0 * h1) + h2, and the result is stored
; through the addrspace(7) pointer %out.
; Expected output, identical under the GFX7 and GFX81PLUS prefixes:
;   - the call is narrowed to return <3 x float> and its leading i32 operand
;     becomes 7 (NOTE(review): that operand is presumably the dmask - confirm
;     against the AMDGPU image intrinsic definition);
;   - the three extractelement/fptrunc pairs are kept, i.e. the call is not
;     rewritten to return half (NOTE(review): presumably because the sample has
;     more than one fptrunc user - confirm against the instcombine code);
;   - extractelement indices are printed as i64 rather than i32.
| 125 | +define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v, ptr addrspace(7) %out) { |
| 126 | +; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16( |
| 127 | +; GFX7-NEXT: main_body: |
| 128 | +; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| 129 | +; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0 |
| 130 | +; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half |
| 131 | +; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1 |
| 132 | +; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half |
| 133 | +; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2 |
| 134 | +; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half |
| 135 | +; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]] |
| 136 | +; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]] |
| 137 | +; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2 |
| 138 | +; GFX7-NEXT: ret void |
| 139 | +; |
| 140 | +; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16( |
| 141 | +; GFX81PLUS-NEXT: main_body: |
| 142 | +; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| 143 | +; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0 |
| 144 | +; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half |
| 145 | +; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1 |
| 146 | +; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half |
| 147 | +; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2 |
| 148 | +; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half |
| 149 | +; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]] |
| 150 | +; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]] |
| 151 | +; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2 |
| 152 | +; GFX81PLUS-NEXT: ret void |
| 153 | +; |
| 154 | +main_body: |
| 155 | + %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0) |
| 156 | + %e0 = extractelement <4 x float> %sample, i32 0 |
| 157 | + %h0 = fptrunc float %e0 to half |
| 158 | + %e1 = extractelement <4 x float> %sample, i32 1 |
| 159 | + %h1 = fptrunc float %e1 to half |
| 160 | + %e2 = extractelement <4 x float> %sample, i32 2 |
| 161 | + %h2 = fptrunc float %e2 to half |
| 162 | + %mul = fmul half %h0, %h1 |
| 163 | + %res = fadd half %mul, %h2 |
| 164 | + store half %res, ptr addrspace(7) %out, align 2 |
| 165 | + ret void |
| 166 | +} |
| 167 | + |
124 | 168 | define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
|
125 | 169 | ; GFX7-LABEL: @image_gather4_2d_v4f32(
|
126 | 170 | ; GFX7-NEXT: main_body:
|
|
0 commit comments