|
3 | 3 | ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
|
4 | 4 | ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
|
5 | 5 | ; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s
|
| 6 | +; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1100 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81PLUS %s |
6 | 7 |
|
7 | 8 | define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
|
8 | 9 | ; GFX7-LABEL: @image_sample_2d_fptrunc_to_d16(
|
@@ -121,6 +122,123 @@ main_body:
|
121 | 122 | ret half %addf_sum.2
|
122 | 123 | }
|
123 | 124 |
|
; Test: lanes 0, 1 and 2 of a <4 x float> image.sample.lz.2d result (dmask 15)
; are each extracted and truncated to half, then combined with fmul/fadd.
; Per the checks below, under the GFX7 prefix the call is only narrowed to
; <3 x float> with dmask 7 and all three fptruncs remain. Under the GFX81PLUS
; prefix the call itself returns <3 x half> (v3f16, dmask 7) and the
; extractelements yield half directly, with no fptrunc left.
| 125 | +define amdgpu_ps half @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32> %samp, float %u, float %v) { |
| 126 | +; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16( |
| 127 | +; GFX7-NEXT: main_body: |
| 128 | +; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| 129 | +; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0 |
| 130 | +; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half |
| 131 | +; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1 |
| 132 | +; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half |
| 133 | +; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2 |
| 134 | +; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half |
| 135 | +; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]] |
| 136 | +; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]] |
| 137 | +; GFX7-NEXT: ret half [[RES]] |
| 138 | +; |
| 139 | +; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16( |
| 140 | +; GFX81PLUS-NEXT: main_body: |
| 141 | +; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| 142 | +; GFX81PLUS-NEXT: [[H0:%.*]] = extractelement <3 x half> [[SAMPLE]], i64 0 |
| 143 | +; GFX81PLUS-NEXT: [[H1:%.*]] = extractelement <3 x half> [[SAMPLE]], i64 1 |
| 144 | +; GFX81PLUS-NEXT: [[H2:%.*]] = extractelement <3 x half> [[SAMPLE]], i64 2 |
| 145 | +; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]] |
| 146 | +; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]] |
| 147 | +; GFX81PLUS-NEXT: ret half [[RES]] |
| 148 | +; |
| 149 | +main_body: |
| 150 | + %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %samp, i1 false, i32 0, i32 0) |
| 151 | + %e0 = extractelement <4 x float> %sample, i32 0 |
| 152 | + %h0 = fptrunc float %e0 to half |
| 153 | + %e1 = extractelement <4 x float> %sample, i32 1 |
| 154 | + %h1 = fptrunc float %e1 to half |
| 155 | + %e2 = extractelement <4 x float> %sample, i32 2 |
| 156 | + %h2 = fptrunc float %e2 to half |
| 157 | + %mul = fmul half %h0, %h1 |
| 158 | + %res = fadd half %mul, %h2 |
| 159 | + ret half %res |
| 160 | +} |
| 161 | + |
; Negative test: lane 0 of the <4 x float> image.sample.lz.2d result has two
; users, an fptrunc to half and a float fadd with 1.0; lane 1 is only
; truncated to half.
; Per the checks below, both the GFX7 and the GFX81PLUS prefixes expect the
; call to be narrowed to <2 x float> with dmask 3 but to keep returning float,
; with every fptrunc still present (no v2f16 form of the call is expected).
| 162 | +define amdgpu_ps half @image_sample_2d_extractelement_multi_use_no_d16(<8 x i32> %surf_desc, <4 x i32> %sampler_desc, float %u, float %v) { |
| 163 | +; GFX7-LABEL: @image_sample_2d_extractelement_multi_use_no_d16( |
| 164 | +; GFX7-NEXT: main_body: |
| 165 | +; GFX7-NEXT: [[SAMPLE:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0) |
| 166 | +; GFX7-NEXT: [[E0:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 0 |
| 167 | +; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half |
| 168 | +; GFX7-NEXT: [[USER2:%.*]] = fadd float [[E0]], 1.000000e+00 |
| 169 | +; GFX7-NEXT: [[HALF:%.*]] = fptrunc float [[USER2]] to half |
| 170 | +; GFX7-NEXT: [[E1:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 1 |
| 171 | +; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half |
| 172 | +; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]] |
| 173 | +; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF]] |
| 174 | +; GFX7-NEXT: ret half [[RES]] |
| 175 | +; |
| 176 | +; GFX81PLUS-LABEL: @image_sample_2d_extractelement_multi_use_no_d16( |
| 177 | +; GFX81PLUS-NEXT: main_body: |
| 178 | +; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0) |
| 179 | +; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 0 |
| 180 | +; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half |
| 181 | +; GFX81PLUS-NEXT: [[USER2:%.*]] = fadd float [[E0]], 1.000000e+00 |
| 182 | +; GFX81PLUS-NEXT: [[HALF:%.*]] = fptrunc float [[USER2]] to half |
| 183 | +; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 1 |
| 184 | +; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half |
| 185 | +; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]] |
| 186 | +; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF]] |
| 187 | +; GFX81PLUS-NEXT: ret half [[RES]] |
| 188 | +; |
| 189 | +main_body: |
| 190 | + %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %sampler_desc, i1 false, i32 0, i32 0) |
| 191 | + %e0 = extractelement <4 x float> %sample, i32 0 |
| 192 | + %h0 = fptrunc float %e0 to half |
| 193 | + %user2 = fadd float %e0, 1.0 |
| 194 | + %half = fptrunc float %user2 to half |
| 195 | + %e1 = extractelement <4 x float> %sample, i32 1 |
| 196 | + %h1 = fptrunc float %e1 to half |
| 197 | + %mul = fmul half %h0, %h1 |
| 198 | + %res = fadd half %mul, %half |
| 199 | + ret half %res |
| 200 | +} |
| 201 | + |
; Negative test: lanes 0, 1 and 2 of a <4 x float> image.sample.lz.2d result
; (dmask 15) are each extracted and truncated to bfloat rather than half, then
; combined with fmul/fadd.
; Per the checks below, both the GFX7 and the GFX81PLUS prefixes expect the
; call to be narrowed to <3 x float> with dmask 7 but to keep returning float,
; with all three fptrunc-to-bfloat instructions still present.
| 202 | +define amdgpu_ps bfloat @image_sample_2d_multi_fptrunc_non_half_no_d16(<8 x i32> %surf_desc, <4 x i32> %sampler_desc, float %u, float %v) { |
| 203 | +; GFX7-LABEL: @image_sample_2d_multi_fptrunc_non_half_no_d16( |
| 204 | +; GFX7-NEXT: main_body: |
| 205 | +; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0) |
| 206 | +; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0 |
| 207 | +; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to bfloat |
| 208 | +; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1 |
| 209 | +; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to bfloat |
| 210 | +; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2 |
| 211 | +; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to bfloat |
| 212 | +; GFX7-NEXT: [[MUL:%.*]] = fmul bfloat [[H0]], [[H1]] |
| 213 | +; GFX7-NEXT: [[RES:%.*]] = fadd bfloat [[MUL]], [[H2]] |
| 214 | +; GFX7-NEXT: ret bfloat [[RES]] |
| 215 | +; |
| 216 | +; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_non_half_no_d16( |
| 217 | +; GFX81PLUS-NEXT: main_body: |
| 218 | +; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0) |
| 219 | +; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0 |
| 220 | +; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to bfloat |
| 221 | +; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1 |
| 222 | +; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to bfloat |
| 223 | +; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2 |
| 224 | +; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to bfloat |
| 225 | +; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul bfloat [[H0]], [[H1]] |
| 226 | +; GFX81PLUS-NEXT: [[RES:%.*]] = fadd bfloat [[MUL]], [[H2]] |
| 227 | +; GFX81PLUS-NEXT: ret bfloat [[RES]] |
| 228 | +; |
| 229 | +main_body: |
| 230 | + %sample = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 15, float %u, float %v, <8 x i32> %surf_desc, <4 x i32> %sampler_desc, i1 false, i32 0, i32 0) |
| 231 | + %e0 = extractelement <4 x float> %sample, i32 0 |
| 232 | + %h0 = fptrunc float %e0 to bfloat |
| 233 | + %e1 = extractelement <4 x float> %sample, i32 1 |
| 234 | + %h1 = fptrunc float %e1 to bfloat |
| 235 | + %e2 = extractelement <4 x float> %sample, i32 2 |
| 236 | + %h2 = fptrunc float %e2 to bfloat |
| 237 | + %mul = fmul bfloat %h0, %h1 |
| 238 | + %res = fadd bfloat %mul, %h2 |
| 239 | + ret bfloat %res |
| 240 | +} |
| 241 | + |
124 | 242 | define amdgpu_ps half @image_gather4_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
|
125 | 243 | ; GFX7-LABEL: @image_gather4_2d_v4f32(
|
126 | 244 | ; GFX7-NEXT: main_body:
|
|
0 commit comments