@@ -162,6 +162,92 @@ main_body:
162
162
ret void
163
163
}
164
164
165
; Test input, as written in the IR body below. A four-channel float sample
; (leading i32 operand, the dmask, is 15) has only elements 0 and 1 extracted.
; Both extracted values are truncated to half, and element 0 is additionally
; consumed as a float by an fadd before that sum is truncated to half.
; The autogenerated assertions are identical for both check prefixes. They
; expect the call narrowed to <2 x float> with a leading operand of 3, and
; every fptrunc still present, so the sample result type stays float.
; Reviewer remark, to be confirmed against the RUN lines (outside this view),
; the _no_d16 suffix presumably means the half-returning form of the
; intrinsic must not be produced for this input.
+ define void @image_sample_2d_extractelement_multi_use_no_d16 (<8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , float %u , float %v , ptr addrspace (7 ) %out ) {
166
+ ; GFX7-LABEL: @image_sample_2d_extractelement_multi_use_no_d16(
167
+ ; GFX7-NEXT: main_body:
168
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
169
+ ; GFX7-NEXT: [[E0:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 0
170
+ ; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
171
+ ; GFX7-NEXT: [[USER2:%.*]] = fadd float [[E0]], 1.000000e+00
172
+ ; GFX7-NEXT: [[HALF:%.*]] = fptrunc float [[USER2]] to half
173
+ ; GFX7-NEXT: [[E1:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 1
174
+ ; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
175
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
176
+ ; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF]]
177
+ ; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
178
+ ; GFX7-NEXT: ret void
179
+ ;
180
+ ; GFX81PLUS-LABEL: @image_sample_2d_extractelement_multi_use_no_d16(
181
+ ; GFX81PLUS-NEXT: main_body:
182
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
183
+ ; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 0
184
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
185
+ ; GFX81PLUS-NEXT: [[USER2:%.*]] = fadd float [[E0]], 1.000000e+00
186
+ ; GFX81PLUS-NEXT: [[HALF:%.*]] = fptrunc float [[USER2]] to half
187
+ ; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 1
188
+ ; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
189
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
190
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF]]
191
+ ; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
192
+ ; GFX81PLUS-NEXT: ret void
193
+ ;
194
+ main_body:
195
+ %sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , i1 false , i32 0 , i32 0 )
196
+ %e0 = extractelement <4 x float > %sample , i32 0
197
+ %h0 = fptrunc float %e0 to half
; Second use of %e0, consumed directly as a float instead of through an
; fptrunc. This is the multi-use aspect named in the function.
198
+ %user2 = fadd float %e0 , 1 .0
199
+ %half = fptrunc float %user2 to half
200
+ %e1 = extractelement <4 x float > %sample , i32 1
201
+ %h1 = fptrunc float %e1 to half
; Combine the three half values into the single result that is stored.
202
+ %mul = fmul half %h0 , %h1
203
+ %res = fadd half %mul , %half
204
+ store half %res , ptr addrspace (7 ) %out , align 2
205
+ ret void
206
+ }
207
+
208
; Test input, as written in the IR body below. A four-channel float sample
; (leading i32 operand, the dmask, is 15) has elements 0, 1 and 2 extracted,
; and each extracted value is truncated to bfloat rather than to half.
; The autogenerated assertions are identical for both check prefixes. They
; expect the call narrowed to <3 x float> with a leading operand of 7, and all
; three fptrunc-to-bfloat instructions still present.
; Reviewer remark, to be confirmed against the RUN lines (outside this view),
; the _non_half_no_d16 suffix presumably means a truncation target other than
; half must not trigger the half-returning form of the intrinsic.
+ define void @image_sample_2d_multi_fptrunc_non_half_no_d16 (<8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , float %u , float %v , ptr addrspace (7 ) %out ) {
209
+ ; GFX7-LABEL: @image_sample_2d_multi_fptrunc_non_half_no_d16(
210
+ ; GFX7-NEXT: entry:
211
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
212
+ ; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
213
+ ; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to bfloat
214
+ ; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
215
+ ; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to bfloat
216
+ ; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
217
+ ; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to bfloat
218
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul bfloat [[H0]], [[H1]]
219
+ ; GFX7-NEXT: [[RES:%.*]] = fadd bfloat [[MUL]], [[H2]]
220
+ ; GFX7-NEXT: store bfloat [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
221
+ ; GFX7-NEXT: ret void
222
+ ;
223
+ ; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_non_half_no_d16(
224
+ ; GFX81PLUS-NEXT: entry:
225
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
226
+ ; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
227
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to bfloat
228
+ ; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
229
+ ; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to bfloat
230
+ ; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
231
+ ; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to bfloat
232
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul bfloat [[H0]], [[H1]]
233
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fadd bfloat [[MUL]], [[H2]]
234
+ ; GFX81PLUS-NEXT: store bfloat [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
235
+ ; GFX81PLUS-NEXT: ret void
236
+ ;
237
+ entry:
238
+ %sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , i1 false , i32 0 , i32 0 )
; Every extracted element has exactly one user, an fptrunc to bfloat.
239
+ %e0 = extractelement <4 x float > %sample , i32 0
240
+ %h0 = fptrunc float %e0 to bfloat
241
+ %e1 = extractelement <4 x float > %sample , i32 1
242
+ %h1 = fptrunc float %e1 to bfloat
243
+ %e2 = extractelement <4 x float > %sample , i32 2
244
+ %h2 = fptrunc float %e2 to bfloat
; Combine the three bfloat values into the single result that is stored.
245
+ %mul = fmul bfloat %h0 , %h1
246
+ %res = fadd bfloat %mul , %h2
247
+ store bfloat %res , ptr addrspace (7 ) %out , align 2
248
+ ret void
249
+ }
250
+
165
251
define amdgpu_ps half @image_gather4_2d_v4f32 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %s , half %t ) {
166
252
; GFX7-LABEL: @image_gather4_2d_v4f32(
167
253
; GFX7-NEXT: main_body:
0 commit comments