@@ -122,7 +122,7 @@ main_body:
122
122
ret half %addf_sum.2
123
123
}
124
124
125
- define void @image_sample_2d_multi_fptrunc_to_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v , ptr addrspace ( 7 ) %out ) {
125
+ define amdgpu_gs half @image_sample_2d_multi_fptrunc_to_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v ) {
126
126
; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
127
127
; GFX7-NEXT: main_body:
128
128
; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
@@ -134,8 +134,7 @@ define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32
134
134
; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
135
135
; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
136
136
; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
137
- ; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
138
- ; GFX7-NEXT: ret void
137
+ ; GFX7-NEXT: ret half [[RES]]
139
138
;
140
139
; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
141
140
; GFX81PLUS-NEXT: main_body:
@@ -145,8 +144,7 @@ define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32
145
144
; GFX81PLUS-NEXT: [[H2:%.*]] = extractelement <3 x half> [[SAMPLE]], i64 2
146
145
; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
147
146
; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
148
- ; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
149
- ; GFX81PLUS-NEXT: ret void
147
+ ; GFX81PLUS-NEXT: ret half [[RES]]
150
148
;
151
149
main_body:
152
150
%sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %samp , i1 false , i32 0 , i32 0 )
@@ -158,8 +156,87 @@ main_body:
158
156
%h2 = fptrunc float %e2 to half
159
157
%mul = fmul half %h0 , %h1
160
158
%res = fadd half %mul , %h2
161
- store half %res , ptr addrspace (7 ) %out , align 2
162
- ret void
159
+ ret half %res
160
+ }
161
+
162
; Multi-use case: %e0 feeds both an fptrunc to half (%h0) and a float fadd
; (%user2). The test name marks this as a case where no d16 (half) sample is
; expected. Per the CHECK lines, both GFX7 and GFX81PLUS keep a float-typed
; sample; the call is only narrowed from <4 x float> with dmask 15 to
; <2 x float> with dmask 3, matching the two elements that are extracted.
+ define amdgpu_gs half @image_sample_2d_extractelement_multi_use_no_d16 (<8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , float %u , float %v ) {
163
+ ; GFX7-LABEL: @image_sample_2d_extractelement_multi_use_no_d16(
164
+ ; GFX7-NEXT: main_body:
165
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
166
+ ; GFX7-NEXT: [[E0:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 0
167
+ ; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
168
+ ; GFX7-NEXT: [[USER2:%.*]] = fadd float [[E0]], 1.000000e+00
169
+ ; GFX7-NEXT: [[HALF:%.*]] = fptrunc float [[USER2]] to half
170
+ ; GFX7-NEXT: [[E1:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 1
171
+ ; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
172
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
173
+ ; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF]]
174
+ ; GFX7-NEXT: ret half [[RES]]
175
+ ;
176
+ ; GFX81PLUS-LABEL: @image_sample_2d_extractelement_multi_use_no_d16(
177
+ ; GFX81PLUS-NEXT: main_body:
178
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32.v8i32.v4i32(i32 3, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
179
+ ; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 0
180
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to half
181
+ ; GFX81PLUS-NEXT: [[USER2:%.*]] = fadd float [[E0]], 1.000000e+00
182
+ ; GFX81PLUS-NEXT: [[HALF:%.*]] = fptrunc float [[USER2]] to half
183
+ ; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <2 x float> [[SAMPLE]], i64 1
184
+ ; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to half
185
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
186
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[HALF]]
187
+ ; GFX81PLUS-NEXT: ret half [[RES]]
188
+ ;
189
+ main_body:
190
+ %sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , i1 false , i32 0 , i32 0 )
191
+ %e0 = extractelement <4 x float > %sample , i32 0
192
+ %h0 = fptrunc float %e0 to half
193
+ %user2 = fadd float %e0 , 1.0
194
+ %half = fptrunc float %user2 to half
195
+ %e1 = extractelement <4 x float > %sample , i32 1
196
+ %h1 = fptrunc float %e1 to half
197
+ %mul = fmul half %h0 , %h1
198
+ %res = fadd half %mul , %half
199
+ ret half %res
200
+ }
201
+
202
; Non-half case: the three extracted elements are truncated to bfloat rather
; than half. The test name marks this as a case where no d16 sample is
; expected. Per the CHECK lines, both GFX7 and GFX81PLUS keep a float-typed
; sample (narrowed from <4 x float> with dmask 15 to <3 x float> with dmask 7)
; and retain every fptrunc-to-bfloat instruction.
+ define amdgpu_gs bfloat @image_sample_2d_multi_fptrunc_non_half_no_d16 (<8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , float %u , float %v ) {
203
+ ; GFX7-LABEL: @image_sample_2d_multi_fptrunc_non_half_no_d16(
204
+ ; GFX7-NEXT: main_body:
205
+ ; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
206
+ ; GFX7-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
207
+ ; GFX7-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to bfloat
208
+ ; GFX7-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
209
+ ; GFX7-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to bfloat
210
+ ; GFX7-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
211
+ ; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to bfloat
212
+ ; GFX7-NEXT: [[MUL:%.*]] = fmul bfloat [[H0]], [[H1]]
213
+ ; GFX7-NEXT: [[RES:%.*]] = fadd bfloat [[MUL]], [[H2]]
214
+ ; GFX7-NEXT: ret bfloat [[RES]]
215
+ ;
216
+ ; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_non_half_no_d16(
217
+ ; GFX81PLUS-NEXT: main_body:
218
+ ; GFX81PLUS-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMPLER_DESC:%.*]], i1 false, i32 0, i32 0)
219
+ ; GFX81PLUS-NEXT: [[E0:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 0
220
+ ; GFX81PLUS-NEXT: [[H0:%.*]] = fptrunc float [[E0]] to bfloat
221
+ ; GFX81PLUS-NEXT: [[E1:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 1
222
+ ; GFX81PLUS-NEXT: [[H1:%.*]] = fptrunc float [[E1]] to bfloat
223
+ ; GFX81PLUS-NEXT: [[E2:%.*]] = extractelement <3 x float> [[SAMPLE]], i64 2
224
+ ; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to bfloat
225
+ ; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul bfloat [[H0]], [[H1]]
226
+ ; GFX81PLUS-NEXT: [[RES:%.*]] = fadd bfloat [[MUL]], [[H2]]
227
+ ; GFX81PLUS-NEXT: ret bfloat [[RES]]
228
+ ;
229
+ main_body:
230
+ %sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %sampler_desc , i1 false , i32 0 , i32 0 )
231
+ %e0 = extractelement <4 x float > %sample , i32 0
232
+ %h0 = fptrunc float %e0 to bfloat
233
+ %e1 = extractelement <4 x float > %sample , i32 1
234
+ %h1 = fptrunc float %e1 to bfloat
235
+ %e2 = extractelement <4 x float > %sample , i32 2
236
+ %h2 = fptrunc float %e2 to bfloat
237
+ %mul = fmul bfloat %h0 , %h1
238
+ %res = fadd bfloat %mul , %h2
239
+ ret bfloat %res
163
240
}
164
241
165
242
define amdgpu_ps half @image_gather4_2d_v4f32 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %s , half %t ) {
0 commit comments