@@ -122,7 +122,7 @@ main_body:
122
122
ret half %addf_sum.2
123
123
}
124
124
125
- define void @image_sample_2d_multi_fptrunc_to_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v , ptr addrspace ( 7 ) %out ) {
125
+ define amdgpu_ps half @image_sample_2d_multi_fptrunc_to_d16 (<8 x i32 > %surf_desc , <4 x i32 > %samp , float %u , float %v ) {
126
126
; GFX7-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
127
127
; GFX7-NEXT: main_body:
128
128
; GFX7-NEXT: [[SAMPLE:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32.v8i32.v4i32(i32 7, float [[U:%.*]], float [[V:%.*]], <8 x i32> [[SURF_DESC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
@@ -134,8 +134,7 @@ define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32
134
134
; GFX7-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
135
135
; GFX7-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
136
136
; GFX7-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
137
- ; GFX7-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
138
- ; GFX7-NEXT: ret void
137
+ ; GFX7-NEXT: ret half [[RES]]
139
138
;
140
139
; GFX81PLUS-LABEL: @image_sample_2d_multi_fptrunc_to_d16(
141
140
; GFX81PLUS-NEXT: main_body:
@@ -148,8 +147,7 @@ define void @image_sample_2d_multi_fptrunc_to_d16(<8 x i32> %surf_desc, <4 x i32
148
147
; GFX81PLUS-NEXT: [[H2:%.*]] = fptrunc float [[E2]] to half
149
148
; GFX81PLUS-NEXT: [[MUL:%.*]] = fmul half [[H0]], [[H1]]
150
149
; GFX81PLUS-NEXT: [[RES:%.*]] = fadd half [[MUL]], [[H2]]
151
- ; GFX81PLUS-NEXT: store half [[RES]], ptr addrspace(7) [[OUT:%.*]], align 2
152
- ; GFX81PLUS-NEXT: ret void
150
+ ; GFX81PLUS-NEXT: ret half [[RES]]
153
151
;
154
152
main_body:
155
153
%sample = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32 (i32 15 , float %u , float %v , <8 x i32 > %surf_desc , <4 x i32 > %samp , i1 false , i32 0 , i32 0 )
@@ -161,8 +159,7 @@ main_body:
161
159
%h2 = fptrunc float %e2 to half
162
160
%mul = fmul half %h0 , %h1
163
161
%res = fadd half %mul , %h2
164
- store half %res , ptr addrspace (7 ) %out , align 2
165
- ret void
162
+ ret half %res
166
163
}
167
164
168
165
define amdgpu_ps half @image_gather4_2d_v4f32 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %s , half %t ) {
@@ -444,4 +441,3 @@ declare <3 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v3f32.i32(i32, i32, i3
444
441
declare <4 x float > @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32 (i32 , i32 , i32 , i32 , <8 x i32 >, i32 , i32 ) #0
445
442
446
443
attributes #0 = { nounwind readonly willreturn}
447
-
0 commit comments