Skip to content

Commit 5fdd877

Browse files
committed
AMDGPU: Fix cost model for 16-bit operations on gfx8
We should only divide the number of pieces to fit the packed instructions if we actually have pk instructions. This increases the cost of copysign, but is closer to the current codegen output. It could be much cheaper than it is now.
1 parent 87b13ad commit 5fdd877

File tree

4 files changed

+34
-32
lines changed

4 files changed

+34
-32
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
721721
if (SLT == MVT::f64)
722722
return LT.first * NElts * get64BitInstrCost(CostKind);
723723

724-
if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
724+
if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
725725
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
726726
NElts = (NElts + 1) / 2;
727727

llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ define void @canonicalize_f16() {
2222
;
2323
; GFX8-LABEL: 'canonicalize_f16'
2424
; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.canonicalize.f16(half undef)
25-
; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
26-
; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
27-
; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
28-
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
29-
; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
30-
; GFX8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
25+
; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
26+
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
27+
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
28+
; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
29+
; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
30+
; GFX8-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
3131
; GFX8-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
3232
;
3333
; GFX9-LABEL: 'canonicalize_f16'
@@ -62,12 +62,12 @@ define void @canonicalize_f16() {
6262
;
6363
; GFX8-SIZE-LABEL: 'canonicalize_f16'
6464
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.canonicalize.f16(half undef)
65-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
66-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
67-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
68-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
69-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
70-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
65+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
66+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
67+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
68+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
69+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
70+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
7171
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
7272
;
7373
; GFX9-SIZE-LABEL: 'canonicalize_f16'

llvm/test/Analysis/CostModel/AMDGPU/copysign.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ define void @copysign_f16() {
2323
;
2424
; GFX8-LABEL: 'copysign_f16'
2525
; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
26-
; GFX8-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
27-
; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
28-
; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
29-
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
30-
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
31-
; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
32-
; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
26+
; GFX8-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
27+
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
28+
; GFX8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
29+
; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
30+
; GFX8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
31+
; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
32+
; GFX8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
3333
; GFX8-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
3434
;
3535
; GFX9-LABEL: 'copysign_f16'
@@ -67,13 +67,13 @@ define void @copysign_f16() {
6767
;
6868
; GFX8-SIZE-LABEL: 'copysign_f16'
6969
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.copysign.f16(half undef, half undef)
70-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
71-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
72-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
73-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
74-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
75-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
76-
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
70+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
71+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.copysign.v3f16(<3 x half> undef, <3 x half> undef)
72+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
73+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.copysign.v5f16(<5 x half> undef, <5 x half> undef)
74+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
75+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f16 = call <9 x half> @llvm.copysign.v9f16(<9 x half> undef, <9 x half> undef)
76+
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
7777
; GFX8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
7878
;
7979
; GFX9-SIZE-LABEL: 'copysign_f16'

llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,9 @@ bb:
271271
}
272272

273273
; GCN-LABEL: @copysign_combine_v2f16
274-
; GCN: call <2 x half> @llvm.copysign.v2f16(
274+
; GFX8: call half @llvm.copysign.f16(
275+
; GFX8: call half @llvm.copysign.f16(
276+
; GFX9: call <2 x half> @llvm.copysign.v2f16(
275277
define void @copysign_combine_v2f16(ptr addrspace(1) %arg, half %sign) {
276278
bb:
277279
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -290,8 +292,6 @@ bb:
290292

291293
; FIXME: Should always vectorize
292294
; GCN-LABEL: @copysign_combine_v4f16
293-
; GCN: call <2 x half> @llvm.copysign.v2f16(
294-
295295
; GFX8: call half @llvm.copysign.f16(
296296
; GFX8: call half @llvm.copysign.f16(
297297

@@ -327,8 +327,10 @@ bb:
327327
}
328328

329329
; GCN-LABEL: @canonicalize_combine_v4f16
330-
; GCN: call <2 x half> @llvm.canonicalize.v2f16(
331-
; GCN: call <2 x half> @llvm.canonicalize.v2f16(
330+
; GFX8: call half @llvm.canonicalize.f16(
331+
; GFX8: call half @llvm.canonicalize.f16(
332+
333+
; GFX9: call <2 x half> @llvm.canonicalize.v2f16(
332334
define void @canonicalize_combine_v4f16(ptr addrspace(1) %arg) {
333335
bb:
334336
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()

0 commit comments

Comments
 (0)