Skip to content

Commit 19ab42a

Browse files
committed
AMDGPU: Reduce cost of f64 copysign
The real implementation is one real instruction plus a constant materialization. Cost that as 1; it is not a real f64 operation.
1 parent 017304b commit 19ab42a

File tree

2 files changed

+23
-21
lines changed

2 files changed

+23
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
718718

719719
MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
720720

721-
if (SLT == MVT::f64)
722-
return LT.first * NElts * get64BitInstrCost(CostKind);
723-
724721
if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
725722
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
726723
NElts = (NElts + 1) / 2;
@@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
731728
switch (ICA.getID()) {
732729
case Intrinsic::fma:
733730
case Intrinsic::fmuladd:
731+
if (SLT == MVT::f64) {
732+
InstRate = get64BitInstrCost(CostKind);
733+
break;
734+
}
735+
734736
if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
735737
InstRate = getFullRateInstrCost();
736738
else {
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
741743
case Intrinsic::copysign:
742744
return NElts * getFullRateInstrCost();
743745
case Intrinsic::canonicalize: {
744-
assert(SLT != MVT::f64);
745-
InstRate = getFullRateInstrCost();
746+
InstRate =
747+
SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
746748
break;
747749
}
748750
case Intrinsic::uadd_sat:

llvm/test/Analysis/CostModel/AMDGPU/copysign.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -245,25 +245,25 @@ define void @copysign_bf16() {
245245

246246
define void @copysign_f64() {
247247
; ALL-LABEL: 'copysign_f64'
248-
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
249-
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
250-
; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
251-
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
252-
; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
253-
; ALL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
254-
; ALL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
255-
; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
248+
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
249+
; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
250+
; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
251+
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
252+
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
253+
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
254+
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
255+
; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
256256
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
257257
;
258258
; ALL-SIZE-LABEL: 'copysign_f64'
259-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
260-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
261-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
262-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
263-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
264-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
265-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
266-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
259+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.copysign.f64(double undef, double undef)
260+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
261+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
262+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
263+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f64 = call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
264+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
265+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
266+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x double> undef)
267267
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
268268
;
269269
%f64 = call double @llvm.copysign.f64(double undef, double undef)

0 commit comments

Comments
 (0)