Skip to content

Commit 13d4fd4

Browse files
committed
AMDGPU: Directly select minimumnum/maximumnum with ieee_mode=0
The hardware min/max instructions follow the IR rules when IEEE mode is disabled, so we can avoid canonicalizing the inputs. We lose the quieting of a signaling NaN if both inputs are NaNs, but that quieting is only required with strictfp.
1 parent b91936a commit 13d4fd4

File tree

9 files changed

+1368
-1673
lines changed

9 files changed

+1368
-1673
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().F
9292
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
9393
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
9494
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
95+
def IEEEModeEnabled : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
96+
def IEEEModeDisabled : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
9597
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
9698
}
9799

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -957,12 +957,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
957957
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
958958
}
959959

960-
auto &MinNumMaxNum = getActionDefinitionsBuilder({
961-
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
962-
963-
// TODO: These should be custom lowered and are directly legal with IEEE=0
964-
auto &MinimumNumMaximumNum =
965-
getActionDefinitionsBuilder({G_FMINIMUMNUM, G_FMAXIMUMNUM});
960+
auto &MinNumMaxNum = getActionDefinitionsBuilder(
961+
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
962+
G_FMAXNUM_IEEE});
966963

967964
if (ST.hasVOP3PInsts()) {
968965
MinNumMaxNum.customFor(FPTypesPK16)
@@ -980,8 +977,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
980977
.scalarize(0);
981978
}
982979

983-
MinimumNumMaximumNum.lower();
984-
985980
if (ST.hasVOP3PInsts())
986981
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);
987982

@@ -2162,6 +2157,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21622157
return legalizeFPTOI(MI, MRI, B, false);
21632158
case TargetOpcode::G_FMINNUM:
21642159
case TargetOpcode::G_FMAXNUM:
2160+
case TargetOpcode::G_FMINIMUMNUM:
2161+
case TargetOpcode::G_FMAXIMUMNUM:
21652162
case TargetOpcode::G_FMINNUM_IEEE:
21662163
case TargetOpcode::G_FMAXNUM_IEEE:
21672164
return legalizeMinNumMaxNum(Helper, MI);
@@ -2741,9 +2738,17 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
27412738
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
27422739

27432740
// With ieee_mode disabled, the instructions have the correct behavior
2744-
// already for G_FMINNUM/G_FMAXNUM
2745-
if (!MFI->getMode().IEEE)
2741+
// already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
2742+
//
2743+
// FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
2744+
// enabled.
2745+
if (!MFI->getMode().IEEE) {
2746+
if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
2747+
MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
2748+
return true;
2749+
27462750
return !IsIEEEOp;
2751+
}
27472752

27482753
if (IsIEEEOp)
27492754
return true;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4009,6 +4009,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
40094009
case AMDGPU::G_FMAXNUM:
40104010
case AMDGPU::G_FMINIMUM:
40114011
case AMDGPU::G_FMAXIMUM:
4012+
case AMDGPU::G_FMINIMUMNUM:
4013+
case AMDGPU::G_FMAXIMUMNUM:
40124014
case AMDGPU::G_INTRINSIC_TRUNC:
40134015
case AMDGPU::G_STRICT_FADD:
40144016
case AMDGPU::G_STRICT_FSUB:

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
531531
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
532532
Legal);
533533

534-
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
535-
Custom);
534+
setOperationAction(
535+
{ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
536+
{MVT::f32, MVT::f64}, Custom);
536537

537538
// These are really only legal for ieee_mode functions. We should be avoiding
538539
// them for functions that don't have ieee_mode enabled, so just say they are
@@ -771,7 +772,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
771772
MVT::v32f16, MVT::v32bf16},
772773
Custom);
773774

774-
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
775+
setOperationAction(
776+
{ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
777+
MVT::f16, Custom);
775778
setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
776779

777780
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::FMINIMUMNUM,
@@ -825,8 +828,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
825828
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
826829
VT, Custom);
827830

828-
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
829-
Custom);
831+
setOperationAction(
832+
{ISD::FMAXNUM, ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM},
833+
{MVT::v2f16, MVT::v4f16}, Custom);
830834

831835
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
832836
setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16, MVT::v4bf16},
@@ -6062,6 +6066,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
60626066
case ISD::FMINNUM:
60636067
case ISD::FMAXNUM:
60646068
return lowerFMINNUM_FMAXNUM(Op, DAG);
6069+
case ISD::FMINIMUMNUM:
6070+
case ISD::FMAXIMUMNUM:
6071+
return lowerFMINIMUMNUM_FMAXIMUMNUM(Op, DAG);
60656072
case ISD::FMINIMUM:
60666073
case ISD::FMAXIMUM:
60676074
return lowerFMINIMUM_FMAXIMUM(Op, DAG);
@@ -6086,8 +6093,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
60866093
case ISD::FMUL:
60876094
case ISD::FMINNUM_IEEE:
60886095
case ISD::FMAXNUM_IEEE:
6089-
case ISD::FMINIMUMNUM:
6090-
case ISD::FMAXIMUMNUM:
60916096
case ISD::UADDSAT:
60926097
case ISD::USUBSAT:
60936098
case ISD::SADDSAT:
@@ -6995,6 +7000,23 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
69957000
return Op;
69967001
}
69977002

7003+
/// Custom lowering for ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM nodes.
///
/// \param Op  the min/max node being lowered.
/// \param DAG the SelectionDAG that owns \p Op.
/// \returns the result of expandFMINIMUMNUM_FMAXIMUMNUM when the function's
/// mode has IEEE set; otherwise the result of splitBinaryVectorOp for the
/// wide 16-bit-element vector types checked below, or \p Op unchanged so the
/// node is kept as-is.
SDValue
7004+
SITargetLowering::lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op,
7005+
SelectionDAG &DAG) const {
7006+
EVT VT = Op.getValueType();
7007+
const MachineFunction &MF = DAG.getMachineFunction();
7008+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
7009+
// The IEEE bit of the per-function mode decides which path is taken.
bool IsIEEEMode = Info->getMode().IEEE;
7010+
7011+
// With IEEE mode enabled the node is not kept; defer to the expansion helper.
if (IsIEEEMode)
7012+
return expandFMINIMUMNUM_FMAXIMUMNUM(Op.getNode(), DAG);
7013+
7014+
// IEEE mode disabled: these wider vector types are handed to
// splitBinaryVectorOp instead of being returned directly.
if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
7015+
VT == MVT::v16bf16)
7016+
return splitBinaryVectorOp(Op, DAG);
7017+
// Every other type: return the node untouched.
return Op;
7018+
}
7019+
69987020
SDValue SITargetLowering::lowerFMINIMUM_FMAXIMUM(SDValue Op,
69997021
SelectionDAG &DAG) const {
70007022
EVT VT = Op.getValueType();

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
147147
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
148148
SDValue splitFP_ROUNDVectorOp(SDValue Op, SelectionDAG &DAG) const;
149149
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
150+
SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
150151
SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
151152
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
152153
SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,55 @@ def : GCNPat<
13901390
(S_ADD_U64_PSEUDO $src0, $src1)>;
13911391
}
13921392

1393+
//===----------------------------------------------------------------------===//
1394+
// FP min/max patterns
1395+
//===----------------------------------------------------------------------===//
1396+
1397+
1398+
// Selection pattern for a two-operand FP node. Matches `node` producing `vt`
// from two `vt` operands, each matched through VOP3Mods (giving a source and
// its i32 modifier operand), and emits `inst` with those modifier/source
// pairs plus DSTCLAMP.NONE and DSTOMOD.NONE.
class FPBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
1399+
: GCNPat <(vt (node (vt (VOP3Mods vt:$src0, i32:$src0_mods)),
1400+
(vt (VOP3Mods vt:$src1, i32:$src1_mods)))),
1401+
(inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE, DSTOMOD.NONE)
1402+
>;
1403+
1404+
// Packed variant of the two-operand FP pattern. Operands are matched through
// VOP3PMods and are spelled as v2f16 (not `vt`); `inst` is emitted with the
// modifier/source pairs and DSTCLAMP.NONE only (no omod operand is passed).
class FPPkBinOpPat <SDPatternOperator node, ValueType vt, Instruction inst>
1405+
: GCNPat <(vt (node (VOP3PMods v2f16:$src0, i32:$src0_mods),
1406+
(VOP3PMods v2f16:$src1, i32:$src1_mods))),
1407+
(inst $src0_mods, $src0, $src1_mods, $src1, DSTCLAMP.NONE)
1408+
>;
1409+
1410+
/// With IEEE=0, signalingness is ignored and the non-nan input will
1411+
/// be directly returned.
1412+
let OtherPredicates = [IEEEModeDisabled] in {
1413+
def : FPBinOpPat<fminimumnum, f32, V_MIN_F32_e64>;
1414+
def : FPBinOpPat<fmaximumnum, f32, V_MAX_F32_e64>;
1415+
def : FPBinOpPat<fminimumnum, f64, V_MIN_F64_e64>;
1416+
def : FPBinOpPat<fmaximumnum, f64, V_MAX_F64_e64>;
1417+
1418+
let SubtargetPredicate = Has16BitInsts,
1419+
True16Predicate = NotHasTrue16BitInsts in {
1420+
def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_e64>;
1421+
def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_e64>;
1422+
}
1423+
1424+
let SubtargetPredicate = Has16BitInsts,
1425+
True16Predicate = UseRealTrue16Insts in {
1426+
def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_t16_e64>;
1427+
def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_t16_e64>;
1428+
}
1429+
1430+
let SubtargetPredicate = Has16BitInsts,
1431+
True16Predicate = UseFakeTrue16Insts in {
1432+
def : FPBinOpPat<fminimumnum, f16, V_MIN_F16_fake16_e64>;
1433+
def : FPBinOpPat<fmaximumnum, f16, V_MAX_F16_fake16_e64>;
1434+
}
1435+
1436+
let SubtargetPredicate = HasVOP3PInsts in {
1437+
def : FPPkBinOpPat<fminimumnum, v2f16, V_PK_MIN_F16>;
1438+
def : FPPkBinOpPat<fmaximumnum, v2f16, V_PK_MAX_F16>;
1439+
}
1440+
}
1441+
13931442
/********** ============================================ **********/
13941443
/********** Extraction, Insertion, Building and Casting **********/
13951444
/********** ============================================ **********/

llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,9 +2019,7 @@ define float @v_fneg_minimumnum_f32_no_ieee(float %a, float %b) #4 {
20192019
; GCN-LABEL: v_fneg_minimumnum_f32_no_ieee:
20202020
; GCN: ; %bb.0:
20212021
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2022-
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2023-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2024-
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
2022+
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
20252023
; GCN-NEXT: s_setpc_b64 s[30:31]
20262024
%min = call float @llvm.minimumnum.f32(float %a, float %b)
20272025
%fneg = fneg float %min
@@ -2044,8 +2042,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
20442042
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
20452043
; GCN: ; %bb.0:
20462044
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2047-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2048-
; GCN-NEXT: v_max_f32_e32 v0, v0, v0
2045+
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0
20492046
; GCN-NEXT: s_setpc_b64 s[30:31]
20502047
%min = call float @llvm.minimumnum.f32(float %a, float %a)
20512048
%min.fneg = fneg float %min
@@ -2068,8 +2065,7 @@ define float @v_fneg_posk_minimumnum_f32_no_ieee(float %a) #4 {
20682065
; GCN-LABEL: v_fneg_posk_minimumnum_f32_no_ieee:
20692066
; GCN: ; %bb.0:
20702067
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2071-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2072-
; GCN-NEXT: v_max_f32_e32 v0, -4.0, v0
2068+
; GCN-NEXT: v_max_f32_e64 v0, -v0, -4.0
20732069
; GCN-NEXT: s_setpc_b64 s[30:31]
20742070
%min = call float @llvm.minimumnum.f32(float 4.0, float %a)
20752071
%fneg = fneg float %min
@@ -2092,8 +2088,7 @@ define float @v_fneg_negk_minimumnum_f32_no_ieee(float %a) #4 {
20922088
; GCN-LABEL: v_fneg_negk_minimumnum_f32_no_ieee:
20932089
; GCN: ; %bb.0:
20942090
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2096-
; GCN-NEXT: v_max_f32_e32 v0, 4.0, v0
2091+
; GCN-NEXT: v_max_f32_e64 v0, -v0, 4.0
20972092
; GCN-NEXT: s_setpc_b64 s[30:31]
20982093
%min = call float @llvm.minimumnum.f32(float -4.0, float %a)
20992094
%fneg = fneg float %min
@@ -2251,8 +2246,7 @@ define float @v_fneg_neg0_minimumnum_f32_no_ieee(float %a) #4 {
22512246
; GCN-LABEL: v_fneg_neg0_minimumnum_f32_no_ieee:
22522247
; GCN: ; %bb.0:
22532248
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2254-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2255-
; GCN-NEXT: v_max_f32_e32 v0, 0, v0
2249+
; GCN-NEXT: v_max_f32_e64 v0, -v0, 0
22562250
; GCN-NEXT: s_setpc_b64 s[30:31]
22572251
%min = call float @llvm.minimumnum.f32(float -0.0, float %a)
22582252
%fneg = fneg float %min
@@ -2299,7 +2293,6 @@ define float @v_fneg_0_minimumnum_foldable_use_f32_no_ieee(float %a, float %b) #
22992293
; GCN-LABEL: v_fneg_0_minimumnum_foldable_use_f32_no_ieee:
23002294
; GCN: ; %bb.0:
23012295
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2302-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
23032296
; GCN-NEXT: v_min_f32_e32 v0, 0, v0
23042297
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
23052298
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2330,9 +2323,7 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
23302323
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
23312324
; GCN: ; %bb.0:
23322325
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2333-
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2334-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2335-
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
2326+
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
23362327
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
23372328
; GCN-NEXT: s_setpc_b64 s[30:31]
23382329
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2364,9 +2355,7 @@ define float @v_fneg_maximumnum_f32_no_ieee(float %a, float %b) #4 {
23642355
; GCN-LABEL: v_fneg_maximumnum_f32_no_ieee:
23652356
; GCN: ; %bb.0:
23662357
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2367-
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2368-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2369-
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
2358+
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
23702359
; GCN-NEXT: s_setpc_b64 s[30:31]
23712360
%max = call float @llvm.maximumnum.f32(float %a, float %b)
23722361
%fneg = fneg float %max
@@ -2389,8 +2378,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
23892378
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
23902379
; GCN: ; %bb.0:
23912380
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2393-
; GCN-NEXT: v_min_f32_e32 v0, v0, v0
2381+
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0
23942382
; GCN-NEXT: s_setpc_b64 s[30:31]
23952383
%max = call float @llvm.maximumnum.f32(float %a, float %a)
23962384
%max.fneg = fneg float %max
@@ -2413,8 +2401,7 @@ define float @v_fneg_posk_maximumnum_f32_no_ieee(float %a) #4 {
24132401
; GCN-LABEL: v_fneg_posk_maximumnum_f32_no_ieee:
24142402
; GCN: ; %bb.0:
24152403
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2416-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2417-
; GCN-NEXT: v_min_f32_e32 v0, -4.0, v0
2404+
; GCN-NEXT: v_min_f32_e64 v0, -v0, -4.0
24182405
; GCN-NEXT: s_setpc_b64 s[30:31]
24192406
%max = call float @llvm.maximumnum.f32(float 4.0, float %a)
24202407
%fneg = fneg float %max
@@ -2437,8 +2424,7 @@ define float @v_fneg_negk_maximumnum_f32_no_ieee(float %a) #4 {
24372424
; GCN-LABEL: v_fneg_negk_maximumnum_f32_no_ieee:
24382425
; GCN: ; %bb.0:
24392426
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2440-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2441-
; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0
2427+
; GCN-NEXT: v_min_f32_e64 v0, -v0, 4.0
24422428
; GCN-NEXT: s_setpc_b64 s[30:31]
24432429
%max = call float @llvm.maximumnum.f32(float -4.0, float %a)
24442430
%fneg = fneg float %max
@@ -2473,8 +2459,7 @@ define float @v_fneg_neg0_maximumnum_f32_no_ieee(float %a) #4 {
24732459
; GCN-LABEL: v_fneg_neg0_maximumnum_f32_no_ieee:
24742460
; GCN: ; %bb.0:
24752461
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2476-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2477-
; GCN-NEXT: v_min_f32_e32 v0, 0, v0
2462+
; GCN-NEXT: v_min_f32_e64 v0, -v0, 0
24782463
; GCN-NEXT: s_setpc_b64 s[30:31]
24792464
%max = call float @llvm.maximumnum.f32(float -0.0, float %a)
24802465
%fneg = fneg float %max
@@ -2499,7 +2484,6 @@ define float @v_fneg_0_maximumnum_foldable_use_f32_no_ieee(float %a, float %b) #
24992484
; GCN-LABEL: v_fneg_0_maximumnum_foldable_use_f32_no_ieee:
25002485
; GCN: ; %bb.0:
25012486
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2502-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
25032487
; GCN-NEXT: v_max_f32_e32 v0, 0, v0
25042488
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
25052489
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -2530,9 +2514,7 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
25302514
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
25312515
; GCN: ; %bb.0:
25322516
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2533-
; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
2534-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2535-
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
2517+
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
25362518
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
25372519
; GCN-NEXT: s_setpc_b64 s[30:31]
25382520
%max = call float @llvm.maximumnum.f32(float %a, float %b)

0 commit comments

Comments
 (0)