[SDAG] Handle insert_subvector in isKnownNeverNaN #131989

Open · wants to merge 2 commits into main

Conversation

tclin914 (Contributor)

Propagate nnan across insert_subvector.
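
A minimal IR sketch of the pattern this enables, adapted from the new RISC-V tests below (the intrinsic mangling here is illustrative, and poison is used for the base per current best practice):

; %d and %e are produced by nnan fadds and the poison base contributes no
; meaningful lanes, so the combined <4 x half> %g is provably NaN-free.
; Lowering of llvm.maximum then only needs a NaN check on %c, not on %g.
%d = fadd nnan <2 x half> %a, %a
%e = fadd nnan <2 x half> %b, %b
%f = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> poison, <2 x half> %d, i64 0)
%g = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> %f, <2 x half> %e, i64 2)
%v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %g, <4 x half> %c)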

@llvmbot added the backend:AMDGPU and llvm:SelectionDAG labels on Mar 19, 2025
llvmbot (Member) commented Mar 19, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Jim Lin (tclin914)

Changes

Propagate nnan across insert_subvector.


Full diff: https://github.com/llvm/llvm-project/pull/131989.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+6)
  • (modified) llvm/test/CodeGen/AMDGPU/clamp.ll (+17-17)
  • (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+31-55)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll (+49)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll (+49)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d1f92c9ef00e9..4b3dd5e0d6975 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5625,6 +5625,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
            (SNaN && !C->getValueAPF().isSignaling());
   }
 
+  if (Op.isUndef())
+    return true;
+
   unsigned Opcode = Op.getOpcode();
   switch (Opcode) {
   case ISD::FADD:
@@ -5727,6 +5730,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
   case ISD::EXTRACT_SUBVECTOR: {
     return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
   }
+  case ISD::INSERT_SUBVECTOR:
+    return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+           isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
   case ISD::BUILD_VECTOR: {
     for (const SDValue &Opnd : Op->ops())
       if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1))
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 17c84d7371de1..a08228fc919b8 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -2986,14 +2986,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; GFX6-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; GFX6-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
 ; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT:    v_max_f32_e32 v2, 0x7fc00000, v2
-; GFX6-NEXT:    v_med3_f32 v3, v3, 0, v4
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT:    v_max_f32_e32 v3, 0, v3
+; GFX6-NEXT:    v_max_f32_e32 v2, s0, v2
+; GFX6-NEXT:    v_min_f32_e32 v3, s0, v3
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX6-NEXT:    v_min_f32_e32 v2, 1.0, v2
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
@@ -3006,20 +3006,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7e00
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dword v3, v[0:1]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT:    v_mov_b32_e32 v4, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
 ; GFX8-NEXT:    v_max_f16_e32 v2, 0, v2
-; GFX8-NEXT:    v_max_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT:    v_max_f16_e32 v3, s0, v3
 ; GFX8-NEXT:    v_min_f16_e32 v3, 1.0, v3
 ; GFX8-NEXT:    v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
@@ -3747,16 +3747,16 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; GFX6-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; GFX6-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
 ; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
 ; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; GFX6-NEXT:    v_max_f32_e32 v3, 0x7fc00000, v3
+; GFX6-NEXT:    v_max_f32_e32 v3, s0, v3
+; GFX6-NEXT:    v_max_f32_e32 v2, 0, v2
 ; GFX6-NEXT:    v_min_f32_e32 v3, 1.0, v3
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; GFX6-NEXT:    v_med3_f32 v2, v2, 0, v4
+; GFX6-NEXT:    v_min_f32_e32 v2, s0, v2
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
@@ -3779,9 +3779,9 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
-; GFX8-NEXT:    v_max_f16_e32 v2, 0x7e00, v2
+; GFX8-NEXT:    v_max_f16_e32 v2, s0, v2
 ; GFX8-NEXT:    v_max_f16_e32 v3, 0, v3
-; GFX8-NEXT:    v_min_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT:    v_min_f16_e32 v3, s0, v3
 ; GFX8-NEXT:    v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
@@ -3845,14 +3845,14 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; GFX6-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; GFX6-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
 ; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; GFX6-NEXT:    v_max_f32_e32 v2, 0x7fc00000, v2
-; GFX6-NEXT:    v_med3_f32 v3, v3, 0, v4
+; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT:    v_max_f32_e32 v3, 0, v3
+; GFX6-NEXT:    v_max_f32_e32 v2, s0, v2
+; GFX6-NEXT:    v_min_f32_e32 v3, s0, v3
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GFX6-NEXT:    v_min_f32_e32 v2, 1.0, v2
 ; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
@@ -3865,20 +3865,20 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7e00
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    flat_load_dword v3, v[0:1]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; GFX8-NEXT:    v_mov_b32_e32 v4, s0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_max_f16_sdwa v2, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_max_f16_e32 v3, v3, v3
 ; GFX8-NEXT:    v_max_f16_e32 v2, 0, v2
-; GFX8-NEXT:    v_max_f16_e32 v3, 0x7e00, v3
+; GFX8-NEXT:    v_max_f16_e32 v3, s0, v3
 ; GFX8-NEXT:    v_min_f16_e32 v3, 1.0, v3
 ; GFX8-NEXT:    v_min_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 15cb404a3840a..7068ef9f99d73 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -1071,55 +1071,51 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
 ; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; SDAG-GFX1100-TRUE16:       ; %bb.0:
 ; SDAG-GFX1100-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v6.l, v3.l
 ; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT:    v_pack_b32_f16 v1, v0.l, 0
+; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v7.l, v2.l
+; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b16_e32 v8.l, v4.l
+; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v1, v1, v6, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixlo_f16 v3, v3, v7, v8 op_sel_hi:[1,1,1] clamp
 ; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT:    v_pk_max_f16 v0, v2, v2 clamp
 ; SDAG-GFX1100-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-TRUE16-NEXT:    v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT:    v_mov_b32_e32 v0, v3
 ; SDAG-GFX1100-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; SDAG-GFX1100-FAKE16:       ; %bb.0:
 ; SDAG-GFX1100-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
 ; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
 ; SDAG-GFX1100-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX1100-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-FAKE16-NEXT:    v_pk_max_f16 v0, v6, v6 clamp
+; SDAG-GFX1100-FAKE16-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
 ; SDAG-GFX1100-FAKE16-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
+; SDAG-GFX1100-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; SDAG-GFX1100-FAKE16-NEXT:    v_mov_b32_e32 v0, v6
 ; SDAG-GFX1100-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; SDAG-GFX900:       ; %bb.0:
-; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX900-NEXT:    v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT:    v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT:    v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX900-NEXT:    v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX900-NEXT:    v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX900-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX900-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
+; GFX900-NEXT:    v_mov_b32_e32 v0, v3
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; SDAG-GFX906:       ; %bb.0:
-; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT:    v_pack_b32_f16 v1, v1, 0
-; SDAG-GFX906-NEXT:    v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX906-NEXT:    v_pk_max_f16 v0, v6, v6 clamp
-; SDAG-GFX906-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
+; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
+; GFX906:       ; %bb.0:
+; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
+; GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; GFX906-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
+; GFX906-NEXT:    v_mov_b32_e32 v0, v3
+; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; SDAG-VI:       ; %bb.0:
@@ -1193,26 +1189,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
 ; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v0, v6
 ; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; GISEL-GFX900:       ; %bb.0:
-; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT:    v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX900-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
-; GISEL-GFX906:       ; %bb.0:
-; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT:    v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; GISEL-GFX906-NEXT:    v_pk_max_f16 v1, v1, v1 clamp
-; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
 ; GISEL-VI:       ; %bb.0:
 ; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 04e73ac1ea956..c6cd366497218 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -357,3 +357,52 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
   ret <2 x half> %v
 }
+
+declare <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half>, <2 x half>, i64)
+
+define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
+; ZVFH-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfadd.vv v8, v8, v8
+; ZVFH-NEXT:    vfadd.vv v9, v9, v9
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vslideup.vi v8, v9, 2
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT:    vfmax.vv v8, v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+  %d = fadd nnan <2 x half> %a, %a
+  %e = fadd nnan <2 x half> %b, %b
+  %f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
+  %g = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> %f, <2 x half> %e, i64 2)
+  %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %g, <4 x half> %c)
+  ret <4 x half> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index a0334a9a5d20a..568923db83591 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -357,3 +357,52 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
   ret <2 x half> %v
 }
+
+declare <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half>, <2 x half>, i64)
+
+define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
+; ZVFH-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfadd.vv v8, v8, v8
+; ZVFH-NEXT:    vfadd.vv v9, v9, v9
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vslideup.vi v8, v9, 2
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT:    vfmin.vv v8, v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+  %d = fadd nnan <2 x half> %a, %a
+  %e = fadd nnan <2 x half> %b, %b
+  %f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
+  %g = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> %f, <2 x half> %e, i64 2)
+  %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %g, <4 x half> %c)
+  ret <4 x half> %v
+}

llvmbot (Member) commented Mar 19, 2025

@llvm/pr-subscribers-llvm-selectiondag

Author: Jim Lin (tclin914)

Changes

Propagate nnan across insert_subvector.


Full diff: https://github.com/llvm/llvm-project/pull/131989.diff



⚠️ undef deprecator found issues in your code. ⚠️

You can test this locally with the following command:
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 57288136fecfac35ef161cb8f147d269715d2634 1ece27bec3727f004acf2e5a83753af667f2491b llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/test/CodeGen/AMDGPU/clamp.ll llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll

The following files introduce new uses of undef:

  • llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
  • llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. You should use poison values for placeholders instead.

In tests, avoid using undef and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
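
Applied to this PR, the cleanup the deprecator suggests would be a one-token change in each new test, for example:

; before (flagged):
%f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
; after: poison is the preferred placeholder for the uninitialized base vector
%f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> poison, <2 x half> %d, i64 0)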

Comment on lines +5628 to +5629
  if (Op.isUndef())
    return true;
Contributor

This is incorrect. You could do this for poison, but not undef.
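
A minimal sketch of the distinction, assuming SelectionDAG has a poison opcode distinct from ISD::UNDEF (the exact opcode name is an assumption):

// undef may independently take any value at each use, including NaN, so it
// must not be reported as never-NaN. Poison may be: any use that could
// observe a NaN is already immediate UB, so the fold introduces nothing new.
if (Op.getOpcode() == ISD::POISON)
  return true;
// ISD::UNDEF deliberately gets no special case here and falls through to
// the conservative "might be NaN" default.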

@@ -5727,6 +5730,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
   case ISD::EXTRACT_SUBVECTOR: {
     return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
   }
+  case ISD::INSERT_SUBVECTOR:
+    return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
+           isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
Collaborator

You might need to consider adding DemandedElts handling to isKnownNeverNaN to correctly handle concatenation patterns:

insert_subvector(insert_subvector(undef,lo,0),hi,n/2)
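
A hedged sketch of what that could look like, modeled on the INSERT_SUBVECTOR case in computeKnownBits; the DemandedElts-taking overload of isKnownNeverNaN is an assumption here (it does not exist today):

case ISD::INSERT_SUBVECTOR: {
  // Split the demanded lanes between the inserted subvector and the base,
  // so that an undef/poison base in a concat pattern like
  // insert_subvector(insert_subvector(undef, lo, 0), hi, n/2) is never
  // queried for lanes it does not actually supply.
  SDValue Src = Op.getOperand(0);
  SDValue Sub = Op.getOperand(1);
  uint64_t Idx = Op.getConstantOperandVal(2);
  unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
  APInt DemandedSrcElts = DemandedElts;
  DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
  if (!DemandedSubElts.isZero() &&
      !isKnownNeverNaN(Sub, DemandedSubElts, SNaN, Depth + 1))
    return false;
  if (!DemandedSrcElts.isZero() &&
      !isKnownNeverNaN(Src, DemandedSrcElts, SNaN, Depth + 1))
    return false;
  return true;
}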
