From e4a7f0182d4017593504982d72725c6fefa5c903 Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Thu, 29 Aug 2019 00:46:57 +0000
Subject: [PATCH 1/9] Add requirement to test.

-debug-only option for llc is only available in debug builds so
"REQUIRES: asserts" is needed in the test.

llvm-svn: 370279
---
 llvm/test/CodeGen/AArch64/GlobalISel/call-translator-musttail.ll | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-musttail.ll
index a93184e237e170..7d6ba8489b70e3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-musttail.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-musttail.ll
@@ -1,4 +1,5 @@
 ; RUN: not llc %s -mtriple aarch64-unknown-unknown -debug-only=aarch64-call-lowering -global-isel -o - 2>&1 | FileCheck %s
+; REQUIRES: asserts
 
 ; CHECK: Cannot lower musttail calls yet.
 ; CHECK-NEXT: LLVM ERROR: unable to translate instruction: call (in function: foo)

From 8ec5c1004265f5da323dfc3b2fe929c0557804c8 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 29 Aug 2019 01:13:41 +0000
Subject: [PATCH 2/9] GlobalISel/TableGen: Handle setcc patterns

This is a special case because one node maps to two different G_
instructions, and the operand order is changed.

This mostly enables G_FCMP for AMDGPU. G_ICMP is still manually
selected for now since it has the SALU and VALU complication to deal
with.

llvm-svn: 370280
---
 .../CodeGen/GlobalISel/InstructionSelector.h  |   6 +
 .../GlobalISel/InstructionSelectorImpl.h      |  16 +-
 .../Target/GlobalISel/SelectionDAGCompat.td   |   9 +
 .../include/llvm/Target/TargetSelectionDAG.td |  36 +-
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   7 +-
 llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td |   2 +-
 .../AMDGPU/GlobalISel/inst-select-fcmp.mir    | 799 ++++++++++++++++++
 .../GlobalISel/inst-select-fcmp.s16.mir       | 441 ++++++++++
 .../GlobalISel/instruction-select/mul.mir     |   8 +-
 .../Mips/GlobalISel/llvm-ir/bitwise.ll        |  10 +-
 .../CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll   |   4 -
 .../CodeGen/Mips/GlobalISel/llvm-ir/mul.ll    |   2 -
 .../Common/GlobalISelEmitterCommon.td         |   1 +
 llvm/test/TableGen/GlobalISelEmitter-setcc.td |  24 +
 llvm/utils/TableGen/GlobalISelEmitter.cpp     |  69 +-
 15 files changed, 1399 insertions(+), 35 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir
 create mode 100644 llvm/test/TableGen/GlobalISelEmitter-setcc.td

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 9da899b6c8dac2..62010238d24cc8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -208,6 +208,12 @@ enum {
   /// - Expected Intrinsic ID
   GIM_CheckIntrinsicID,
 
+  /// Check the operand is a specific predicate
+  /// - InsnID - Instruction ID
+  /// - OpIdx - Operand index
+  /// - Expected predicate
+  GIM_CheckCmpPredicate,
+
   /// Check the specified operand is an MBB
   /// - InsnID - Instruction ID
   /// - OpIdx - Operand index
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index 14bd4b2fcebbf0..7be95ee6889bd2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -662,7 +662,21 @@ bool InstructionSelector::executeMatchTable(
           return false;
       break;
     }
-
+    case GIM_CheckCmpPredicate: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t OpIdx = MatchTable[CurrentIdx++];
+      int64_t Value = MatchTable[CurrentIdx++];
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs["
+                             << InsnID << "]->getOperand(" << OpIdx
+                             << "), Value=" << Value << ")\n");
+      assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+      MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+      if (!MO.isPredicate() || MO.getPredicate() != Value)
+        if (handleReject() == RejectAndGiveUp)
+          return false;
+      break;
+    }
     case GIM_CheckIsMBB: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t OpIdx = MatchTable[CurrentIdx++];
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 3da336341bc983..87f0aa4aa1bd30 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -34,6 +34,10 @@ class GINodeEquiv<Instruction i, SDNode node> {
   // depending on the predicates on the node.
   Instruction IfSignExtend = ?;
   Instruction IfZeroExtend = ?;
+
+  // SelectionDAG has one setcc for all compares. This differentiates
+  // for G_ICMP and G_FCMP.
+  Instruction IfFloatingPoint = ?;
 }
 
 // These are defined in the same order as the G_* instructions.
@@ -122,6 +126,11 @@ def : GINodeEquiv<G_LOAD, ld> {
   let IfSignExtend = G_SEXTLOAD;
   let IfZeroExtend = G_ZEXTLOAD;
 }
+
+def : GINodeEquiv<G_ICMP, setcc> {
+  let IfFloatingPoint = G_FCMP;
+}
+
 // Broadly speaking G_STORE is equivalent to ISD::STORE but there are some
 // complications that tablegen must take care of. For example, predicates such
 // as isTruncStore require that this is not a perfect 1:1 mapping since a
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 9e30ca1f36d510..e474287ca0e08e 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -643,16 +643,32 @@ def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
 //===----------------------------------------------------------------------===//
 // Selection DAG Condition Codes
 
-class CondCode; // ISD::CondCode enums
-def SETOEQ : CondCode; def SETOGT : CondCode;
-def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode;
-def SETONE : CondCode; def SETO   : CondCode; def SETUO  : CondCode;
-def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode;
-def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode;
-
-def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode;
-def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode;
-
+class CondCode<string fcmpName = "", string icmpName = ""> {
+  string ICmpPredicate = icmpName;
+  string FCmpPredicate = fcmpName;
+}
+
+// ISD::CondCode enums, and mapping to CmpInst::Predicate names
+def SETOEQ : CondCode<"FCMP_OEQ">;
+def SETOGT : CondCode<"FCMP_OGT">;
+def SETOGE : CondCode<"FCMP_OGE">;
+def SETOLT : CondCode<"FCMP_OLT">;
+def SETOLE : CondCode<"FCMP_OLE">;
+def SETONE : CondCode<"FCMP_ONE">;
+def SETO   : CondCode<"FCMP_ORD">;
+def SETUO  : CondCode<"FCMP_UNO">;
+def SETUEQ : CondCode<"FCMP_UEQ">;
+def SETUGT : CondCode<"FCMP_UGT", "ICMP_UGT">;
+def SETUGE : CondCode<"FCMP_UGE", "ICMP_UGE">;
+def SETULT : CondCode<"FCMP_ULT", "ICMP_ULT">;
+def SETULE : CondCode<"FCMP_ULE", "ICMP_ULE">;
+def SETUNE : CondCode<"FCMP_UNE">;
+def SETEQ : CondCode<"", "ICMP_EQ">;
+def SETGT : CondCode<"", "ICMP_SGT">;
+def SETGE : CondCode<"", "ICMP_SGE">;
+def SETLT : CondCode<"", "ICMP_SLT">;
+def SETLE : CondCode<"", "ICMP_SLE">;
+def SETNE : CondCode<"", "ICMP_NE">;
 
 //===----------------------------------------------------------------------===//
 // Selection DAG Node Transformation Functions.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 361bf7ab9683f1..d0382bf3772ea6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -163,11 +163,10 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost(
 
 const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
     const TargetRegisterClass &RC) const {
+  if (&RC == &AMDGPU::SReg_1RegClass)
+    return AMDGPU::VCCRegBank;
 
-  if (TRI->isSGPRClass(&RC))
-    return getRegBank(AMDGPU::SGPRRegBankID);
-
-  return getRegBank(AMDGPU::VGPRRegBankID);
+  return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank;
 }
 
 template <unsigned NumOps>
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 9555694fb1064f..2ecb186cc88f6b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -17,4 +17,4 @@ def VGPRRegBank : RegisterBank<"VGPR",
 def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
 
 // It is helpful to distinguish conditions from ordinary SGPRs.
-def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
+def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir
new file mode 100644
index 00000000000000..9f891e96d22878
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir
@@ -0,0 +1,799 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: fcmp_false_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_false_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s32), [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[FCMP]](s1)
+    ; WAVE32-LABEL: name: fcmp_false_s32_vv
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s32), [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[FCMP]](s1)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(false), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_oeq_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_oeq_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_oeq_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ogt_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ogt_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ogt_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_oge_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_oge_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_oge_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_olt_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_olt_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_olt_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ole_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ole_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ole_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_one_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_one_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_one_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(one), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ord_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ord_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ord_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_uno_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_uno_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_uno_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ueq_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ueq_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ueq_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ugt_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ugt_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ugt_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_uge_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_uge_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_uge_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ult_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ult_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ult_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ule_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ule_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ule_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_une_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_une_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]]
+    ; WAVE32-LABEL: name: fcmp_une_s32_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(une), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_true_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_true_s32_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s32), [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[FCMP]](s1)
+    ; WAVE32-LABEL: name: fcmp_true_s32_vv
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s32), [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[FCMP]](s1)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_FCMP floatpred(true), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_false_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_false_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE64: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s64), [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[FCMP]](s1)
+    ; WAVE32-LABEL: name: fcmp_false_s64_vv
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE32: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[COPY]](s64), [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[FCMP]](s1)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(false), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_oeq_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_oeq_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_oeq_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(oeq), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ogt_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ogt_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ogt_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ogt), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_oge_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_oge_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_oge_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(oge), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_olt_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_olt_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_olt_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(olt), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ole_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ole_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ole_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ole), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_one_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_one_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_one_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(one), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ord_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ord_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ord_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ord), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_uno_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_uno_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_uno_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(uno), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ueq_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ueq_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ueq_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ueq), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ugt_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ugt_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ugt_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ugt), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_uge_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_uge_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_uge_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(uge), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ult_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ult_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ult_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ult), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_ule_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_ule_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ule_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(ule), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_une_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_une_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE64: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]]
+    ; WAVE32-LABEL: name: fcmp_une_s64_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; WAVE32: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(une), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: fcmp_true_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: fcmp_true_s64_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE64: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s64), [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[FCMP]](s1)
+    ; WAVE32-LABEL: name: fcmp_true_s64_vv
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE32: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[COPY]](s64), [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[FCMP]](s1)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vcc(s1) = G_FCMP floatpred(true), %0, %1
+    S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir
new file mode 100644
index 00000000000000..99cba9351a5442
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir
@@ -0,0 +1,441 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: fcmp_false_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_false_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[FCMP]](s1)
+    ; WAVE32-LABEL: name: fcmp_false_s16_vv
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[FCMP]](s1)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(false), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_oeq_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_oeq_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_oeq_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(oeq), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ogt_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ogt_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ogt_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ogt), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_oge_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_oge_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_oge_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(oge), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_olt_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_olt_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_olt_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(olt), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ole_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ole_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ole_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ole), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+---
+name: fcmp_one_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_one_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_one_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(one), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ord_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ord_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_O_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_O_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ord_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_O_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_O_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_O_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ord), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_uno_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_uno_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_uno_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(uno), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ueq_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ueq_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ueq_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ueq), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ugt_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ugt_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ugt_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ugt), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_uge_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_uge_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_uge_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(uge), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ult_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ult_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ult_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ult), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_ule_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_ule_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_ule_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(ule), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_une_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_une_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]]
+    ; WAVE32-LABEL: name: fcmp_une_s16_vv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(une), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+name: fcmp_true_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: fcmp_true_s16_vv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[FCMP]](s1)
+    ; WAVE32-LABEL: name: fcmp_true_s16_vv
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[FCMP]](s1)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vcc(s1) = G_FCMP floatpred(true), %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir
index ff738927b6fbe6..6b4246b35bce70 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir
@@ -49,11 +49,9 @@ body:             |
     ; MIPS32: [[MUL:%[0-9]+]]:gpr32 = MUL [[COPY]], [[COPY1]], implicit-def dead $hi0, implicit-def dead $lo0
     ; MIPS32: [[PseudoMULTu:%[0-9]+]]:acc64 = PseudoMULTu [[COPY]], [[COPY1]]
     ; MIPS32: [[PseudoMFHI:%[0-9]+]]:gpr32 = PseudoMFHI [[PseudoMULTu]]
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 0
-    ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[PseudoMFHI]], [[ORi]]
-    ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu $zero, [[XOR]]
-    ; MIPS32: [[ORi1:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi1]]
+    ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu $zero, [[PseudoMFHI]]
+    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
+    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]]
     ; MIPS32: SB [[AND]], [[COPY3]], 0 :: (store 1 into %ir.pcarry_flag)
     ; MIPS32: SW [[MUL]], [[COPY2]], 0 :: (store 4 into %ir.pmul)
     ; MIPS32: RetRA
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
index 1043f0483c2ea3..2e918adb1251f2 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
@@ -290,8 +290,7 @@ define i64 @shl_i64(i64 %a, i64 %b) {
 ; MIPS32-NEXT:    subu $3, $1, $6
 ; MIPS32-NEXT:    ori $8, $zero, 0
 ; MIPS32-NEXT:    sltu $1, $6, $1
-; MIPS32-NEXT:    xor $9, $6, $8
-; MIPS32-NEXT:    sltiu $9, $9, 1
+; MIPS32-NEXT:    sltiu $9, $6, 1
 ; MIPS32-NEXT:    sllv $10, $4, $6
 ; MIPS32-NEXT:    srlv $3, $4, $3
 ; MIPS32-NEXT:    sllv $6, $5, $6
@@ -321,10 +320,8 @@ define i64 @ashl_i64(i64 %a, i64 %b) {
 ; MIPS32-NEXT:    ori $1, $zero, 32
 ; MIPS32-NEXT:    subu $2, $6, $1
 ; MIPS32-NEXT:    subu $3, $1, $6
-; MIPS32-NEXT:    ori $8, $zero, 0
 ; MIPS32-NEXT:    sltu $1, $6, $1
-; MIPS32-NEXT:    xor $8, $6, $8
-; MIPS32-NEXT:    sltiu $8, $8, 1
+; MIPS32-NEXT:    sltiu $8, $6, 1
 ; MIPS32-NEXT:    srav $9, $5, $6
 ; MIPS32-NEXT:    srlv $6, $4, $6
 ; MIPS32-NEXT:    sllv $3, $5, $3
@@ -354,8 +351,7 @@ define i64 @lshr_i64(i64 %a, i64 %b) {
 ; MIPS32-NEXT:    subu $3, $1, $6
 ; MIPS32-NEXT:    ori $8, $zero, 0
 ; MIPS32-NEXT:    sltu $1, $6, $1
-; MIPS32-NEXT:    xor $9, $6, $8
-; MIPS32-NEXT:    sltiu $9, $9, 1
+; MIPS32-NEXT:    sltiu $9, $6, 1
 ; MIPS32-NEXT:    srlv $10, $5, $6
 ; MIPS32-NEXT:    srlv $6, $4, $6
 ; MIPS32-NEXT:    sllv $3, $5, $3
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
index bb098761f33487..b336e957c50d76 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
@@ -164,8 +164,6 @@ define i1 @eq_i64(i64 %a, i64 %b){
 ; MIPS32-NEXT:    xor $1, $4, $6
 ; MIPS32-NEXT:    xor $2, $5, $7
 ; MIPS32-NEXT:    or $1, $1, $2
-; MIPS32-NEXT:    ori $2, $zero, 0
-; MIPS32-NEXT:    xor $1, $1, $2
 ; MIPS32-NEXT:    sltiu $2, $1, 1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
@@ -180,8 +178,6 @@ define i1 @ne_i64(i64 %a, i64 %b) {
 ; MIPS32-NEXT:    xor $1, $4, $6
 ; MIPS32-NEXT:    xor $2, $5, $7
 ; MIPS32-NEXT:    or $1, $1, $2
-; MIPS32-NEXT:    ori $2, $zero, 0
-; MIPS32-NEXT:    xor $1, $1, $2
 ; MIPS32-NEXT:    sltu $2, $zero, $1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
index 43b4b63f66e63a..09a7439aaa5355 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
@@ -186,8 +186,6 @@ define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag
 ; MIPS32-NEXT:    mul $1, $4, $5
 ; MIPS32-NEXT:    multu $4, $5
 ; MIPS32-NEXT:    mfhi $2
-; MIPS32-NEXT:    ori $3, $zero, 0
-; MIPS32-NEXT:    xor $2, $2, $3
 ; MIPS32-NEXT:    sltu $2, $zero, $2
 ; MIPS32-NEXT:    ori $3, $zero, 1
 ; MIPS32-NEXT:    and $2, $2, $3
diff --git a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
index 2e3332c1390e7a..f96e0fec760588 100644
--- a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
+++ b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
@@ -7,6 +7,7 @@ def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0)>;
 def GPR32Op : RegisterOperand<GPR32>;
 def F0 : Register<"f0"> { let Namespace = "MyTarget"; }
 def FPR32 : RegisterClass<"MyTarget", [f32], 32, (add F0)>;
+def FPR32Op : RegisterOperand<FPR32>;
 def p0 : PtrValueType <i32, 0>;
 
 class I<dag OOps, dag IOps, list<dag> Pat>
diff --git a/llvm/test/TableGen/GlobalISelEmitter-setcc.td b/llvm/test/TableGen/GlobalISelEmitter-setcc.td
new file mode 100644
index 00000000000000..1bad1754f64d11
--- /dev/null
+++ b/llvm/test/TableGen/GlobalISelEmitter-setcc.td
@@ -0,0 +1,24 @@
+// RUN: llvm-tblgen -gen-global-isel -warn-on-skipped-patterns -optimize-match-table=false -I %p/../../include -I %p/Common %s -o - 2> %t < %s | FileCheck -check-prefix=GISEL %s
+// RUN: FileCheck -DFILE=%s -check-prefix=ERR %s < %t
+
+include "llvm/Target/Target.td"
+include "GlobalISelEmitterCommon.td"
+
+// GISEL: GIM_Try
+// GISEL: GIM_CheckNumOperands, /*MI*/0, /*Expected*/4,
+// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_FCMP,
+// GISEL: GIM_CheckCmpPredicate, /*MI*/0, /*Op*/1, /*Predicate*/CmpInst::FCMP_OEQ,
+def FCMPOEQ : I<(outs GPR32:$dst), (ins FPR32Op:$src0, FPR32:$src1),
+              [(set GPR32:$dst, (i32 (setcc f32:$src0, f32:$src1, SETOEQ)))]>;
+
+// GISEL: GIM_Try
+// GISEL: GIM_CheckNumOperands, /*MI*/0, /*Expected*/4,
+// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ICMP,
+// GISEL: GIM_CheckCmpPredicate, /*MI*/0, /*Op*/1, /*Predicate*/CmpInst::ICMP_EQ,
+def ICMPEQ : I<(outs GPR32:$dst), (ins GPR32Op:$src0, GPR32:$src1),
+               [(set GPR32:$dst, (i32 (setcc i32:$src0, i32:$src1, SETEQ)))]>;
+
+// Check there is a warning if the last operand is not a CondCode.
+// ERR: [[FILE]]:[[@LINE+1]]:1: warning: Skipped pattern: Unable to handle CondCode
+def FCMP_NOTCC : I<(outs GPR32:$dst), (ins FPR32Op:$src0, FPR32:$src1),
+                   [(set GPR32:$dst, (i32 (setcc f32:$src0, f32:$src1, i32)))]>;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 60fa0ac69ba45c..774c4ef818d553 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1064,6 +1064,7 @@ class PredicateMatcher {
     OPM_SameOperand,
     OPM_ComplexPattern,
     OPM_IntrinsicID,
+    OPM_CmpPredicate,
     OPM_Instruction,
     OPM_Int,
     OPM_LiteralInt,
@@ -1389,6 +1390,36 @@ class LiteralIntOperandMatcher : public OperandPredicateMatcher {
   }
 };
 
+/// Generates code to check that an operand is a CmpInst predicate.
+class CmpPredicateOperandMatcher : public OperandPredicateMatcher {
+protected:
+  std::string PredName;
+
+public:
+  CmpPredicateOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+                             std::string P)
+    : OperandPredicateMatcher(OPM_CmpPredicate, InsnVarID, OpIdx), PredName(P) {}
+
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return OperandPredicateMatcher::isIdentical(B) &&
+           PredName == cast<CmpPredicateOperandMatcher>(&B)->PredName;
+  }
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == OPM_CmpPredicate;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckCmpPredicate")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+          << MatchTable::Comment("Predicate")
+          << MatchTable::NamedValue("CmpInst", PredName)
+          << MatchTable::LineBreak;
+  }
+};
+
 /// Generates code to check that an operand is an intrinsic ID.
 class IntrinsicIDOperandMatcher : public OperandPredicateMatcher {
 protected:
@@ -3256,6 +3287,13 @@ Record *GlobalISelEmitter::findNodeEquiv(Record *N) const {
 
 const CodeGenInstruction *
 GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
+  if (N->getNumChildren() >= 1) {
+    // setcc operation maps to two different G_* instructions based on the type.
+    if (!Equiv.isValueUnset("IfFloatingPoint") &&
+        MVT(N->getChild(0)->getSimpleType(0)).isFloatingPoint())
+      return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint"));
+  }
+
   for (const TreePredicateCall &Call : N->getPredicateCalls()) {
     const TreePredicateFn &Predicate = Call.Fn;
     if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
@@ -3265,6 +3303,7 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
         Predicate.isZeroExtLoad())
       return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
   }
+
   return &Target.getInstruction(Equiv.getValueAsDef("I"));
 }
 
@@ -3505,6 +3544,34 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       return InsnMatcher;
     }
 
+    // Special case because the operand order is changed from setcc. The
+    // predicate operand needs to be swapped from the last operand to the first
+    // source.
+
+    unsigned NumChildren = Src->getNumChildren();
+    bool IsFCmp = SrcGIOrNull->TheDef->getName() == "G_FCMP";
+
+    if (IsFCmp || SrcGIOrNull->TheDef->getName() == "G_ICMP") {
+      TreePatternNode *SrcChild = Src->getChild(NumChildren - 1);
+      if (SrcChild->isLeaf()) {
+        DefInit *DI = dyn_cast<DefInit>(SrcChild->getLeafValue());
+        Record *CCDef = DI ? DI->getDef() : nullptr;
+        if (!CCDef || !CCDef->isSubClassOf("CondCode"))
+          return failedImport("Unable to handle CondCode");
+
+        OperandMatcher &OM =
+          InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx);
+        StringRef PredType = IsFCmp ? CCDef->getValueAsString("FCmpPredicate") :
+                                      CCDef->getValueAsString("ICmpPredicate");
+
+        if (!PredType.empty()) {
+          OM.addPredicate<CmpPredicateOperandMatcher>(PredType);
+          // Process the other 2 operands normally.
+          --NumChildren;
+        }
+      }
+    }
+
     // Match the used operands (i.e. the children of the operator).
     bool IsIntrinsic =
         SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
@@ -3513,7 +3580,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
     if (IsIntrinsic && !II)
       return failedImport("Expected IntInit containing intrinsic ID)");
 
-    for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) {
+    for (unsigned i = 0; i != NumChildren; ++i) {
       TreePatternNode *SrcChild = Src->getChild(i);
 
       // SelectionDAG allows pointers to be represented with iN since it doesn't

From 216d8ff60b51ce112eab9c887cc5a82895fc69e1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 29 Aug 2019 01:13:47 +0000
Subject: [PATCH 3/9] AMDGPU: Don't use frame virtual registers

SGPR spills aren't really handled after SILowerSGPRSpills. In order to
directly control what happens if the scavenger needs to spill, the
scavenger needs to be used directly. There is an alternative to
spilling in these contexts anyway since the frame register can be
incremented and restored.

This does present another possible issue if spilling is needed for the
unused carry out if an add is needed. I think this can be avoided by
using a scalar add (although that clobbers SCC, which happens anyway).

llvm-svn: 370281
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 17 ++++
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |  6 ++
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     | 84 ++++++++++---------
 .../CodeGen/AMDGPU/frame-index-elimination.ll | 34 ++++----
 .../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 42 ++++++++++
 llvm/test/CodeGen/AMDGPU/spill-m0.ll          |  8 +-
 6 files changed, 128 insertions(+), 63 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0f4fc5bbe276f8..7b7c34ed8a244e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6087,6 +6087,23 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
            .addReg(UnusedCarry, RegState::Define | RegState::Dead);
 }
 
+MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator I,
+                                               const DebugLoc &DL,
+                                               Register DestReg,
+                                               RegScavenger &RS) const {
+  if (ST.hasAddNoCarry())
+    return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
+
+  Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+  // TODO: Users need to deal with this.
+  if (!UnusedCarry.isValid())
+    report_fatal_error("failed to scavenge unused carry-out SGPR");
+
+  return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+           .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
+
 bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
   switch (Opcode) {
   case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 3ff35da0b96307..2e629c47256681 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -963,6 +963,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                                     const DebugLoc &DL,
                                     unsigned DestReg) const;
 
+  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    const DebugLoc &DL,
+                                    Register DestReg,
+                                    RegScavenger &RS) const;
+
   static bool isKillTerminator(unsigned Opcode);
   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7cc7d32dc50ee1..29f50503ad5ee5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -302,32 +302,17 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const
 
 bool SIRegisterInfo::requiresFrameIndexScavenging(
   const MachineFunction &MF) const {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  if (MFI.hasStackObjects())
-    return true;
-
-  // May need to deal with callee saved registers.
-  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  return !Info->isEntryFunction();
+  // Do not use frame virtual registers. They used to be used for SGPRs, but
+  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
+  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
+  // spill.
+  return false;
 }
 
 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
   const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  if (!MFI.hasStackObjects())
-    return false;
-
-  // The scavenger is used for large frames which may require finding a free
-  // register for large offsets.
-  if (!isUInt<12>(MFI.getStackSize()))
-    return true;
-
-  // If using scalar stores, for spills, m0 is needed for the scalar store
-  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
-  // register for it during frame index elimination, so the scavenger is
-  // directly needed.
-  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
-         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
+  return MFI.hasStackObjects();
 }
 
 bool SIRegisterInfo::requiresVirtualBaseRegisters(
@@ -804,7 +789,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
   if (OnlyToVGPR && !SpillToVGPR)
     return false;
 
-  MachineRegisterInfo &MRI = MF->getRegInfo();
   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
@@ -831,7 +815,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 
   if (SpillToSMEM) {
     if (RS->isRegUsed(AMDGPU::M0)) {
-      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
         .addReg(AMDGPU::M0);
     }
@@ -850,6 +834,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
 
+  // Scavenged temporary VGPR to use. It must be scavenged once for any number
+  // of spilled subregs.
+  Register TmpVGPR;
+
   // SubReg carries the "Kill" flag when SubReg == SuperReg.
   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
@@ -926,14 +914,14 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 
       // Spill SGPR to a frame index.
       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
-      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      if (!TmpVGPR.isValid())
+        TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 
       MachineInstrBuilder Mov
-        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
         .addReg(SubReg, SubKillState);
 
-
       // There could be undef components of a spilled super register.
       // TODO: Can we detect this and skip the spill?
       if (NumSubRegs > 1) {
@@ -951,7 +939,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                    EltSize, MinAlign(Align, EltSize * i));
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
-        .addReg(TmpReg, RegState::Kill)       // src
+        .addReg(TmpVGPR, RegState::Kill)      // src
         .addFrameIndex(Index)                 // vaddr
         .addReg(MFI->getScratchRSrcReg())     // srrsrc
         .addReg(MFI->getStackPtrOffsetReg())  // soffset
@@ -975,7 +963,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                  RegScavenger *RS,
                                  bool OnlyToVGPR) const {
   MachineFunction *MF = MI->getParent()->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
   MachineBasicBlock *MBB = MI->getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 
@@ -1002,7 +989,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 
   if (SpillToSMEM) {
     if (RS->isRegUsed(AMDGPU::M0)) {
-      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
         .addReg(AMDGPU::M0);
     }
@@ -1027,6 +1014,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   // SubReg carries the "Kill" flag when SubReg == SuperReg.
   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
 
+  Register TmpVGPR;
+
   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     Register SubReg =
         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
@@ -1081,7 +1070,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 
       // Restore SGPR from a stack slot.
       // FIXME: We should use S_LOAD_DWORD here for VI.
-      Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      if (!TmpVGPR.isValid())
+        TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
       unsigned Align = FrameInfo.getObjectAlignment(Index);
 
       MachinePointerInfo PtrInfo
@@ -1091,7 +1081,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
         MachineMemOperand::MOLoad, EltSize,
         MinAlign(Align, EltSize * i));
 
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
+      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
         .addFrameIndex(Index)                 // vaddr
         .addReg(MFI->getScratchRSrcReg())     // srsrc
         .addReg(MFI->getStackPtrOffsetReg())  // soffset
@@ -1100,7 +1090,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 
       auto MIB =
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
-        .addReg(TmpReg, RegState::Kill);
+        .addReg(TmpVGPR, RegState::Kill);
 
       if (NumSubRegs > 1)
         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
@@ -1151,7 +1141,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
   MachineFunction *MF = MI->getParent()->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
   MachineBasicBlock *MBB = MI->getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
@@ -1265,13 +1254,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         // In an entry function/kernel the offset is already the absolute
         // address relative to the frame register.
 
-        Register DiffReg =
-            MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+        Register TmpDiffReg =
+          RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+
+        // If there's no free SGPR, in-place modify the FP
+        Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
 
         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
         Register ResultReg = IsCopy ?
           MI->getOperand(0).getReg() :
-          MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+          RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
 
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
           .addReg(FrameReg)
@@ -1285,31 +1277,41 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
             .addReg(DiffReg);
         } else {
           Register ScaledReg =
-              MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+            RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
 
+          // FIXME: Assumed VGPR use.
           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
             .addImm(Log2_32(ST.getWavefrontSize()))
             .addReg(DiffReg, RegState::Kill);
 
           // TODO: Fold if use instruction is another add of a constant.
           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
-            TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
+
+            // FIXME: This can fail
+            TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
               .addImm(Offset)
               .addReg(ScaledReg, RegState::Kill)
               .addImm(0); // clamp bit
           } else {
             Register ConstOffsetReg =
-                MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+              RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
 
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
               .addImm(Offset);
-            TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
+            TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
               .addReg(ConstOffsetReg, RegState::Kill)
               .addReg(ScaledReg, RegState::Kill)
               .addImm(0); // clamp bit
           }
         }
 
+        if (!TmpDiffReg.isValid()) {
+          // Restore the FP.
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
+            .addReg(FrameReg)
+            .addReg(MFI->getScratchWaveOffsetReg());
+        }
+
         // Don't introduce an extra copy if we're just materializing in a mov.
         if (IsCopy)
           MI->eraseFromParent();
@@ -1347,7 +1349,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       int64_t Offset = FrameInfo.getObjectOffset(Index);
       FIOp.ChangeToImmediate(Offset);
       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
-        Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+        Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
           .addImm(Offset);
         FIOp.ChangeToRegister(TmpReg, false, false, true);
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index d49c343398ce14..07ec95ca36d361 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -7,7 +7,7 @@
 ; Materialize into a mov. Make sure there isn't an unnecessary copy.
 ; GCN-LABEL: {{^}}func_mov_fi_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; GCN: s_sub_u32 [[SUB:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 
 ; CI-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, [[SUB]]
@@ -24,22 +24,20 @@ define void @func_mov_fi_i32() #0 {
 ; GCN-LABEL: {{^}}func_mov_fi_i32_offset:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 
-; CI: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
-
-; CI: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
+; CI: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
+; CI-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
+; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
+; CI-NEXT: v_lshr_b32_e64 v0, [[SUB0]], 6
 ; CI-NEXT: v_add_i32_e64 v1, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
 ; CI-NOT: v_mov
 ; CI: ds_write_b32 v0, v0
 ; CI-NEXT: ds_write_b32 v0, v1
 
-; GFX9: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, [[SUB]]
+; GFX9: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
+; GFX9-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
+; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, [[SUB0]]
+; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB1]]
 ; GFX9-DAG: ds_write_b32 v0, v0
-
-; GFX9-DAG: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB]]
 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
 ; GFX9-NEXT: ds_write_b32 v0, v0
 define void @func_mov_fi_i32_offset() #0 {
@@ -55,7 +53,7 @@ define void @func_mov_fi_i32_offset() #0 {
 
 ; GCN-LABEL: {{^}}func_add_constant_to_fi_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; GCN: s_sub_u32 [[SUB:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 
 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
 ; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
@@ -77,7 +75,7 @@ define void @func_add_constant_to_fi_i32() #0 {
 ; into.
 
 ; GCN-LABEL: {{^}}func_other_fi_user_i32:
-; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
+; GCN: s_sub_u32 [[SUB:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 
 ; CI-NEXT: v_lshr_b32_e64 v0, [[SUB]], 6
 
@@ -112,7 +110,7 @@ define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s33
+; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 
 ; CI-NEXT: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
 ; CI-NEXT: v_or_b32_e32 v0, 4, [[SHIFT]]
@@ -177,11 +175,11 @@ ret:
 
 ; Added offset can't be used with VOP3 add
 ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
-; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s33
-; GCN-DAG: s_movk_i32 [[K:s[0-9]+]], 0x200
+; GCN: s_sub_u32 [[SUB:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
+; GCN-DAG: s_movk_i32 [[K:s[0-9]+|vcc_lo|vcc_hi]], 0x200
 
 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB]], 6
-; CI: v_add_i32_e64 [[VZ:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[SCALED]]
+; CI: v_add_i32_e32 [[VZ:v[0-9]+]], vcc, [[K]], [[SCALED]]
 
 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[SUB]]
 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], [[K]], [[SCALED]]
@@ -258,7 +256,7 @@ bb5:
 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block:
 ; GCN: s_and_saveexec_b64
 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
-; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s33
+; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
 
 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
 ; CI-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SHIFT]]
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
new file mode 100644
index 00000000000000..853e2346031e76
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
@@ -0,0 +1,42 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+
+# Frame virtual SGPRs should not be used, as the register scavenger cannot usefully spill them anymore.
+# Spilling is also worse than increment and restore of a frame register. There should be no spills remaining.
+
+---
+name: scavenge_sgpr_pei
+tracksRegLiveness: true
+
+stack:
+  - { id: 0, type: default, size: 4, alignment: 4096 }
+
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr34
+  frameOffsetReg:  $sgpr33
+  stackPtrOffsetReg:  $sgpr32
+
+body:             |
+  bb.0:
+    liveins: $vgpr1
+
+    ; CHECK-LABEL: name: scavenge_sgpr_pei
+    ; CHECK: liveins: $vgpr1
+    ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
+    ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294705152, implicit-def $scc
+    ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 524288, implicit-def $scc
+    ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+    ; CHECK: $sgpr33 = S_SUB_U32 $sgpr33, $sgpr34, implicit-def $scc
+    ; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; CHECK: $sgpr33 = S_ADD_U32 $sgpr33, $sgpr34, implicit-def $scc
+    ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+    ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 524288, implicit-def $scc
+    ; CHECK: $sgpr33 = frame-setup COPY $sgpr27
+    ; CHECK: S_ENDPGM 0, implicit $vcc
+    S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
+    $vgpr0 = V_OR_B32_e32 %stack.0, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+    S_ENDPGM 0, implicit $vcc
+...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 641b1c90f8d6e6..3f6f4a27503c10 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -120,10 +120,10 @@ endif:                                            ; preds = %else, %if
 
 ; GCN: ; clobber m0
 
-; TOSMEM: s_mov_b32 s2, m0
+; TOSMEM: s_mov_b32 vcc_hi, m0
 ; TOSMEM: s_add_u32 m0, s3, 0x100
 ; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
-; TOSMEM: s_mov_b32 m0, s2
+; TOSMEM: s_mov_b32 m0, vcc_hi
 
 ; TOSMEM: s_mov_b64 exec,
 ; TOSMEM: s_cbranch_execz
@@ -171,10 +171,10 @@ endif:
 
 ; TOSMEM: s_mov_b32 m0, -1
 
-; TOSMEM: s_mov_b32 s0, m0
+; TOSMEM: s_mov_b32 vcc_hi, m0
 ; TOSMEM: s_add_u32 m0, s3, 0x200
 ; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
-; TOSMEM: s_mov_b32 m0, s0
+; TOSMEM: s_mov_b32 m0, vcc_hi
 ; TOSMEM: s_waitcnt lgkmcnt(0)
 
 ; TOSMEM: ds_write_b64

From 1aac182f3137ced28fb393780773ff0ae816d276 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Thu, 29 Aug 2019 01:26:09 +0000
Subject: [PATCH 4/9] [Attributor] Fix typo

llvm-svn: 370282
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index be609a2b238d6e..84036813517b43 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1417,7 +1417,7 @@ struct AANonNullCallSiteArgument final : AANonNullFloating {
   AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {}
 
   /// See AbstractAttribute::trackStatistics()
-  void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnul) }
+  void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) }
 };
 
 /// NonNull attribute for a call site return position.

From 62a9c1da787e821e8bda02d23df7bad84577dd99 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Thu, 29 Aug 2019 01:26:58 +0000
Subject: [PATCH 5/9] [Attributor][Fix] Indicate change correctly

llvm-svn: 370283
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 84036813517b43..3681bd65225e11 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1956,6 +1956,7 @@ ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) {
     // which will prevent us from querying isAssumedDead().
     indicatePessimisticFixpoint();
     assert(!isValidState() && "Expected an invalid state!");
+    Status = ChangeStatus::CHANGED;
   }
 
   return Status;

From a283125ef2b24d3c07d70b83866eea63362dca9c Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Thu, 29 Aug 2019 01:28:30 +0000
Subject: [PATCH 6/9] [Attributor][NFC] Add const to map key

llvm-svn: 370284
---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 4b5d266d4d80f6..d65ee9cb0327d1 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -643,8 +643,7 @@ struct Attributor {
   /// `getAAFor` to explicitly record true dependences through this method.
   void recordDependence(const AbstractAttribute &FromAA,
                         const AbstractAttribute &ToAA) {
-    QueryMap[const_cast<AbstractAttribute *>(&FromAA)].insert(
-        const_cast<AbstractAttribute *>(&ToAA));
+    QueryMap[&FromAA].insert(const_cast<AbstractAttribute *>(&ToAA));
   }
 
   /// Introduce a new abstract attribute into the fixpoint analysis.
@@ -776,7 +775,7 @@ struct Attributor {
   /// to the getAAFor<...>(...) method.
   ///{
   using QueryMapTy =
-      MapVector<AbstractAttribute *, SetVector<AbstractAttribute *>>;
+      MapVector<const AbstractAttribute *, SetVector<AbstractAttribute *>>;
   QueryMapTy QueryMap;
   ///}
 

From bf112139ac5f94b2362e57c120f2ca8356140340 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Thu, 29 Aug 2019 01:29:44 +0000
Subject: [PATCH 7/9] [Attributor] Improve messages in iteration verify mode

When verifying the iteration count, we now see the actual count and
the expected count before the assertion is triggered.

llvm-svn: 370285
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 3681bd65225e11..2d386ae036565d 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2608,22 +2608,14 @@ ChangeStatus Attributor::run() {
     Worklist.clear();
     Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
 
-  } while (!Worklist.empty() && IterationCounter++ < MaxFixpointIterations);
-
-  size_t NumFinalAAs = AllAbstractAttributes.size();
-
-  if (VerifyMaxFixpointIterations && IterationCounter != MaxFixpointIterations) {
-    errs() << "\n[Attributor] Fixpoint iteration done after: "
-           << IterationCounter << "/" << MaxFixpointIterations
-           << " iterations\n";
-    llvm_unreachable("The fixpoint was not reached with exactly the number of "
-                     "specified iterations!");
-  }
+  } while (!Worklist.empty() && (IterationCounter++ < MaxFixpointIterations ||
+                                 VerifyMaxFixpointIterations));
 
   LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
                     << IterationCounter << "/" << MaxFixpointIterations
                     << " iterations\n");
 
+  size_t NumFinalAAs = AllAbstractAttributes.size();
 
   bool FinishedAtFixpoint = Worklist.empty();
 
@@ -2737,6 +2729,15 @@ ChangeStatus Attributor::run() {
     Fn->eraseFromParent();
   }
 
+  if (VerifyMaxFixpointIterations &&
+      IterationCounter != MaxFixpointIterations) {
+    errs() << "\n[Attributor] Fixpoint iteration done after: "
+           << IterationCounter << "/" << MaxFixpointIterations
+           << " iterations\n";
+    llvm_unreachable("The fixpoint was not reached with exactly the number of "
+                     "specified iterations!");
+  }
+
   return ManifestChange;
 }
 

From a633d29ba18b98511855f5a42cda3630306bdd13 Mon Sep 17 00:00:00 2001
From: Davide Italiano <davide@freebsd.org>
Date: Thu, 29 Aug 2019 01:39:26 +0000
Subject: [PATCH 8/9] [TSanRuntime] Upstream thread swift race detector.

Summary:
This is self-contained and doesn't need anything in the
compiler to work. It is upstreamed mainly to reduce the diff
between upstream and downstream.

Patch by Kuba Mracek!

Reviewers: kubamracek

Subscribers: lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D66915

llvm-svn: 370286
---
 .../TSan/TSanRuntime.cpp                      | 38 ++++++++++++++++---
 .../InstrumentationRuntime/TSan/TSanRuntime.h |  3 +-
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp
index 89f2139db71b12..c9b46e34588a1c 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp
@@ -88,6 +88,7 @@ extern "C"
     // TODO: dlsym won't work on Windows.
     void *dlsym(void* handle, const char* symbol);
     int (*ptr__tsan_get_report_loc_object_type)(void *report, unsigned long idx, const char **object_type);
+    int (*ptr__tsan_get_report_tag)(void *report, unsigned long *tag);
 }
 
 const int REPORT_TRACE_SIZE = 128;
@@ -97,6 +98,7 @@ struct data {
     void *report;
     const char *description;
     int report_count;
+    unsigned long tag;
     
     void *sleep_trace[REPORT_TRACE_SIZE];
     
@@ -163,10 +165,14 @@ const char *thread_sanitizer_retrieve_report_data_command = R"(
 data t = {0};
 
 ptr__tsan_get_report_loc_object_type = (typeof(ptr__tsan_get_report_loc_object_type))(void *)dlsym((void*)-2 /*RTLD_DEFAULT*/, "__tsan_get_report_loc_object_type");
+ptr__tsan_get_report_tag = (typeof(ptr__tsan_get_report_tag))(void *)dlsym((void*)-2 /*RTLD_DEFAULT*/, "__tsan_get_report_tag");
 
 t.report = __tsan_get_current_report();
 __tsan_get_report_data(t.report, &t.description, &t.report_count, &t.stack_count, &t.mop_count, &t.loc_count, &t.mutex_count, &t.thread_count, &t.unique_tid_count, t.sleep_trace, REPORT_TRACE_SIZE);
 
+if (ptr__tsan_get_report_tag)
+    ptr__tsan_get_report_tag(t.report, &t.tag);
+
 if (t.stack_count > REPORT_ARRAY_SIZE) t.stack_count = REPORT_ARRAY_SIZE;
 for (int i = 0; i < t.stack_count; i++) {
     t.stacks[i].idx = i;
@@ -347,6 +353,9 @@ ThreadSanitizerRuntime::RetrieveReportData(ExecutionContextRef exe_ctx_ref) {
                            ->GetValueAsUnsigned(0));
   dict->AddItem("sleep_trace", StructuredData::ObjectSP(CreateStackTrace(
                                    main_value, ".sleep_trace")));
+  dict->AddIntegerItem(
+      "tag",
+      main_value->GetValueForExpressionPath(".tag")->GetValueAsUnsigned(0));
 
   StructuredData::Array *stacks = ConvertToStructuredArray(
       main_value, ".stacks", ".stack_count",
@@ -485,8 +494,8 @@ ThreadSanitizerRuntime::RetrieveReportData(ExecutionContextRef exe_ctx_ref) {
   return StructuredData::ObjectSP(dict);
 }
 
-std::string
-ThreadSanitizerRuntime::FormatDescription(StructuredData::ObjectSP report) {
+std::string ThreadSanitizerRuntime::FormatDescription(
+    StructuredData::ObjectSP report, bool &is_swift_access_race) {
   std::string description = report->GetAsDictionary()
                                 ->GetValueForKey("issue_type")
                                 ->GetAsString()
@@ -521,8 +530,18 @@ ThreadSanitizerRuntime::FormatDescription(StructuredData::ObjectSP report) {
   } else if (description == "lock-order-inversion") {
     return "Lock order inversion (potential deadlock)";
   } else if (description == "external-race") {
+    auto tag = report->GetAsDictionary()
+                   ->GetValueForKey("tag")
+                   ->GetAsInteger()
+                   ->GetValue();
+    static const unsigned long kSwiftAccessRaceTag = 0x1;
+    if (tag == kSwiftAccessRaceTag) {
+      is_swift_access_race = true;
+      return "Swift access race";
+    }
     return "Race on a library object";
   } else if (description == "swift-access-race") {
+    is_swift_access_race = true;
     return "Swift access race";
   }
 
@@ -616,9 +635,14 @@ ThreadSanitizerRuntime::GenerateSummary(StructuredData::ObjectSP report) {
                             ->GetValueForKey("description")
                             ->GetAsString()
                             ->GetValue();
+  bool is_swift_access_race = report->GetAsDictionary()
+                                  ->GetValueForKey("is_swift_access_race")
+                                  ->GetAsBoolean()
+                                  ->GetValue();
+
   bool skip_one_frame =
-      report->GetObjectForDotSeparatedPath("issue_type")->GetStringValue() ==
-      "external-race";
+      (report->GetObjectForDotSeparatedPath("issue_type")->GetStringValue() ==
+      "external-race") && (!is_swift_access_race);
 
   addr_t pc = 0;
   if (report->GetAsDictionary()
@@ -810,8 +834,12 @@ bool ThreadSanitizerRuntime::NotifyBreakpointHit(
       instance->RetrieveReportData(context->exe_ctx_ref);
   std::string stop_reason_description;
   if (report) {
-    std::string issue_description = instance->FormatDescription(report);
+    bool is_swift_access_race = false;
+    std::string issue_description =
+        instance->FormatDescription(report, is_swift_access_race);
     report->GetAsDictionary()->AddStringItem("description", issue_description);
+    report->GetAsDictionary()->AddBooleanItem("is_swift_access_race",
+                                              is_swift_access_race);
     stop_reason_description = issue_description + " detected";
     report->GetAsDictionary()->AddStringItem("stop_description",
                                              stop_reason_description);
diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.h b/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.h
index db8bb1db79960f..966edd34797b2c 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.h
+++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.h
@@ -61,7 +61,8 @@ class ThreadSanitizerRuntime : public lldb_private::InstrumentationRuntime {
 
   StructuredData::ObjectSP RetrieveReportData(ExecutionContextRef exe_ctx_ref);
 
-  std::string FormatDescription(StructuredData::ObjectSP report);
+  std::string FormatDescription(StructuredData::ObjectSP report,
+                                bool &is_swift_access_race);
 
   std::string GenerateSummary(StructuredData::ObjectSP report);
 

From 54b3aa91d0ad0d021d416c4b6366d1f8d230d2e0 Mon Sep 17 00:00:00 2001
From: Davide Italiano <davide@freebsd.org>
Date: Thu, 29 Aug 2019 01:45:10 +0000
Subject: [PATCH 9/9] [python] remove testsuite vestiges.

llvm-svn: 370287
---
 .../Python/lldbsuite/test/attic/dotest.pl     |  44 ------
 .../Python/lldbsuite/test/attic/tester.py     | 149 ------------------
 2 files changed, 193 deletions(-)
 delete mode 100644 lldb/packages/Python/lldbsuite/test/attic/dotest.pl
 delete mode 100644 lldb/packages/Python/lldbsuite/test/attic/tester.py

diff --git a/lldb/packages/Python/lldbsuite/test/attic/dotest.pl b/lldb/packages/Python/lldbsuite/test/attic/dotest.pl
deleted file mode 100644
index f093ed8f8f22ec..00000000000000
--- a/lldb/packages/Python/lldbsuite/test/attic/dotest.pl
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/perl -w
-
-#
-# Use this script to visit each python test case under the specified directory
-# and invoke unittest.main() on each test case.
-#
-
-use strict;
-use FindBin;
-use File::Find;
-use File::Basename;
-use Cwd;
-use Cwd 'abs_path';
-
-scalar(@ARGV) == 1 or die "Usage: dotest.pl testdir";
-
-my $scriptDir = $FindBin::Bin;
-my $baseDir = abs_path("$scriptDir/..");
-my $pluginDir = "$baseDir/test/plugins";
-my $testDir = $ARGV[0];
-
-my $dbgPath = "$baseDir/build/Debug/LLDB.framework/Resources/Python";
-my $relPath = "$baseDir/build/Release/LLDB.framework/Resources/Python";
-if (-d $dbgPath) {
-  $ENV{'PYTHONPATH'} = "$dbgPath:$scriptDir:$pluginDir";
-} elsif (-d $relPath) {
-  $ENV{'PYTHONPATH'} = "$relPath:$scriptDir:$pluginDir";
-}
-#print("ENV{PYTHONPATH}=$ENV{'PYTHONPATH'}\n");
-
-# Traverse the directory to find our python test cases.
-find(\&handleFind, $testDir);
-
-sub handleFind {
-  my $foundFile = $File::Find::name;
-  my $dir = getcwd;
-  #print("foundFile: $foundFile\n");
-  
-  # Test*.py is the naming pattern for our test cases.
-  if ($foundFile =~ /.*\/(Test.*\.py)$/) {
-    print("Running python $1 (cwd = $dir)...\n");
-    system("python $1");
-  }
-}
diff --git a/lldb/packages/Python/lldbsuite/test/attic/tester.py b/lldb/packages/Python/lldbsuite/test/attic/tester.py
deleted file mode 100644
index 2e783512369d48..00000000000000
--- a/lldb/packages/Python/lldbsuite/test/attic/tester.py
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf8 -*-
-
-from __future__ import print_function
-
-import math
-import os.path
-import re
-import sys
-import time
-import unittest
-
-
-def setupSysPath():
-    testPath = sys.path[0]
-    rem = re.match("(^.*/)test$", testPath)
-    if not rem:
-        print("This script expects to reside in .../test.")
-        sys.exit(-1)
-    lldbBasePath = rem.group(1)
-    lldbDebugPythonPath = "build/Debug/LLDB.framework/Resources/Python"
-    lldbReleasePythonPath = "build/Release/LLDB.framework/Resources/Python"
-    lldbPythonPath = None
-    if os.path.isfile(lldbDebugPythonPath + "/lldb.py"):
-        lldbPythonPath = lldbDebugPythonPath
-    if os.path.isfile(lldbReleasePythonPath + "/lldb.py"):
-        lldbPythonPath = lldbReleasePythonPath
-    if not lldbPythonPath:
-        print(
-            "This script requires lldb.py to be in either " +
-            lldbDebugPythonPath,
-            end='')
-        print("or" + lldbReleasePythonPath)
-        sys.exit(-1)
-    sys.path.append(lldbPythonPath)
-
-
-def prettyTime(t):
-    if t == 0.0:
-        return "0s"
-    if t < 0.000001:
-        return ("%.3f" % (t * 1000000000.0)) + "ns"
-    if t < 0.001:
-        return ("%.3f" % (t * 1000000.0)) + "µs"
-    if t < 1:
-        return ("%.3f" % (t * 1000.0)) + "ms"
-    return str(t) + "s"
-
-
-class ExecutionTimes:
-
-    @classmethod
-    def executionTimes(cls):
-        if cls.m_executionTimes is None:
-            cls.m_executionTimes = ExecutionTimes()
-            for i in range(100):
-                cls.m_executionTimes.start()
-                cls.m_executionTimes.end("null")
-        return cls.m_executionTimes
-
-    def __init__(self):
-        self.m_times = dict()
-
-    def start(self):
-        self.m_start = time.time()
-
-    def end(self, component):
-        e = time.time()
-        if component not in self.m_times:
-            self.m_times[component] = list()
-        self.m_times[component].append(e - self.m_start)
-
-    def dumpStats(self):
-        for key in list(self.m_times.keys()):
-            if len(self.m_times[key]):
-                sampleMin = float('inf')
-                sampleMax = float('-inf')
-                sampleSum = 0.0
-                sampleCount = 0.0
-                for time in self.m_times[key]:
-                    if time > sampleMax:
-                        sampleMax = time
-                    if time < sampleMin:
-                        sampleMin = time
-                    sampleSum += time
-                    sampleCount += 1.0
-                sampleMean = sampleSum / sampleCount
-                sampleVariance = 0
-                for time in self.m_times[key]:
-                    sampleVariance += (time - sampleMean) ** 2
-                sampleVariance /= sampleCount
-                sampleStandardDeviation = math.sqrt(sampleVariance)
-                print(
-                    key +
-                    ": [" +
-                    prettyTime(sampleMin) +
-                    ", " +
-                    prettyTime(sampleMax) +
-                    "] ",
-                    end='')
-                print(
-                    "µ " +
-                    prettyTime(sampleMean) +
-                    ", σ " +
-                    prettyTime(sampleStandardDeviation))
-    m_executionTimes = None
-
-setupSysPath()
-
-import lldb
-
-
-class LLDBTestCase(unittest.TestCase):
-
-    def setUp(self):
-        debugger = lldb.SBDebugger.Create()
-        debugger.SetAsync(True)
-        self.m_commandInterpreter = debugger.GetCommandInterpreter()
-        if not self.m_commandInterpreter:
-            print("Couldn't get the command interpreter")
-            sys.exit(-1)
-
-    def runCommand(self, command, component):
-        res = lldb.SBCommandReturnObject()
-        ExecutionTimes.executionTimes().start()
-        self.m_commandInterpreter.HandleCommand(command, res, False)
-        ExecutionTimes.executionTimes().end(component)
-        if res.Succeeded():
-            return res.GetOutput()
-        else:
-            self.fail("Command " + command + " returned an error")
-            return None
-
-    def getCategories(self):
-        return []
-
-
-class SanityCheckTestCase(LLDBTestCase):
-
-    def runTest(self):
-        ret = self.runCommand("show arch", "show-arch")
-        # print(ret)
-
-    def getCategories(self):
-        return []
-
-suite = unittest.TestLoader().loadTestsFromTestCase(SanityCheckTestCase)
-unittest.TextTestRunner(verbosity=2).run(suite)
-ExecutionTimes.executionTimes().dumpStats()