-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[X86][APX] Exclusively emit setzucc to avoid false dependency #142092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
1. Added pattern to emit SetZUCC instruction with APX NDD enabled to avoid false dependency with SetCC. SetCC is emitted with APX NDD disabled. 2. Reverted part of llvm#129506 (changing setzucc back to setcc + zext). Keeping the check of SetZUCC instruction will call rewriteSetCC for SetZUCC instruction and remove redundant test after SetZUCC in X86 Flags Copy Lowering pass. 3. Also added SetZUCC support in FixupSetCC pass to eliminate zext instruction after SetZUCC.
@llvm/pr-subscribers-backend-x86 Author: Feng Zou (fzou1) Changes
Patch is 278.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142092.diff 17 Files Affected:
diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 2de89947c4519..8ea5ed695c356 100644
--- a/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -79,10 +79,11 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
if (MI.definesRegister(X86::EFLAGS, /*TRI=*/nullptr))
FlagsDefMI = &MI;
- // Find a setcc that is used by a zext.
+ // Find a setcc/setzucc (if ZU is enabled) that is used by a zext.
// This doesn't have to be the only use, the transformation is safe
// regardless.
- if (MI.getOpcode() != X86::SETCCr)
+ if (MI.getOpcode() != X86::SETCCr &&
+ (!ST->hasZU() || MI.getOpcode() != X86::SETZUCCr))
continue;
MachineInstr *ZExt = nullptr;
@@ -122,7 +123,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
// register.
Register ZeroReg = MRI->createVirtualRegister(RC);
if (ST->hasZU()) {
- MI.setDesc(TII->get(X86::SETZUCCr));
+ if (MI.getOpcode() != X86::SETZUCCr)
+ MI.setDesc(TII->get(X86::SETZUCCr));
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
} else {
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index ab6e6d0687b71..90c975e7971c9 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -746,8 +746,10 @@ Register X86FlagsCopyLoweringPass::promoteCondToReg(
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
const DebugLoc &TestLoc, X86::CondCode Cond) {
Register Reg = MRI->createVirtualRegister(PromoteRC);
- auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
- .addImm(Cond);
+ auto SetI =
+ BuildMI(TestMBB, TestPos, TestLoc,
+ TII->get(Subtarget->hasZU() ? X86::SETZUCCr : X86::SETCCr), Reg)
+ .addImm(Cond);
(void)SetI;
LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
++NumSetCCsInserted;
@@ -791,29 +793,6 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
if (!CondReg)
CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
- if (X86::isSETZUCC(MI.getOpcode())) {
- // SETZUCC is generated for register only for now.
- assert(!MI.mayStore() && "Cannot handle memory variants");
- assert(MI.getOperand(0).isReg() &&
- "Cannot have a non-register defined operand to SETZUcc!");
- Register OldReg = MI.getOperand(0).getReg();
- // Drop Kill flags on the old register before replacing. CondReg may have
- // a longer live range.
- MRI->clearKillFlags(OldReg);
- for (auto &Use : MRI->use_instructions(OldReg)) {
- assert(Use.getOpcode() == X86::INSERT_SUBREG &&
- "SETZUCC should be only used by INSERT_SUBREG");
- Use.getOperand(2).setReg(CondReg);
- // Recover MOV32r0 before INSERT_SUBREG, which removed by SETZUCC.
- Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
- BuildMI(*Use.getParent(), &Use, Use.getDebugLoc(), TII->get(X86::MOV32r0),
- ZeroReg);
- Use.getOperand(1).setReg(ZeroReg);
- }
- MI.eraseFromParent();
- return;
- }
-
// Rewriting a register def is trivial: we just replace the register and
// remove the setcc.
if (!MI.mayStore()) {
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 7d5d7cf4a83ab..06bf70f18e30d 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -137,11 +137,14 @@ let Predicates = [HasCMOV, HasCF] in {
}
// SetCC instructions.
-let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
+let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1, Predicates = [NoNDD] in {
def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
"set${cond}\t$dst",
[(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>,
TB, Sched<[WriteSETCC]>;
+}
+
+let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond),
"set${cond}\t$dst",
[(store (X86setcc timm:$cond, EFLAGS), addr:$dst)]>,
@@ -152,7 +155,8 @@ let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0, Predicates = [In64BitMode], Predicates = [HasNDD] in {
def SETZUCCr : I<0x40, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
- "setzu${cond}\t$dst", []>,
+ "setzu${cond}\t$dst",
+ [(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>,
XD, ZU, NoCD8, Sched<[WriteSETCC]>;
def SETCCr_EVEX : I<0x40, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
"set${cond}\t$dst", []>,
diff --git a/llvm/test/CodeGen/X86/apx/add.ll b/llvm/test/CodeGen/X86/apx/add.ll
index 86343811901a9..c8e1e055f8ca4 100644
--- a/llvm/test/CodeGen/X86/apx/add.ll
+++ b/llvm/test/CodeGen/X86/apx/add.ll
@@ -758,7 +758,7 @@ define i1 @add64ri_reloc(i16 %k) {
; CHECK-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0]
; CHECK-NEXT: addq $val, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: val, kind: reloc_signed_4byte
-; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; CHECK-NEXT: setzune %al # encoding: [0x62,0xf4,0x7f,0x18,0x45,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64ri_reloc:
@@ -768,7 +768,7 @@ define i1 @add64ri_reloc(i16 %k) {
; NF-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0]
; NF-NEXT: addq $val, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: val, kind: reloc_signed_4byte
-; NF-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NF-NEXT: setzune %al # encoding: [0x62,0xf4,0x7f,0x18,0x45,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%g = getelementptr inbounds i16, ptr @val, i16 %k
%cmp = icmp ne ptr %g, null
diff --git a/llvm/test/CodeGen/X86/apx/and.ll b/llvm/test/CodeGen/X86/apx/and.ll
index 3379ac9dec893..6de07a5b72587 100644
--- a/llvm/test/CodeGen/X86/apx/and.ll
+++ b/llvm/test/CodeGen/X86/apx/and.ll
@@ -435,7 +435,7 @@ define i1 @andflag8rr(i8 %a, i8 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6]
; CHECK-NEXT: andb %al, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x20,0xc7]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -444,7 +444,7 @@ define i1 @andflag8rr(i8 %a, i8 %b) {
; NF: # %bb.0:
; NF-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6]
; NF-NEXT: andb %al, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x20,0xc7]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -460,7 +460,7 @@ define i1 @andflag16rr(i16 %a, i16 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: notw %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xf7,0xd6]
; CHECK-NEXT: andw %ax, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x21,0xc7]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -469,7 +469,7 @@ define i1 @andflag16rr(i16 %a, i16 %b) {
; NF: # %bb.0:
; NF-NEXT: notw %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xf7,0xd6]
; NF-NEXT: andw %ax, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x21,0xc7]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -484,7 +484,7 @@ define i1 @andflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: andflag32rr:
; CHECK: # %bb.0:
; CHECK-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -492,7 +492,7 @@ define i1 @andflag32rr(i32 %a, i32 %b) {
; NF-LABEL: andflag32rr:
; NF: # %bb.0:
; NF-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -506,7 +506,7 @@ define i1 @andflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: andflag64rr:
; CHECK: # %bb.0:
; CHECK-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -514,7 +514,7 @@ define i1 @andflag64rr(i64 %a, i64 %b) {
; NF-LABEL: andflag64rr:
; NF: # %bb.0:
; NF-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -529,7 +529,7 @@ define i1 @andflag8rm(ptr %ptr, i8 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6]
; CHECK-NEXT: andb (%rdi), %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x22,0x07]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -538,7 +538,7 @@ define i1 @andflag8rm(ptr %ptr, i8 %b) {
; NF: # %bb.0:
; NF-NEXT: notb %sil, %al # encoding: [0x62,0xf4,0x7c,0x18,0xf6,0xd6]
; NF-NEXT: andb (%rdi), %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x22,0x07]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -555,7 +555,7 @@ define i1 @andflag16rm(ptr %ptr, i16 %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: notw %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xf7,0xd6]
; CHECK-NEXT: andw (%rdi), %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x23,0x07]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -564,7 +564,7 @@ define i1 @andflag16rm(ptr %ptr, i16 %b) {
; NF: # %bb.0:
; NF-NEXT: notw %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xf7,0xd6]
; NF-NEXT: andw (%rdi), %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x23,0x07]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -580,7 +580,7 @@ define i1 @andflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: andflag32rm:
; CHECK: # %bb.0:
; CHECK-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -588,7 +588,7 @@ define i1 @andflag32rm(ptr %ptr, i32 %b) {
; NF-LABEL: andflag32rm:
; NF: # %bb.0:
; NF-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -603,7 +603,7 @@ define i1 @andflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: andflag64rm:
; CHECK: # %bb.0:
; CHECK-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -611,7 +611,7 @@ define i1 @andflag64rm(ptr %ptr, i64 %b) {
; NF-LABEL: andflag64rm:
; NF: # %bb.0:
; NF-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -626,7 +626,7 @@ define i1 @andflag8ri(i8 %a) {
; CHECK-LABEL: andflag8ri:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xe7,0x84]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -634,7 +634,7 @@ define i1 @andflag8ri(i8 %a) {
; NF-LABEL: andflag8ri:
; NF: # %bb.0:
; NF-NEXT: andb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xe7,0x84]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -650,7 +650,7 @@ define i1 @andflag16ri(i16 %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: andw $-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xe7,0x2d,0xfb]
; CHECK-NEXT: # imm = 0xFB2D
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -659,7 +659,7 @@ define i1 @andflag16ri(i16 %a) {
; NF: # %bb.0:
; NF-NEXT: andw $-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xe7,0x2d,0xfb]
; NF-NEXT: # imm = 0xFB2D
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -675,7 +675,7 @@ define i1 @andflag32ri(i32 %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -684,7 +684,7 @@ define i1 @andflag32ri(i32 %a) {
; NF: # %bb.0:
; NF-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -699,7 +699,7 @@ define i1 @andflag64ri(i64 %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -708,7 +708,7 @@ define i1 @andflag64ri(i64 %a) {
; NF: # %bb.0:
; NF-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -722,7 +722,7 @@ define i1 @andflag16ri8(i16 %a) {
; CHECK-LABEL: andflag16ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: andw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xe7,0x84]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
@@ -730,7 +730,7 @@ define i1 @andflag16ri8(i16 %a) {
; NF-LABEL: andflag16ri8:
; NF: # %bb.0:
; NF-NEXT: andw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xe7,0x84]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NF-NEXT: setzue %al # encoding: [0x62,0xf4,0x7f,0x18,0x44,0xc0]
; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
@@ -745,7 +745,7 @@ define i1 @andflag32ri8(i32 %a) {
; CHECK-LABEL: andflag32ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: andl $123, %edi # EVEX TO LEGAC...
[truncated]
|
if (MI.getOpcode() != X86::SETZUCCr) | ||
MI.setDesc(TII->get(X86::SETZUCCr)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we always generate SETZUCCr
now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we always generate
SETZUCCr
now?
Yes. We do always emit SETZUCCr
when NDD feature is specified. This is to transform SETCCr to SETZUCCr when it came from IR in LIT test, like apx/setzucc.ll.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand why NDD matters here. apx/setzucc.ll doesn't enable NDD either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to check ZU flag for SETZUCCr instruction in td and added assertion here to ensure MI is SETZUCCr instruction.
// This doesn't have to be the only use, the transformation is safe | ||
// regardless. | ||
if (MI.getOpcode() != X86::SETCCr) | ||
if (MI.getOpcode() != X86::SETCCr && | ||
(!ST->hasZU() || MI.getOpcode() != X86::SETZUCCr)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hasZU
is not needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
llvm/lib/Target/X86/X86FastISel.cpp
Outdated
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr), | ||
FlagReg2).addImm(SETFOpc[1]); | ||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, | ||
TII.get(Subtarget->hasZU() ? X86::SETZUCCr : X86::SETCCr), FlagReg1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#define GET_SETCC (Subtarget->hasZU() ? X86::SETZUCCr : X86::SETCCr)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
@@ -137,11 +137,14 @@ let Predicates = [HasCMOV, HasCF] in { | |||
} | |||
|
|||
// SetCC instructions. | |||
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in { | |||
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1, Predicates = [NoNDD] in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NoNDD
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NoNDD
?
Yes. See the definition in X86InstrPredicates.td as below:
def HasNDD : Predicate<"Subtarget->hasNDD()">;
def NoNDD : Predicate<"!Subtarget->hasNDD()">;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean why not checking HasZU
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Uh oh!
There was an error while loading. Please reload this page.