Skip to content

Commit d45031c

Browse files
frederik-h and arsenm authored
[AMDGPU] si-peephole-sdwa: Disable V_CNDMASK_B32 conversion with sext (#140760)
The sext modifier on an operand of V_CNDMASK_B32_sdwa is erroneously printed as a neg modifier in the assembly output. As a workaround to avoid miscompilation, this patch disables the conversion of V_CNDMASK_B32 to its SDWA form if any operand uses an sext modifier. Fixes #138766. Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
1 parent 03f4fe1 commit d45031c

File tree

3 files changed

+74
-0
lines changed

3 files changed

+74
-0
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,21 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
430430
case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
431431
// Does not support input modifiers: noabs, noneg, nosext.
432432
return false;
433+
case AMDGPU::V_CNDMASK_B32_sdwa:
434+
// SISrcMods uses the same bitmask for SEXT and NEG modifiers and
435+
// hence the compiler can only support one type of modifier for
436+
// each SDWA instruction. For V_CNDMASK_B32_sdwa, this is NEG
437+
// since its operands get printed using
438+
// AMDGPUInstPrinter::printOperandAndFPInputMods which produces
439+
// the output intended for NEG if SEXT is set.
440+
//
441+
// The ISA does actually support both modifiers on most SDWA
442+
// instructions.
443+
//
444+
// FIXME Accept SEXT here after fixing this issue.
445+
if (Sext)
446+
return false;
447+
break;
433448
}
434449

435450
// Find operand in instruction that matches source operand and replace it with
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s
3+
4+
; FIXME The sext modifier is turned into a neg modifier in the asm output
5+
6+
define i32 @test_select_on_sext_sdwa(i8 %x, i32 %y, i1 %cond) {
7+
; CHECK-LABEL: test_select_on_sext_sdwa:
8+
; CHECK: ; %bb.0:
9+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10+
; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
11+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
12+
; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 8
13+
; CHECK-NEXT: s_nop 0
14+
; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
15+
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
16+
; CHECK-NEXT: s_setpc_b64 s[30:31]
17+
%sext = sext i8 %x to i32
18+
%select = select i1 %cond, i32 %sext, i32 0
19+
%or = or i32 %select, %y
20+
ret i32 %or
21+
}

llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-wave64.mir

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,41 @@ body: |
231231
$vgpr0 = COPY %3
232232
SI_RETURN implicit $vgpr0
233233
...
234+
235+
# SDWA conversion of V_CNDMASK_B32 with V_BFE_I32 operand had to be
236+
# disabled.
237+
# FIXME sext modifier gets erroneously printed as neg modifier.
238+
239+
...
240+
---
241+
name: issue138766_cndmask_with_sext
242+
tracksRegLiveness: true
243+
body: |
244+
bb.0:
245+
liveins: $vgpr0, $vgpr1, $vgpr2
246+
247+
; CHECK-LABEL: name: issue138766_cndmask_with_sext
248+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
249+
; CHECK-NEXT: {{ $}}
250+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
251+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
252+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
253+
; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec
254+
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec
255+
; CHECK-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY2]], 0, 8, implicit $exec
256+
; CHECK-NEXT: $vcc = COPY killed [[V_CMP_EQ_U32_e64_]]
257+
; CHECK-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 0, killed [[V_BFE_I32_e64_]], implicit $vcc, implicit $exec
258+
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[V_CNDMASK_B32_e32_]], [[COPY1]], implicit $exec
259+
; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]]
260+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
261+
%10:vgpr_32 = COPY $vgpr2
262+
%9:vgpr_32 = COPY $vgpr1
263+
%8:vgpr_32 = COPY $vgpr0
264+
%11:vgpr_32 = V_AND_B32_e64 1, %10, implicit $exec
265+
%12:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %11, 1, implicit $exec
266+
%14:vgpr_32 = V_BFE_I32_e64 %8, 0, 8, implicit $exec
267+
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, killed %14, killed %12, implicit $exec
268+
%18:vgpr_32 = V_OR_B32_e64 killed %16, %9, implicit $exec
269+
$vgpr0 = COPY %18
270+
SI_RETURN implicit $vgpr0
271+
...

0 commit comments

Comments
 (0)