[AMDGPU] Support tfe operand in image_atomic instructions #92469

jwanggit86 · 2024-05-16T23:06:29Z

Current, if an image_atomic instruction has the 'tfe' operand, the llvm-mc assembler in general would reject it. The only exception is when dmask is 0x1 and the instruction is not image_atomic_cmpswap (e.g., image_atomic_add v[5:6], v252, s[8:15] dmask:0x1 tfe). This patch fixes this problem and allows tfe to be specified in image_atomic instructions.

llvmbot · 2024-05-16T23:06:50Z

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Jun Wang (jwanggit86)

Changes

Current, if an image_atomic instruction has the 'tfe' operand, the llvm-mc assembler in general would reject it. The only exception is when dmask is 0x1 and the instruction is not image_atomic_cmpswap (e.g., image_atomic_add v[5:6], v252, s[8:15] dmask:0x1 tfe). This patch fixes this problem and allows tfe to be specified in image_atomic instructions.

Full diff: https://github.com/llvm/llvm-project/pull/92469.diff

5 Files Affected:

(modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+4)
(modified) llvm/test/MC/AMDGPU/gfx10_asm_mimg.s (+25)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_mimg.s (+25)
(modified) llvm/test/MC/AMDGPU/mimg.s (+33)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt (+1-1)

diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 351263d079768..ebd7acdeef5a5 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1101,6 +1101,10 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
       defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
       let VDataDwords = !if(isCmpSwap, 4, 2) in
       defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
+      let VDataDwords = !if(isCmpSwap, 2, 2) in
+      defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
+      let VDataDwords = !if(isCmpSwap, 4, 4) in
+      defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
     }
   } // End IsAtomicRet = 1
 }
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s b/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
index 7b137289aa817..96b8959038296 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_mimg.s
@@ -654,3 +654,28 @@ image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dma
 
 image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
 ; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x40]
+
+; Test dmask + tfe for image_atomic instructions
+image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x44,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x45,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x03,0x44,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x45,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x03,0x40,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x41,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10: image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x40,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x0a,0x00,0x04,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
index 6d467dfa1d8e1..d873932b75cba 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg.s
@@ -5603,3 +5603,28 @@ image_store_pck v1, v[2:3], s[96:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA unorm a1
 
 image_store_pck v255, v[254:255], ttmp[8:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe
 // GFX11: [0x98,0x74,0x21,0xf0,0xfe,0xff,0x5d,0x00]
+
+; Test dmask + tfe for image_atomic instructions
+image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x01,0x30,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x01,0x30,0xf0,0x0a,0x00,0x24,0x00]
+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x03,0x30,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x03,0x30,0xf0,0x0a,0x00,0x24,0x00]
+
+image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x03,0x2c,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x03,0x2c,0xf0,0x0a,0x00,0x24,0x00]
+
+image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D
+// GFX11: [0x04,0x0f,0x2c,0xf0,0x0a,0x00,0x04,0x00]
+
+image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+// GFX11: [0x04,0x0f,0x2c,0xf0,0x0a,0x00,0x24,0x00]
diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s
index 38927b40f3347..fd8fd5afbc487 100644
--- a/llvm/test/MC/AMDGPU/mimg.s
+++ b/llvm/test/MC/AMDGPU/mimg.s
@@ -439,6 +439,39 @@ image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc
 // SICI:  image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x40,0xf0,0xc0,0x04,0x07,0x00]
 // GFX89: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x44,0xf0,0xc0,0x04,0x07,0x00]
 
+; Test dmask + tfe for image_atomic instructions
+image_atomic_add v4, v10, s[8:15] dmask:0x1
+// SICI:  image_atomic_add v4, v10, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x44,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v4, v10, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x48,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_add v[4:5], v10, s[8:15] dmask:0x1 tfe
+// SICI:  image_atomic_add v[4:5], v10, s[8:15] dmask:0x1 tfe ; encoding: [0x00,0x01,0x45,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v[4:5], v10, s[8:15] dmask:0x1 tfe ; encoding: [0x00,0x01,0x49,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_add v[4:5], v10, s[8:15] dmask:0x3
+// SICI:  image_atomic_add v[4:5], v10, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x44,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v[4:5], v10, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x48,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_add v[4:6], v10, s[8:15] dmask:0x3 tfe
+// SICI:  image_atomic_add v[4:6], v10, s[8:15] dmask:0x3 tfe ; encoding: [0x00,0x03,0x45,0xf0,0x0a,0x04,0x02,0x00]
+// GFX89: image_atomic_add v[4:6], v10, s[8:15] dmask:0x3 tfe ; encoding: [0x00,0x03,0x49,0xf0,0x0a,0x04,0x02,0x00]
+
+image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3
+// SICI:  image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 ; encoding: [0x00,0x03,0x40,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 ; encoding: [0x00,0x03,0x44,0xf0,0xc0,0x04,0x07,0x00]
+
+image_atomic_cmpswap v[4:6], v[192:195], s[28:35] dmask:0x3 tfe
+// SICI:  image_atomic_cmpswap v[4:6], v[192:195], s[28:35] dmask:0x3 tfe ; encoding: [0x00,0x03,0x41,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:6], v[192:195], s[28:35] dmask:0x3 tfe ; encoding: [0x00,0x03,0x45,0xf0,0xc0,0x04,0x07,0x00]
+
+image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf
+// SICI:  image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf ; encoding: [0x00,0x0f,0x40,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf ; encoding: [0x00,0x0f,0x44,0xf0,0xc0,0x04,0x07,0x00]
+
+image_atomic_cmpswap v[4:8], v[192:195], s[28:35] dmask:0xf tfe
+// SICI:  image_atomic_cmpswap v[4:8], v[192:195], s[28:35] dmask:0xf tfe ; encoding: [0x00,0x0f,0x41,0xf0,0xc0,0x04,0x07,0x00]
+// GFX89: image_atomic_cmpswap v[4:8], v[192:195], s[28:35] dmask:0xf tfe ; encoding: [0x00,0x0f,0x45,0xf0,0xc0,0x04,0x07,0x00]
+
 // FIXME: This test is incorrect because r128 assumes a 128-bit SRSRC.
 image_atomic_add v10, v6, s[8:15] dmask:0x1 r128
 // SICI: image_atomic_add v10, v6, s[8:15] dmask:0x1 r128 ; encoding: [0x00,0x81,0x44,0xf0,0x06,0x0a,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 292af1850db86..0a5bafc55f4d4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -195,7 +195,7 @@
 # VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00
 
-# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00
 
 # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]

b-sumner · 2024-05-17T01:32:55Z

I'm not opposed, but I doubt that some runtimes can properly handle TFE=1. Do we know, e.g. that it is valid for amdgcn-amd-amdhsa?

arsenm · 2024-05-17T07:15:02Z

I'm not opposed, but I doubt that some runtimes can properly handle TFE=1. Do we know, e.g. that it is valid for amdgcn-amd-amdhsa?

It's not the job of the assembler to know or care about whether it works, it just needs to emit the encoded instruction

arsenm · 2024-05-17T07:19:18Z

llvm/test/MC/AMDGPU/gfx10_asm_mimg.s

+
+image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
+; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x45,0xf0,0x0a,0x00,0x04,0x00]
+


Tests didn't check dmask 0x2? Also, should comprehensively test all the image opcodes, not just add and cmpswap

Tests didn't check dmask 0x2? Also, should comprehensively test all the image opcodes, not just add and cmpswap

For image_atomic instructions, code in AMDGPUAsmParser::validateMIMGAtomicDMask requires DMask to be 0x1, 0x3, or 0xf.

bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { ... // This is an incomplete check because image_atomic_cmpswap // may only use 0x3 and 0xf while other atomic operations // may use 0x1 and 0x3. However these limitations are // verified when we check that dmask matches dst size. return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; }

Should still test all the various other opcodes

Tests have been added for all other image_atomic opcodes.

b-sumner · 2024-05-17T14:32:56Z

It's not the job of the assembler to know or care about whether it works, it just needs to emit the encoded instruction

I fully agree. I'm just observing that this may be something else to look for when odd failures occur.

jwanggit86 · 2024-05-22T17:34:20Z

@b-sumner @kosarev @dstutt @piotrAMD Do you have any comments?

…2469) Current, if an image_atomic instruction has the 'tfe' operand, the llvm-mc assembler in general would reject it. The only exception is when dmask is 0x1 and the instruction is not image_atomic_cmpswap (e.g., image_atomic_add v[5:6], v252, s[8:15] dmask:0x1 tfe). This patch fixes this problem and allows tfe to be specified in image_atomic instructions. --------- Co-authored-by: Jun Wang <jun.wang7@amd.com>

jwanggit86 added the backend:AMDGPU label May 16, 2024

jwanggit86 requested review from arsenm and b-sumner May 16, 2024 23:06

llvmbot added the mc Machine (object) code label May 16, 2024

jwanggit86 mentioned this pull request May 16, 2024

[AMDGPU][MC] image_atomic instructions do not support tfe #48190

Open

arsenm reviewed May 17, 2024

View reviewed changes

jayfoad requested review from kosarev, dstutt and piotrAMD May 17, 2024 08:49

Add tests for image_atomic instructions besides add and cmpswap.

41301eb

arsenm approved these changes May 20, 2024

View reviewed changes

jwanggit86 merged commit 6e7b45c into llvm:main May 29, 2024
3 of 4 checks passed

jwanggit86 deleted the parsing_image_atomic_with_tfe branch May 29, 2024 22:56

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] Support tfe operand in image_atomic instructions #92469

[AMDGPU] Support tfe operand in image_atomic instructions #92469

jwanggit86 commented May 16, 2024

llvmbot commented May 16, 2024 •

edited

b-sumner commented May 17, 2024

arsenm commented May 17, 2024

arsenm May 17, 2024

jwanggit86 May 20, 2024

arsenm May 20, 2024

jwanggit86 May 20, 2024

b-sumner commented May 17, 2024

jwanggit86 commented May 22, 2024


		image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
		; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x45,0xf0,0x0a,0x00,0x04,0x00]

[AMDGPU] Support tfe operand in image_atomic instructions #92469

[AMDGPU] Support tfe operand in image_atomic instructions #92469

Conversation

jwanggit86 commented May 16, 2024

llvmbot commented May 16, 2024 • edited

b-sumner commented May 17, 2024

arsenm commented May 17, 2024

arsenm May 17, 2024

Choose a reason for hiding this comment

jwanggit86 May 20, 2024

Choose a reason for hiding this comment

arsenm May 20, 2024

Choose a reason for hiding this comment

jwanggit86 May 20, 2024

Choose a reason for hiding this comment

b-sumner commented May 17, 2024

jwanggit86 commented May 22, 2024

llvmbot commented May 16, 2024 •

edited