[AMDGPU] Add all type for bitcast on VReg_512 #131775

Shoreshen · 2025-03-18T10:12:48Z

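Add BitConvert patterns for every pair of distinct value types in VReg_512, generated by iterating over VReg_512.RegTypes instead of listing each pair by hand.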

llvmbot · 2025-03-18T10:13:19Z

@llvm/pr-subscribers-backend-amdgpu

Author: None (Shoreshen)

Changes

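Add BitConvert patterns for every pair of distinct value types in VReg_512, generated by iterating over VReg_512.RegTypes instead of listing each pair by hand.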

Full diff: https://github.com/llvm/llvm-project/pull/131775.diff

2 Files Affected:

(modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+8-37)
(modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll (+127)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index de77401eb0137..2eee87068a3b8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1841,58 +1841,29 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
 def : BitConvert <v12f32, v12i32, VReg_384>;
 
 // 512-bit bitcast
-def : BitConvert <v32f16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v16i32, VReg_512>;
-def : BitConvert <v32f16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32f16, VReg_512>;
-def : BitConvert <v16i32, v32f16, VReg_512>;
-def : BitConvert <v32i16, v16i32, VReg_512>;
-def : BitConvert <v32i16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v16f32, VReg_512>;
-def : BitConvert <v16f32, v16i32, VReg_512>;
-def : BitConvert <v8i64,  v8f64,  VReg_512>;
-def : BitConvert <v8f64,  v8i64,  VReg_512>;
-def : BitConvert <v8i64,  v16i32, VReg_512>;
-def : BitConvert <v8f64,  v16i32, VReg_512>;
-def : BitConvert <v16i32, v8i64,  VReg_512>;
-def : BitConvert <v16i32, v8f64,  VReg_512>;
-def : BitConvert <v8i64,  v16f32, VReg_512>;
-def : BitConvert <v8f64,  v16f32, VReg_512>;
-def : BitConvert <v16f32, v8i64,  VReg_512>;
-def : BitConvert <v16f32, v8f64,  VReg_512>;
-
-
-
-def : BitConvert <v32bf16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32bf16, VReg_512>;
+foreach vt = VReg_512.RegTypes in {
+  foreach st = VReg_512.RegTypes in {
+    if !not(!eq (vt, st)) then {
+        def : BitConvert <vt, st, VReg_512>;
+    }
+  }
+}
+
 def : BitConvert <v32bf16, v32i16, SReg_512>;
 def : BitConvert <v32i16, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v32f16, SReg_512>;
 def : BitConvert <v32f16, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v16i32, VReg_512>;
-def : BitConvert <v16i32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16i32, SReg_512>;
 def : BitConvert <v16i32, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16f32, SReg_512>;
 def : BitConvert <v16f32, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v8f64, VReg_512>;
-def : BitConvert <v8f64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8f64, SReg_512>;
 def : BitConvert <v8f64, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v8i64, VReg_512>;
-def : BitConvert <v8i64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8i64, SReg_512>;
 def : BitConvert <v8i64, v32bf16, SReg_512>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index 5065f57c67dfd..b36ade582c878 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -1946,6 +1946,133 @@ end:
   ret void
 }
 
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32f16:
+define <32 x half> @v_bitcast_v8i64_to_v32f16(<8 x i64> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <8 x i64> %a, splat (i64 3)
+  %a2 = bitcast <8 x i64> %a1 to <32 x half>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x i64> %a to <32 x half>
+  br label %end
+end:
+  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32i16:
+define <32 x i16> @v_bitcast_v8i64_to_v32i16(<8 x i64> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <8 x i64> %a, splat (i64 3)
+  %a2 = bitcast <8 x i64> %a1 to <32 x i16>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x i64> %a to <32 x i16>
+  br label %end
+end:
+  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32i16:
+define <32 x i16> @v_bitcast_v8f64_to_v32i16(<8 x double> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+  %a2 = bitcast <8 x double> %a1 to <32 x i16>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x double> %a to <32 x i16>
+  br label %end
+end:
+  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32f16:
+define <32 x half> @v_bitcast_v8f64_to_v32f16(<8 x double> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+  %a2 = bitcast <8 x double> %a1 to <32 x half>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x double> %a to <32 x half>
+  br label %end
+end:
+  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8i64:
+define <8 x i64> @v_bitcast_v32f16_to_v8i64(<32 x half> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+  %a2 = bitcast <32 x half> %a1 to <8 x i64>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x half> %a to <8 x i64>
+  br label %end
+end:
+  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8f64:
+define <8 x double> @v_bitcast_v32f16_to_v8f64(<32 x half> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+  %a2 = bitcast <32 x half> %a1 to <8 x double>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x half> %a to <8 x double>
+  br label %end
+end:
+  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x double> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8i64:
+define <8 x i64> @v_bitcast_v32i16_to_v8i64(<32 x i16> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <32 x i16> %a, splat (i16 3)
+  %a2 = bitcast <32 x i16> %a1 to <8 x i64>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x i16> %a to <8 x i64>
+  br label %end
+end:
+  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8f64:
+define <8 x double> @v_bitcast_v32i16_to_v8f64(<32 x i16> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <32 x i16> %a, splat (i16 3)
+  %a2 = bitcast <32 x i16> %a1 to <8 x double>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x i16> %a to <8 x double>
+  br label %end
+end:
+  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x double> %phi
+}

llvm/lib/Target/AMDGPU/SIInstructions.td

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll

Shoreshen · 2025-03-21T01:25:04Z

Hi @arsenm @shiltian, just asking whether there are any further fix-ups I need to make for this PR. Thanks~

Add all type for bitcast

96502df

llvmbot added the backend:AMDGPU label Mar 18, 2025

arsenm reviewed Mar 18, 2025

View reviewed changes

llvm/lib/Target/AMDGPU/SIInstructions.td Outdated Show resolved Hide resolved

Shoreshen added 3 commits March 18, 2025 20:57

fix comment

8708548

fix lit

5046309

fix comment

c31f255

shiltian reviewed Mar 18, 2025

View reviewed changes

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll Outdated Show resolved Hide resolved

Shoreshen requested review from arsenm and shiltian March 19, 2025 02:14

Merge branch 'main' into Complete-VREG512-bitconvert-patterns

d021cbb

arsenm reviewed Mar 19, 2025

View reviewed changes

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll Outdated Show resolved Hide resolved

Shoreshen added 2 commits March 20, 2025 10:06

Merge branch 'main' into Complete-VREG512-bitconvert-patterns

a864e0e

add other bitconvert type for vreg512

aa1895b

Shoreshen requested a review from arsenm March 20, 2025 02:53

Merge branch 'main' into Complete-VREG512-bitconvert-patterns

e0b8b69

shiltian approved these changes Mar 21, 2025

View reviewed changes

Merge branch 'main' into Complete-VREG512-bitconvert-patterns

c4b3161

Shoreshen merged commit 054e0b4 into llvm:main Mar 24, 2025
11 checks passed

Shoreshen mentioned this pull request May 14, 2025

AMDGPU: Add sgpr bit convert tests #136112

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add all type for bitcast on VReg_512 #131775

[AMDGPU] Add all type for bitcast on VReg_512 #131775

Uh oh!

Shoreshen commented Mar 18, 2025

Uh oh!

llvmbot commented Mar 18, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Shoreshen commented Mar 21, 2025

Uh oh!

Uh oh!

Uh oh!

[AMDGPU] Add all type for bitcast on VReg_512 #131775

[AMDGPU] Add all type for bitcast on VReg_512 #131775

Uh oh!

Conversation

Shoreshen commented Mar 18, 2025

Uh oh!

llvmbot commented Mar 18, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Shoreshen commented Mar 21, 2025

Uh oh!

Uh oh!

Uh oh!