Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Add all type for bitcast on VReg_512 #131775

Merged
merged 9 commits into from
Mar 24, 2025

Conversation

Shoreshen
Copy link
Contributor

Add all types pattern for bitcast on VReg_512

@llvmbot
Copy link
Member

llvmbot commented Mar 18, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (Shoreshen)

Changes

Add all types pattern for bitcast on VReg_512


Full diff: https://github.com/llvm/llvm-project/pull/131775.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+8-37)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll (+127)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index de77401eb0137..2eee87068a3b8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1841,58 +1841,29 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
 def : BitConvert <v12f32, v12i32, VReg_384>;
 
 // 512-bit bitcast
-def : BitConvert <v32f16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v16i32, VReg_512>;
-def : BitConvert <v32f16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32f16, VReg_512>;
-def : BitConvert <v16i32, v32f16, VReg_512>;
-def : BitConvert <v32i16, v16i32, VReg_512>;
-def : BitConvert <v32i16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v32i16, VReg_512>;
-def : BitConvert <v16i32, v16f32, VReg_512>;
-def : BitConvert <v16f32, v16i32, VReg_512>;
-def : BitConvert <v8i64,  v8f64,  VReg_512>;
-def : BitConvert <v8f64,  v8i64,  VReg_512>;
-def : BitConvert <v8i64,  v16i32, VReg_512>;
-def : BitConvert <v8f64,  v16i32, VReg_512>;
-def : BitConvert <v16i32, v8i64,  VReg_512>;
-def : BitConvert <v16i32, v8f64,  VReg_512>;
-def : BitConvert <v8i64,  v16f32, VReg_512>;
-def : BitConvert <v8f64,  v16f32, VReg_512>;
-def : BitConvert <v16f32, v8i64,  VReg_512>;
-def : BitConvert <v16f32, v8f64,  VReg_512>;
-
-
-
-def : BitConvert <v32bf16, v32i16, VReg_512>;
-def : BitConvert <v32i16, v32bf16, VReg_512>;
+foreach vt = VReg_512.RegTypes in {
+  foreach st = VReg_512.RegTypes in {
+    if !not(!eq (vt, st)) then {
+        def : BitConvert <vt, st, VReg_512>;
+    }
+  }
+}
+
 def : BitConvert <v32bf16, v32i16, SReg_512>;
 def : BitConvert <v32i16, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v32f16, VReg_512>;
-def : BitConvert <v32f16, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v32f16, SReg_512>;
 def : BitConvert <v32f16, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v16i32, VReg_512>;
-def : BitConvert <v16i32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16i32, SReg_512>;
 def : BitConvert <v16i32, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v16f32, VReg_512>;
-def : BitConvert <v16f32, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v16f32, SReg_512>;
 def : BitConvert <v16f32, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v8f64, VReg_512>;
-def : BitConvert <v8f64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8f64, SReg_512>;
 def : BitConvert <v8f64, v32bf16, SReg_512>;
 
-def : BitConvert <v32bf16, v8i64, VReg_512>;
-def : BitConvert <v8i64, v32bf16, VReg_512>;
 def : BitConvert <v32bf16, v8i64, SReg_512>;
 def : BitConvert <v8i64, v32bf16, SReg_512>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index 5065f57c67dfd..b36ade582c878 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -1946,6 +1946,133 @@ end:
   ret void
 }
 
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32f16:
+define <32 x half> @v_bitcast_v8i64_to_v32f16(<8 x i64> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <8 x i64> %a, splat (i64 3)
+  %a2 = bitcast <8 x i64> %a1 to <32 x half>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x i64> %a to <32 x half>
+  br label %end
+end:
+  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8i64_to_v32i16:
+define <32 x i16> @v_bitcast_v8i64_to_v32i16(<8 x i64> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <8 x i64> %a, splat (i64 3)
+  %a2 = bitcast <8 x i64> %a1 to <32 x i16>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x i64> %a to <32 x i16>
+  br label %end
+end:
+  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32i16:
+define <32 x i16> @v_bitcast_v8f64_to_v32i16(<8 x double> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+  %a2 = bitcast <8 x double> %a1 to <32 x i16>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x double> %a to <32 x i16>
+  br label %end
+end:
+  %phi = phi <32 x i16> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i16> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v8f64_to_v32f16:
+define <32 x half> @v_bitcast_v8f64_to_v32f16(<8 x double> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <8 x double> %a, splat (double 1.000000e+00)
+  %a2 = bitcast <8 x double> %a1 to <32 x half>
+  br label %end
+cmp.false:
+  %a3 = bitcast <8 x double> %a to <32 x half>
+  br label %end
+end:
+  %phi = phi <32 x half> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x half> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8i64:
+define <8 x i64> @v_bitcast_v32f16_to_v8i64(<32 x half> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+  %a2 = bitcast <32 x half> %a1 to <8 x i64>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x half> %a to <8 x i64>
+  br label %end
+end:
+  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32f16_to_v8f64:
+define <8 x double> @v_bitcast_v32f16_to_v8f64(<32 x half> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = fadd <32 x half> %a, splat (half 0xH0200)
+  %a2 = bitcast <32 x half> %a1 to <8 x double>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x half> %a to <8 x double>
+  br label %end
+end:
+  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x double> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8i64:
+define <8 x i64> @v_bitcast_v32i16_to_v8i64(<32 x i16> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <32 x i16> %a, splat (i16 3)
+  %a2 = bitcast <32 x i16> %a1 to <8 x i64>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x i16> %a to <8 x i64>
+  br label %end
+end:
+  %phi = phi <8 x i64> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x i64> %phi
+}
+
+; CHECK-LABEL: {{^}}v_bitcast_v32i16_to_v8f64:
+define <8 x double> @v_bitcast_v32i16_to_v8f64(<32 x i16> %a, i32 %b) {
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+cmp.true:
+  %a1 = add <32 x i16> %a, splat (i16 3)
+  %a2 = bitcast <32 x i16> %a1 to <8 x double>
+  br label %end
+cmp.false:
+  %a3 = bitcast <32 x i16> %a to <8 x double>
+  br label %end
+end:
+  %phi = phi <8 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <8 x double> %phi
+}
 
 
 

@Shoreshen Shoreshen requested review from arsenm and shiltian March 19, 2025 02:14
@Shoreshen Shoreshen requested a review from arsenm March 20, 2025 02:53
@Shoreshen
Copy link
Contributor Author

Hi @arsenm @shiltian , just ask if there is any fix up I need for this PR. Thanks~

@Shoreshen Shoreshen merged commit 054e0b4 into llvm:main Mar 24, 2025
11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants